## Queries Benchmarking

In [172]:
# Connect to the database and set up the table
import psycopg2
import csv
import json
import gc 
import time
import random
from psycopg2 import sql
import string


# Connection parameters for the benchmark database, kept together so they are
# easy to adjust in one place.
_connection_params = dict(
    dbname="postgresDB",
    user="user",
    password="tprli",
    host="postgres",  # Or use your database host if it's not local
    port="5432",  # Default PostgreSQL port
)

connection = psycopg2.connect(**_connection_params)
# Autocommit: no explicit commit() calls are made anywhere in this notebook.
connection.autocommit = True
# Single shared cursor used by every benchmark function below.
cursor = connection.cursor()


# Step 1: Start from a clean slate by removing any table left over from a
# previous run of this notebook.
cursor.execute("DROP TABLE IF EXISTS bitcoin_addresses;")

# Step 2: (Re)create the table. The DROP above is unconditional, so the table
# never exists at this point and a plain CREATE TABLE is used.
# NOTE(review): bitcoin_address is VARCHAR(34) while the inline SQL comment
# mentions addresses of up to 35 characters -- confirm against the data set.
create_table_query = """
CREATE TABLE bitcoin_addresses (
    bitcoin_address VARCHAR(34) NOT NULL, -- Bitcoin addresses are usually 26-35 characters
    account VARCHAR(20),                 -- Account numbers, max length derived from the example
    ip_address VARCHAR(15),              -- IPv4 addresses, formatted as strings
    county VARCHAR(50),                  -- County names, variable length
    country_code CHAR(2),                -- Two-letter country codes
    database_column_type VARCHAR(50),   -- Describes column type as text (e.g., mediumint, float),
    PRIMARY KEY (bitcoin_address)
);
"""
cursor.execute(create_table_query)

In [173]:
# import data

def import_csv(path):
    """
    Bulk-load the CSV file at `path` into the bitcoin_addresses table.

    The first line of the file is treated as a header and discarded; the
    remaining lines are streamed to the server with cursor.copy_from using a
    comma as the column separator.

    Only the copy_from call is timed. A garbage collection is forced right
    before the timer starts, presumably to keep collector pauses out of the
    measured window.

    Returns the elapsed time of the copy, as a difference of
    time.perf_counter() readings.
    """
    with open(path, 'r') as csv_file:
        next(csv_file)  # Skip the header row.

        gc.collect()
        copy_started = time.perf_counter()
        cursor.copy_from(csv_file, 'bitcoin_addresses', sep=",")
        elapsed = time.perf_counter() - copy_started
    return elapsed

# Load the data set; import_time keeps the duration reported by import_csv
# (a difference of time.perf_counter() readings) for later reporting.
import_time = import_csv('/home/data.csv')

print("data succesfully imported")

data succesfully imported


In [88]:
def get_random_key():
    """
    Fetch the key of one randomly chosen row of bitcoin_addresses.

    The result is returned exactly as cursor.fetchone() delivers it, i.e. as
    a one-element row such as ('10YP6UCZM1SACWVYAWHQE3ZD3JK06MLEK',) rather
    than a bare string. Presumably None when the table is empty (DB-API
    fetchone behaviour) -- confirm if that case matters.
    """
    random_key_query = """
        SELECT bitcoin_address FROM bitcoin_addresses ORDER BY RANDOM() LIMIT 1;
    """
    cursor.execute(random_key_query)
    row = cursor.fetchone()
    return row

('10YP6UCZM1SACWVYAWHQE3ZD3JK06MLEK',)


NameError: name 'random_string' is not defined

In [None]:
# delete

def delete_entry(key):
    """
    Delete the entry whose primary key is `key` and return the time needed
    for this operation.

    Parameters:
        key: either the bare bitcoin address (str) or a one-element row such
             as the tuple returned by get_random_key().

    Returns:
        Elapsed time of the DELETE statement, as a difference of
        time.perf_counter() readings. Only cursor.execute is timed.

    Raises:
        ValueError: if `key` is None (e.g. no row was available to pick).
    """
    if key is None:
        raise ValueError("delete_entry() needs a key, got None (is the table empty?)")

    # psycopg2 expects the query arguments as a sequence. A bare string would
    # be treated as a sequence of characters and fail, so wrap scalars into a
    # one-element tuple and pass rows through unchanged.
    params = key if isinstance(key, (tuple, list)) else (key,)

    gc.collect()
    start_time = time.perf_counter()
    cursor.execute("""
        DELETE FROM bitcoin_addresses WHERE bitcoin_address = %s;
    """, params)
    end_time = time.perf_counter()

    return end_time - start_time

# Example run: time the deletion of one randomly chosen row.
print(delete_entry(get_random_key()))

In [None]:
# read

def read_entry(key):
    """
    Read all columns of the entry whose primary key is `key` and return the
    time needed for executing the query.

    The selected row is not fetched from the cursor; only cursor.execute is
    timed.

    Parameters:
        key: either the bare bitcoin address (str) or a one-element row such
             as the tuple returned by get_random_key().

    Returns:
        Elapsed time of the SELECT statement, as a difference of
        time.perf_counter() readings.

    Raises:
        ValueError: if `key` is None (e.g. no row was available to pick).
    """
    if key is None:
        raise ValueError("read_entry() needs a key, got None (is the table empty?)")

    # psycopg2 expects the query arguments as a sequence. A bare string would
    # be treated as a sequence of characters and fail, so wrap scalars into a
    # one-element tuple and pass rows through unchanged.
    params = key if isinstance(key, (tuple, list)) else (key,)

    gc.collect()
    start_time = time.perf_counter()
    cursor.execute("""
        SELECT * FROM bitcoin_addresses WHERE bitcoin_address = %s
    """, params)
    end_time = time.perf_counter()

    return end_time - start_time

# Example run: time the lookup of one randomly chosen row.
print(read_entry(get_random_key()))

In [182]:
# update

def get_random_string(length):
    """
    Return a string of `length` characters, each drawn at random (with
    replacement) from the ASCII letters and the digits 0-9.
    """
    alphabet = string.ascii_letters + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

# Quick check: print a sample 10-character random string.
print(get_random_string(10))

def update_entry(key):
    """
    Overwrite the ip_address, county, country_code and database_column_type
    columns of the entry whose primary key is `key` with random strings and
    return the time needed for updating the entry.

    The account column and the key itself are left untouched. The random
    values are generated before the timer starts, so only cursor.execute is
    timed.

    Parameters:
        key: either the bare bitcoin address (str) or a one-element row such
             as the tuple returned by get_random_key().

    Returns:
        Elapsed time of the UPDATE statement, as a difference of
        time.perf_counter() readings.
    """
    # A fetched row is a one-element sequence; unwrap it so the address is
    # bound as a plain string instead of relying on how psycopg2 happens to
    # adapt a tuple (IN-list syntax) or a list (ARRAY) inside the parameters.
    if isinstance(key, (tuple, list)):
        (key,) = key

    ip_address = get_random_string(15)
    county = get_random_string(50)
    country_code = get_random_string(2)
    database_column_type = get_random_string(5)

    gc.collect()
    start_time = time.perf_counter()
    cursor.execute("""
        UPDATE bitcoin_addresses
        SET ip_address = %s, county = %s, country_code = %s, database_column_type = %s
        WHERE bitcoin_address = %s
    """, (ip_address, county, country_code, database_column_type, key))
    end_time = time.perf_counter()

    return end_time - start_time

# Example run: time the update of one randomly chosen row.
print(update_entry(get_random_key()))

3HAYvsQFvJ


In [None]:
# insert / create

def add_entry():
    """
    Insert an entry made of random values under a new random key and return
    the time taken for that operation.

    If the generated key already exists in the table, the insert is rejected
    by the primary-key constraint; a fresh set of values is then generated
    and the insert is attempted again. Only that integrity error triggers a
    retry: any other failure (lost connection, missing table, interruption)
    propagates to the caller instead of being retried forever.

    Returns:
        Elapsed time of the successful INSERT statement, as a difference of
        time.perf_counter() readings. Failed attempts are not included.
    """
    while True:
        # Values are generated outside the timed window; lengths match the
        # generator calls used for each column throughout this notebook.
        row = (
            get_random_string(33),  # bitcoin_address (primary key)
            get_random_string(20),  # account
            get_random_string(15),  # ip_address
            get_random_string(50),  # county
            get_random_string(2),   # country_code
            get_random_string(5),   # database_column_type
        )

        gc.collect()
        start_time = time.perf_counter()
        try:
            cursor.execute("""
                INSERT INTO bitcoin_addresses
                VALUES (%s, %s, %s, %s, %s, %s)
            """, row)
        except psycopg2.IntegrityError:
            # Key collision: the connection runs in autocommit mode, so no
            # rollback is needed before trying again with new values.
            continue
        end_time = time.perf_counter()
        return end_time - start_time

    

# Example run: time the insertion of one randomly generated row.
print(add_entry())