In [4]:
import concurrent.futures
import json
import os
import random
import time
from collections import defaultdict
from pprint import pprint

import psycopg2
from psycopg2 import sql

In [5]:
# pip install psycopg2-binary

In [6]:
# Shared metrics store for the whole notebook. Timings are assigned as float
# seconds; the "*_exception" counters are incremented in place and rely on the
# defaultdict(int) default of 0 for their first increment.
data_collection = defaultdict(int)

# connection parameters

In [7]:
# Number of user rows passed to measure_insert_users.
num_users = 1000
# Number of item rows passed to measure_insert_items.
num_items = 1000
# Number of name lookups passed to measure_query.
num_queries = 700
# Thread-pool size used by every concurrent benchmark in this notebook.
max_workers = 16

In [8]:
# Connection settings. Each value can be overridden through the usual libpq
# environment variable names so that host and credentials do not have to be
# edited (or committed) in the notebook; the literals are fallbacks kept so
# existing runs behave exactly as before.
host = os.environ.get(
    "PGHOST",
    "ec2-35-93-129-232.us-west-2.compute.amazonaws.com",  # Replace with your EC2 instance's public DNS
)
port = os.environ.get("PGPORT", "5432")
dbname = os.environ.get("PGDATABASE", "mydatabase")
user = os.environ.get("PGUSER", "danlandy")
# NOTE(review): prefer setting PGPASSWORD in the environment; a password that
# lives in a shared notebook should be treated as leaked and rotated.
password = os.environ.get("PGPASSWORD", "danlandy")
# Path of the JSON file that receives the dump of data_collection.
file_name = 't2_computer2.micro.data.json'

# drop tables

In [9]:
def drop_all_tables():
    """Drop the ``users_items``, ``users`` and ``items`` tables if they exist.

    ``users_items`` is dropped first because it holds foreign keys to the
    other two tables. All three statements run in one transaction that is
    committed at the end. Errors are printed, not raised, and the connection
    is closed on every path.
    """
    drop_queries = (
        "DROP TABLE IF EXISTS users_items;",
        "DROP TABLE IF EXISTS users;",
        "DROP TABLE IF EXISTS items;",
    )

    conn = None
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the database successfully")

        # The cursor context manager closes the cursor even if a DROP fails.
        with conn.cursor() as cur:
            for query in drop_queries:
                cur.execute(query)

        conn.commit()
        print("Tables dropped successfully")

    except Exception as e:
        print(f"Unable to connect to the database or drop tables: {e}")

    finally:
        # Previously close() was skipped whenever a statement raised, leaving
        # the server-side session open until garbage collection.
        if conn is not None:
            conn.close()
            print("Connection closed")


# measure the time to connect to the database and the time to fetch the version data

In [10]:
def measure_connection_time():
    """Time opening a connection and one ``SELECT version()`` round trip.

    The connection time is stored in ``data_collection['connection_time']``;
    the query time is only printed. Errors are printed, not raised, and the
    connection is closed on every path.
    """
    conn = None
    try:
        # perf_counter is monotonic, so the interval cannot be distorted by
        # system clock adjustments the way time.time() differences can.
        connect_started = time.perf_counter()
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        connection_time = time.perf_counter() - connect_started

        print(f"Time taken to connect to the database: {connection_time:.6f} seconds")
        data_collection['connection_time'] = connection_time

        with conn.cursor() as cur:
            query_started = time.perf_counter()
            cur.execute("SELECT version();")
            result = cur.fetchone()
            version_fetch_time = time.perf_counter() - query_started

        print(f"Time taken to fetch the version data : {version_fetch_time:.6f} seconds")

        print("PostgreSQL version:", result)

    except Exception as e:
        print(f"Unable to connect to the database: {e}")

    finally:
        # Close even when the query fails; the original leaked the connection
        # on that path.
        if conn is not None:
            conn.close()
            print("Connection closed")


# create users table

In [11]:
def measure_creating_users_table():
    """Create the ``users`` table and record how long it takes.

    Two durations are printed: one that includes opening the connection and
    one that covers only ``CREATE TABLE`` plus commit. The former is stored
    in ``data_collection['create_user_table']``. Errors (including the table
    already existing) are printed, not raised, and the connection is closed
    on every path.
    """
    conn = None
    try:
        connection_started = time.perf_counter()
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )

        # DDL for the 'users' table.
        create_table_query = '''
        CREATE TABLE users (
            user_id serial  PRIMARY KEY,
            first_name VARCHAR(50),
            last_name VARCHAR(50)
        );
        '''

        with conn.cursor() as cur:
            statement_started = time.perf_counter()
            cur.execute(create_table_query)
            conn.commit()
            finished = time.perf_counter()

        elapsed_time = finished - statement_started
        elapsed_time_2 = finished - connection_started

        data_collection['create_user_table'] = elapsed_time_2

        print(f"Time taken to create users table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to create users table with established connections : {elapsed_time:.6f} seconds")

        print("Table 'users' created successfully")

    except Exception as e:
        print(f"Unable to connect to the database or create table: {e}")

    finally:
        # The original skipped close() whenever CREATE TABLE raised.
        if conn is not None:
            conn.close()
            print("Connection closed")


# create Items table

In [12]:
def measure_create_items_table():
    """Create the ``items`` table and record how long it takes.

    Two durations are printed: one that includes opening the connection and
    one that covers only ``CREATE TABLE`` plus commit. The former is stored
    in ``data_collection['create_item_table']``. Errors are printed, not
    raised, and the connection is closed on every path.
    """
    conn = None
    try:
        connection_started = time.perf_counter()
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the database successfully")

        # DDL for the 'items' table.
        create_table_query = '''
        CREATE TABLE items (
            item_id serial PRIMARY KEY,
            item_name VARCHAR(100),
            value1 varchar(100)
        );
        '''

        with conn.cursor() as cur:
            statement_started = time.perf_counter()
            cur.execute(create_table_query)
            conn.commit()
            finished = time.perf_counter()

        elapsed_time = finished - statement_started
        elapsed_time_2 = finished - connection_started

        data_collection['create_item_table'] = elapsed_time_2

        print(f"Time taken to create items table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to create items table with established connections : {elapsed_time:.6f} seconds")

        print("Table 'items' created successfully")

    except Exception as e:
        print(f"Unable to connect to the database or create schema/table: {e}")

    finally:
        # The original skipped close() whenever CREATE TABLE raised.
        if conn is not None:
            conn.close()
            print("Connection closed")


# create user-item table

In [13]:
def measure_create_users_items_table():
    """Create the ``users_items`` link table and record how long it takes.

    The table has a composite primary key ``(user_id, item_id)`` and foreign
    keys to ``users`` and ``items``, so both of those tables must already
    exist. The duration including connection setup is stored in
    ``data_collection['create_user_item_table']``. Errors are printed, not
    raised, and the connection is closed on every path.
    """
    conn = None
    try:
        connection_started = time.perf_counter()
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the database successfully")

        # DDL for the 'users_items' table.
        create_table_query = '''
        CREATE TABLE users_items (
            user_id INTEGER NOT NULL,
            item_id INTEGER NOT NULL,
            PRIMARY KEY (user_id, item_id),
            FOREIGN KEY (user_id) REFERENCES users(user_id),
            FOREIGN KEY (item_id) REFERENCES items(item_id)
        );
        '''

        with conn.cursor() as cur:
            statement_started = time.perf_counter()
            cur.execute(create_table_query)
            conn.commit()
            finished = time.perf_counter()

        elapsed_time = finished - statement_started
        elapsed_time_2 = finished - connection_started
        data_collection['create_user_item_table'] = elapsed_time_2

        print(f"Time taken to create users-items table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to create users-items table with established connections : {elapsed_time:.6f} seconds")

        print("Table 'users_items' created successfully")

    except Exception as e:
        print(f"Unable to connect to the database or create table: {e}")

    finally:
        # The original skipped close() whenever CREATE TABLE raised.
        if conn is not None:
            conn.close()
            print("Connection closed")


# insert users

In [14]:
def measure_insert_one_user():
    """Time a single-row insert into ``users``.

    Stores two durations in ``data_collection``:
    ``insert_one_user_with_connection`` (connect + insert + commit) and
    ``insert_one_user_without_connection`` (insert + commit only). Errors are
    printed, not raised, and the connection is closed on every path.
    """
    # perf_counter is monotonic and meant for measuring short intervals.
    start_time = time.perf_counter()
    conn = None
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        value = ("random_first_name", "random_last_name")
        insert_query = '''
        INSERT INTO users (first_name, last_name) VALUES (%s, %s);
        '''

        with conn.cursor() as cur:
            start_time_without_connection = time.perf_counter()
            cur.execute(insert_query, value)
            conn.commit()
            end_time = time.perf_counter()

        elapsed_time = end_time - start_time
        elapsed_time_without_connection = end_time - start_time_without_connection

        data_collection['insert_one_user_with_connection'] = elapsed_time
        data_collection['insert_one_user_without_connection'] = elapsed_time_without_connection

        print(f"insert one user with connectino:{elapsed_time:6f}")
        print(f"insert one user without connectino:{elapsed_time_without_connection:6f}")

    except Exception as e:
        print(f"Unable to connect to the database or insert values: {e}")

    finally:
        # The original skipped close() whenever the insert raised.
        if conn is not None:
            conn.close()

In [15]:
def insert_user(value):
    """Insert one ``(first_name, last_name)`` row over a dedicated connection.

    Used as a thread-pool task by ``measure_insert_users``. Any failure is
    printed and counted in ``data_collection['insert_many_users_exception']``
    instead of being raised. The connection is closed on every path so failed
    inserts do not pile up open sessions on the server.

    Args:
        value: Two-item sequence bound to the ``%s`` placeholders.
    """
    conn = None
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        insert_query = '''
        INSERT INTO users (first_name, last_name) VALUES (%s, %s);
        '''
        with conn.cursor() as cur:
            cur.execute(insert_query, value)
        conn.commit()
    except Exception as e:
        # NOTE(review): this read-modify-write is not atomic across worker
        # threads, so the count may be slightly low under heavy failure rates.
        data_collection['insert_many_users_exception'] += 1
        print(f"Unable to connect to the database or insert values: {e}")
    finally:
        if conn is not None:
            conn.close()

def measure_insert_users(num_users):
    """Insert ``num_users`` generated users concurrently and time the batch.

    Each row is handed to ``insert_user`` on a pool of ``max_workers``
    threads. The wall time of the whole batch is stored in
    ``data_collection[f'insert_{num_users}_users']``. Individual failures do
    not stop the batch; ``insert_user`` counts them separately.

    Args:
        num_users: Number of rows to generate and insert.
    """
    values = [(f"first_name_2_{i}", f"last_name_2_{i}") for i in range(num_users)]

    started = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Leaving the with-block waits for every submitted insert to finish.
        executor.map(insert_user, values)
    elapsed_time = time.perf_counter() - started

    data_collection[f'insert_{num_users}_users'] = elapsed_time

    # Report the real batch size; the message used to say "1000" regardless.
    print(f"Time taken to insert {num_users} users: {elapsed_time:.6f} seconds")


# measure user insert exceptions


In [16]:
def measure_users_exceptions(num_users, initial_users=10000):
    """Rebuild the schema and run two concurrent user-insert batches.

    Steps, in order: drop all tables, create ``users``, insert
    ``initial_users`` users, create ``items`` and ``users_items``, then insert
    ``num_users`` more users. Failed inserts are tallied by ``insert_user``
    in ``data_collection['insert_many_users_exception']``; that counter is not
    reset here, so it accumulates across both batches and earlier runs.

    Args:
        num_users: Size of the second insert batch.
        initial_users: Size of the first insert batch (previously a
            hard-coded 10000).
    """
    drop_all_tables()
    measure_creating_users_table()
    measure_insert_users(initial_users)
    measure_create_items_table()
    measure_create_users_items_table()
    measure_insert_users(num_users)

# insert items


In [17]:
def insert_item(value):
    """Insert one ``(item_name, value1)`` row over a dedicated connection.

    Used as a thread-pool task by ``measure_insert_items``. Any failure is
    printed and counted in ``data_collection['insert_many_items_exception']``
    instead of being raised. The connection is closed on every path so failed
    inserts do not pile up open sessions on the server.

    Args:
        value: Two-item sequence bound to the ``%s`` placeholders.
    """
    conn = None
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        insert_query = '''
        INSERT INTO items (item_name,value1) VALUES (%s,%s);
        '''
        with conn.cursor() as cur:
            cur.execute(insert_query, value)
        conn.commit()
    except Exception as e:
        # NOTE(review): this read-modify-write is not atomic across worker
        # threads, so the count may be slightly low under heavy failure rates.
        data_collection['insert_many_items_exception'] += 1
        print(f"Unable to connect to the database or insert values: {e}")
    finally:
        if conn is not None:
            conn.close()

def measure_insert_items(num_items):
    """Insert ``num_items`` generated items concurrently and time the batch.

    Each row is handed to ``insert_item`` on a pool of ``max_workers``
    threads. The wall time of the whole batch is stored in
    ``data_collection[f'insert_{num_items}_items']``. Individual failures do
    not stop the batch; ``insert_item`` counts them separately.

    Args:
        num_items: Number of rows to generate and insert.
    """
    item_values = [(f"item_name_2_{i}", 'ramdon_value') for i in range(num_items)]

    started = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Leaving the with-block waits for every submitted insert to finish.
        executor.map(insert_item, item_values)
    elapsed_time = time.perf_counter() - started

    data_collection[f'insert_{num_items}_items'] = elapsed_time

    # Report the real batch size; the message used to say "1000" regardless.
    print(f"Time taken to insert {num_items} items: {elapsed_time:.6f} seconds")


# insert users-items

In [18]:
def get_userid_itemid():
    """Fetch every ``user_id`` from ``users`` and ``item_id`` from ``items``.

    Returns:
        tuple: ``(user_ids, item_ids)`` as two lists. If connecting or
        querying fails, the error is printed and ``([], [])`` is returned so
        callers can always unpack the result (the function used to fall
        through and return ``None``, which made callers fail on unpacking).
    """
    conn = None
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("get_userid_itemid(): Connected to the database successfully")

        with conn.cursor() as cur:
            cur.execute("SELECT user_id FROM users;")
            user_ids = [row[0] for row in cur.fetchall()]

            cur.execute("SELECT item_id FROM items;")
            item_ids = [row[0] for row in cur.fetchall()]

        return user_ids, item_ids

    except Exception as e:
        print(f"Unable to connect to the database or retrieve IDs: {e}")
        return [], []

    finally:
        # Runs after either return above; closes the connection on every path.
        if conn is not None:
            conn.close()
            print("get_userid_itemid(): Connection closed")

In [19]:
def measure_one_insert_user_items_table():
    """Time linking five random items to a single user in ``users_items``.

    The user is the first ID returned by ``get_userid_itemid``; five distinct
    item IDs are drawn with ``random.sample``. All five inserts share one
    transaction. Two durations are stored in ``data_collection``:
    ``insert_5_items_one_users_with_connection`` and
    ``insert_5_items_one_users_without_connection``. Errors (including too few
    users or items) are printed, not raised, and the connection is closed on
    every path.
    """
    conn = None
    try:
        user_ids, item_ids = get_userid_itemid()
        insert_query = '''
        INSERT INTO users_items (user_id, item_id) VALUES (%s, %s);
        '''

        start_time_connection = time.perf_counter()
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the  database successfully")

        with conn.cursor() as cur:
            start_time_without_connection = time.perf_counter()
            user_id = user_ids[0]

            # Randomly select 5 items for user_id
            for item_id in random.sample(item_ids, 5):
                cur.execute(insert_query, (user_id, item_id))

            conn.commit()
            end_time = time.perf_counter()

        elapsed_time_wihtout_connection = end_time - start_time_without_connection
        elapsed_time_with_connection = end_time - start_time_connection

        data_collection['insert_5_items_one_users_with_connection'] = elapsed_time_with_connection
        data_collection['insert_5_items_one_users_without_connection'] = elapsed_time_wihtout_connection

        print(f"insert user-items table with connections : {elapsed_time_with_connection:.6f} seconds")
        print(f"insert user-items table without connections : {elapsed_time_wihtout_connection:.6f} seconds")

        # A copy-pasted "Values inserted successfully into items table"
        # message was removed: this function never writes to items.
        print("Relationships inserted successfully into 'users_items' table")

    except Exception as e:
        print(f"Unable to connect to the database or insert relationships: {e}")

    finally:
        # The original skipped close() whenever an insert raised.
        if conn is not None:
            conn.close()
            print("Connection closed")

In [20]:
def insert_user_items(user_id, item_ids):
    """Link five randomly chosen items to ``user_id`` in ``users_items``.

    Used as a thread-pool task by ``measure_insert_user_items_table``. The
    five inserts share one transaction on a dedicated connection. Any failure
    is printed and counted in
    ``data_collection['insert_5_items_per_users_exception']`` instead of being
    raised, and the connection is closed on every path.

    Args:
        user_id: ID of the user to attach items to.
        item_ids: Candidate item IDs; must hold at least five entries or
            ``random.sample`` raises and the call is counted as a failure.
    """
    conn = None
    try:
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        insert_query = '''
        INSERT INTO users_items (user_id, item_id) VALUES (%s, %s);
        '''
        with conn.cursor() as cur:
            # Randomly select 5 items for each user
            for item_id in random.sample(item_ids, 5):
                cur.execute(insert_query, (user_id, item_id))

        conn.commit()
    except Exception as e:
        # NOTE(review): this read-modify-write is not atomic across worker
        # threads, so the count may be slightly low under heavy failure rates.
        data_collection['insert_5_items_per_users_exception'] += 1
        print(f"Unable to connect to the database or insert relationships: {e}")
    finally:
        if conn is not None:
            conn.close()

def measure_insert_user_items_table():
    """Link five random items to every user concurrently and time the batch.

    One ``insert_user_items`` task per user runs on a pool of ``max_workers``
    threads; each task opens its own connection, so the recorded time
    includes connection setup. The wall time of the batch is stored in
    ``data_collection['insert_5_items_per_users']``. If no user IDs could be
    fetched, the benchmark is skipped and nothing is recorded.
    """
    fetched = get_userid_itemid()
    # Guard against a failed lookup (None or empty lists): unpacking None
    # here used to raise TypeError outside any try block.
    if not fetched or not fetched[0]:
        print("No user IDs available; skipping the users_items insert benchmark")
        return
    user_ids, item_ids = fetched

    started = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Every task receives the same full list of candidate item IDs.
        executor.map(insert_user_items, user_ids, [item_ids] * len(user_ids))
    elapsed_time = time.perf_counter() - started

    data_collection['insert_5_items_per_users'] = elapsed_time

    print(f"Time taken to insert 5 items per user with established connections: {elapsed_time:.6f} seconds")


# query with given firstname and lastname

In [21]:
def execute_query(first_name, last_name):
    """Return the item names linked to the user with the given name.

    Joins ``users`` -> ``users_items`` -> ``items`` over a dedicated
    connection. Used as a thread-pool task by ``measure_query``.

    Args:
        first_name: Value matched against ``users.first_name``.
        last_name: Value matched against ``users.last_name``.

    Returns:
        list: Rows as returned by ``fetchall`` (one single-column tuple per
        item). On any failure the error is printed, counted in
        ``data_collection['query_users_exception']``, and ``[]`` is returned.
    """
    conn = None
    try:
        query = '''
        SELECT i.item_name
        FROM users u
        JOIN users_items ui ON u.user_id = ui.user_id
        JOIN items i ON ui.item_id = i.item_id
        WHERE u.first_name = %s AND u.last_name = %s;
        '''
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        with conn.cursor() as cur:
            cur.execute(query, (first_name, last_name))
            return cur.fetchall()

    except Exception as e:
        # NOTE(review): this read-modify-write is not atomic across worker
        # threads, so the count may be slightly low under heavy failure rates.
        data_collection['query_users_exception'] += 1
        print(f"Unable to connect to the database or execute query: {e}")
        return []

    finally:
        # Runs after either return above; the original leaked the connection
        # whenever the query raised.
        if conn is not None:
            conn.close()

def measure_query(num_queries):
    """Run ``num_queries`` concurrent name lookups and time the batch.

    Lookup ``i`` searches for ``first_name_2_{i}`` / ``last_name_2_{i}``, the
    names generated by ``measure_insert_users``. Queries run through
    ``execute_query`` on a pool of ``max_workers`` threads. The wall time is
    stored in ``data_collection['query_users']`` and the total number of rows
    returned is printed.

    Args:
        num_queries: Number of lookups to issue.
    """
    users = [(f"first_name_2_{i}", f"last_name_2_{i}") for i in range(num_queries)]

    results = []
    # perf_counter is monotonic, so the interval cannot be distorted by
    # system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_user = {
            executor.submit(execute_query, first_name, last_name): (first_name, last_name)
            for first_name, last_name in users
        }
        for future in concurrent.futures.as_completed(future_to_user):
            user = future_to_user[future]
            try:
                results.extend(future.result())
            except Exception as e:
                print(f"Query failed for user {user}: {e}")

    elapsed_time = time.perf_counter() - started
    data_collection['query_users'] = elapsed_time

    print(f"Time taken to query {num_queries:d} users from table with three joins: {elapsed_time:.6f} seconds")

    # Total number of item rows returned across all lookups.
    print(len(results))

# measure time function calls

In [264]:
# Start from a clean slate: drop users_items, users and items if present.
drop_all_tables()

Connected to the database successfully
Tables dropped successfully
Connection closed


In [265]:
# Record how long a fresh connection and a SELECT version() round trip take.
measure_connection_time()

Time taken to connect to the database: 0.254229 seconds
Time taken to fetch the version data : 0.069548 seconds
PostgreSQL version: ('PostgreSQL 16.3 (Ubuntu 16.3-0ubuntu0.24.04.1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 13.2.0-23ubuntu4) 13.2.0, 64-bit',)
Connection closed


In [266]:
# Create the users table; the stored timing includes connection setup.
measure_creating_users_table()

Time taken to create users table and establishing connections : 0.375869 seconds
Time taken to create users table with established connections : 0.112587 seconds
Table 'users' created successfully
Connection closed


In [267]:
# Create the items table; the stored timing includes connection setup.
measure_create_items_table()

Connected to the database successfully
Time taken to create items table and establishing connections : 0.386490 seconds
Time taken to create items table with established connections : 0.116536 seconds
Table 'items' created successfully
Connection closed


In [268]:
# Create the users_items link table; users and items must exist first
# because of its foreign keys.
measure_create_users_items_table()

Connected to the database successfully
Time taken to create users-items table and establishing connections : 0.388572 seconds
Time taken to create users-items table with established connections : 0.115481 seconds
Table 'users_items' created successfully
Connection closed


In [269]:
# Time a single-row insert into users, with and without connection setup.
measure_insert_one_user()

insert one user with connectino:0.377896
insert one user without connectino:0.110208


In [29]:
# Insert num_users rows through the thread pool; each task opens its own
# connection.
measure_insert_users(num_users = num_users)

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP co

In [271]:
# measure_users_exceptions(num_users=num_users)

In [27]:
# Insert num_items rows through the thread pool; each task opens its own
# connection.
measure_insert_items(num_items=num_items)

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?

Unable to connect to the database or insert values: connection to server at "ec2-35-93-129-232.us-west-2.compute.amazonaws.com" (35.93.129.232), port 5432 failed: Connection refused
	Is the ser

In [273]:
# Time linking five randomly chosen items to the first user ID returned by
# the lookup query.
measure_one_insert_user_items_table()

get_userid_itemid(): Connected to the database successfully
get_userid_itemid(): Connection closed
Connected to the  database successfully
insert user-items table with connections : 0.502787 seconds
insert user-items table without connections : 0.239145 seconds
Values inserted successfully into items table
Relationships inserted successfully into 'users_items' table
Connection closed


In [28]:
# Show the metrics collected so far.
pprint(data_collection)

defaultdict(<class 'int'>,
            {'insert_1000_items': 6.124253034591675,
             'insert_1000_users': 5.988770008087158,
             'insert_many_items_exception': 864,
             'insert_many_users_exception': 2589})


In [275]:
# Link five random items to every user through the thread pool.
measure_insert_user_items_table()

get_userid_itemid(): Connected to the database successfully
get_userid_itemid(): Connection closed
Unable to connect to the database or insert relationships: connection to server at "ec2-35-91-172-103.us-west-2.compute.amazonaws.com" (35.91.172.103), port 5432 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

Unable to connect to the database or insert relationships: connection to server at "ec2-35-91-172-103.us-west-2.compute.amazonaws.com" (35.91.172.103), port 5432 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

Unable to connect to the database or insert relationships: connection to server at "ec2-35-91-172-103.us-west-2.compute.amazonaws.com" (35.91.172.103), port 5432 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while pr

In [276]:
# Run num_queries name lookups (users -> users_items -> items join) through
# the thread pool.
measure_query(num_queries=num_queries)

Unable to connect to the database or execute query: connection to server at "ec2-35-91-172-103.us-west-2.compute.amazonaws.com" (35.91.172.103), port 5432 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.
Unable to connect to the database or execute query: connection to server at "ec2-35-91-172-103.us-west-2.compute.amazonaws.com" (35.91.172.103), port 5432 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.


Unable to connect to the database or execute query: connection to server at "ec2-35-91-172-103.us-west-2.compute.amazonaws.com" (35.91.172.103), port 5432 failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

Unable to connect to the database or execute query: connection to server at "ec2-35-91-172-103.us

In [277]:
# Show the collected metrics.
pprint(data_collection)

defaultdict(<class 'int'>,
            {'connection_time': 0.2542285919189453,
             'create_item_table': 0.3864898681640625,
             'create_user_item_table': 0.38857150077819824,
             'create_user_table': 0.3758690357208252,
             'insert_1000_items': 14.487825393676758,
             'insert_1000_users': 17.742419242858887,
             'insert_5_items_one_users_with_connection': 0.5027873516082764,
             'insert_5_items_one_users_without_connection': 0.23914504051208496,
             'insert_5_items_per_users': 18.641669034957886,
             'insert_5_items_per_users_exception': 66,
             'insert_many_items_exception': 63,
             'insert_many_users_exception': 139,
             'insert_one_user_with_connection': 0.3778963088989258,
             'insert_one_user_without_connection': 0.11020779609680176,
             'query_users': 13.660431146621704,
             'query_users_exception': 181})


In [278]:
# Persist the collected metrics as JSON. Serialize before opening the file:
# open(..., 'w') truncates immediately, so encoding first means a value json
# cannot handle raises before an existing results file is wiped.
try:
    serialized = json.dumps(data_collection)
    with open(file_name, 'w') as file:
        file.write(serialized)
    print(f"Data collection successfully saved to {file_name}")
except Exception as e:
    print(f"Error saving data collection to file: {e}")

Data collection successfully saved to m5.xlarge.data.json
