In [1]:
pip install psycopg2

Collecting psycopg2
  Downloading psycopg2-2.9.9-cp37-cp37m-win32.whl (1.0 MB)
     ---------------------------------------- 1.0/1.0 MB 15.8 MB/s eta 0:00:00
Installing collected packages: psycopg2
Successfully installed psycopg2-2.9.9
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import concurrent.futures
import os
import random
import time
from collections import defaultdict

import psycopg2
from psycopg2 import sql


In [10]:
# Shared store for benchmark results: maps a metric name (e.g.
# 'connection_time') to the list of elapsed-seconds values appended by the
# measure_* functions defined below. defaultdict(list) lets those functions
# append without creating the key first.
data_collection = defaultdict(list)

# connection parameters

In [11]:
# Connection settings used by every psycopg2.connect() call in this notebook.
# Each value can be overridden through an environment variable so the notebook
# can be pointed at another server (or given other credentials) without
# editing this cell; the literals are the original values, kept as defaults.
# NOTE(review): the default password is still stored in the notebook -- rotate
# it and remove the fallback once PGPASSWORD is supplied by the environment.
host = os.environ.get("PGHOST", "ec2-54-185-241-47.us-west-2.compute.amazonaws.com")  # EC2 instance's public DNS
port = os.environ.get("PGPORT", "5432")
dbname = os.environ.get("PGDATABASE", "mydatabase")
user = os.environ.get("PGUSER", "danlandy")
password = os.environ.get("PGPASSWORD", "danlandy")


# drop tables

In [4]:
# Drop the three benchmark tables (if present) so the create/insert
# measurements below start from an empty schema.
conn = None
try:
    # Connect to the PostgreSQL server using the notebook-level parameters
    conn = psycopg2.connect(
        host=host,
        port=port,
        dbname=dbname,
        user=user,
        password=password
    )
    print("Connected to the database successfully")

    # Create a cursor object
    cur = conn.cursor()

    # users_items is dropped first because it holds the foreign keys that
    # reference users and items.
    drop_queries = [
        "DROP TABLE IF EXISTS users_items;",
        "DROP TABLE IF EXISTS users;",
        "DROP TABLE IF EXISTS items;"
    ]

    # Execute each drop query
    for query in drop_queries:
        cur.execute(query)

    # Commit the changes
    conn.commit()
    print("Tables dropped successfully")

    cur.close()

except Exception as e:
    print(f"Unable to connect to the database or drop tables: {e}")

finally:
    # Close the connection even when a statement above failed, so a failed
    # run does not leave a server session open.
    if conn is not None:
        conn.close()
        print("Connection closed")


Connected to the database successfully
Tables dropped successfully
Connection closed


# Measure the time to connect to the database and the time to fetch the version data

In [6]:
def measure_connection_time():
    """Time a fresh connection and a trivial ``SELECT version()`` round trip.

    The connection duration is printed and appended to
    ``data_collection['connection_time']``. The query duration and the
    returned version row are printed only.

    Errors are caught and printed rather than raised. The connection, once
    opened, is closed in every case.
    """
    conn = None
    try:
        # perf_counter is a monotonic high-resolution clock, so the measured
        # interval is not affected by system clock adjustments.
        start_time = time.perf_counter()

        # Connect to the PostgreSQL server
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )

        connection_time = time.perf_counter() - start_time
        print(f"Time taken to connect to the database: {connection_time:.6f} seconds")
        data_collection['connection_time'].append(connection_time)

        # Create a cursor object
        cur = conn.cursor()

        # Time the execute + fetch of a single-row query
        start_time = time.perf_counter()
        cur.execute("SELECT version();")
        result = cur.fetchone()
        version_fetch_time = time.perf_counter() - start_time

        print(f"Time taken to fetch the version data : {version_fetch_time:.6f} seconds")

        print("PostgreSQL version:", result)

        cur.close()

    except Exception as e:
        print(f"Unable to connect to the database: {e}")

    finally:
        # Release the server session even if the query above failed.
        if conn is not None:
            conn.close()
            print("Connection closed")


# create users table

In [7]:
def measure_creating_users_table():
    """Create the ``users`` table and record how long it takes.

    Two durations are appended to ``data_collection['create_user_table']``,
    in this order: the time including connection setup, then the time for the
    ``CREATE TABLE`` statement plus commit on the already-open connection.
    Both are also printed.

    The statement is a plain ``CREATE TABLE`` (no ``IF NOT EXISTS``), so the
    drop-tables cell should be run first when the table may already exist.

    Errors are caught and printed rather than raised. The connection, once
    opened, is closed in every case.
    """
    conn = None
    try:
        start_time_connection = time.perf_counter()
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )

        # Create a cursor object
        cur = conn.cursor()

        # Create the 'users' table
        create_table_query = '''
        CREATE TABLE users (
            user_id serial  PRIMARY KEY,
            first_name VARCHAR(50),
            last_name VARCHAR(50)
        );
        '''

        start_time = time.perf_counter()
        cur.execute(create_table_query)
        conn.commit()
        end_time = time.perf_counter()

        elapsed_time = end_time - start_time
        elapsed_time_2 = end_time - start_time_connection

        # Order matters to readers of data_collection: with-connection first.
        data_collection['create_user_table'].append(elapsed_time_2)
        data_collection['create_user_table'].append(elapsed_time)

        print(f"Time taken to create users table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to create users table with established connections : {elapsed_time:.6f} seconds")

        print("Table 'users' created successfully")

        cur.close()

    except Exception as e:
        print(f"Unable to connect to the database or create table: {e}")

    finally:
        # Release the server session even if the statement above failed.
        if conn is not None:
            conn.close()
            print("Connection closed")


# create Items table

In [8]:
def measure_create_items_table():
    """Create the ``items`` table and record how long it takes.

    Two durations are appended to ``data_collection['create_item_table']``,
    in this order: the time including connection setup, then the time for the
    ``CREATE TABLE`` statement plus commit on the already-open connection.
    Both are also printed.

    The statement is a plain ``CREATE TABLE`` (no ``IF NOT EXISTS``), so the
    drop-tables cell should be run first when the table may already exist.

    Errors are caught and printed rather than raised. The connection, once
    opened, is closed in every case.
    """
    conn = None
    try:
        start_time_connection = time.perf_counter()
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the database successfully")

        # Create a cursor object
        cur = conn.cursor()

        # Create the 'items' table
        create_table_query = '''
        CREATE TABLE items (
            item_id  serial PRIMARY KEY,
            item_name VARCHAR(100)
        );
        '''
        start_time = time.perf_counter()
        cur.execute(create_table_query)
        conn.commit()
        end_time = time.perf_counter()

        elapsed_time = end_time - start_time
        elapsed_time_2 = end_time - start_time_connection

        # Order matters to readers of data_collection: with-connection first.
        data_collection['create_item_table'].append(elapsed_time_2)
        data_collection['create_item_table'].append(elapsed_time)

        print(f"Time taken to create items table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to create items table with established connections : {elapsed_time:.6f} seconds")

        print("Table 'items' created successfully")

        cur.close()

    except Exception as e:
        print(f"Unable to connect to the database or create schema/table: {e}")

    finally:
        # Release the server session even if the statement above failed.
        if conn is not None:
            conn.close()
            print("Connection closed")


# create user-item table

In [9]:
def measure_create_users_items_table():
    """Create the ``users_items`` link table and record how long it takes.

    The table has a composite primary key ``(user_id, item_id)`` and foreign
    keys to ``users`` and ``items``, so both of those tables must already
    exist when this runs.

    Two durations are appended to
    ``data_collection['create_user_item_table']``, in this order: the time
    including connection setup, then the time for the ``CREATE TABLE``
    statement plus commit on the already-open connection. Both are printed.

    Errors are caught and printed rather than raised. The connection, once
    opened, is closed in every case.
    """
    conn = None
    try:
        start_time_connection = time.perf_counter()
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the database successfully")

        # Create a cursor object
        cur = conn.cursor()

        # Create the 'users_items' table
        create_table_query = '''
        CREATE TABLE users_items (
            user_id INTEGER NOT NULL,
            item_id INTEGER NOT NULL,
            PRIMARY KEY (user_id, item_id),
            FOREIGN KEY (user_id) REFERENCES users(user_id),
            FOREIGN KEY (item_id) REFERENCES items(item_id)
        );
        '''
        start_time = time.perf_counter()
        cur.execute(create_table_query)
        conn.commit()
        end_time = time.perf_counter()

        elapsed_time = end_time - start_time
        elapsed_time_2 = end_time - start_time_connection

        # Order matters to readers of data_collection: with-connection first.
        data_collection['create_user_item_table'].append(elapsed_time_2)
        data_collection['create_user_item_table'].append(elapsed_time)

        print(f"Time taken to create users-items table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to create users-items table with established connections : {elapsed_time:.6f} seconds")

        print("Table 'users_items' created successfully")

        cur.close()

    except Exception as e:
        print(f"Unable to connect to the database or create table: {e}")

    finally:
        # Release the server session even if the statement above failed.
        if conn is not None:
            conn.close()
            print("Connection closed")


# insert users

In [14]:
def measure_insert_users():
    """Insert 1000 generated rows into ``users`` and record how long it takes.

    Rows are ``("first_name_<i>", "last_name_<i>")`` for ``i`` in 0..999,
    inserted one ``execute`` call per row on a single connection and
    committed once at the end.

    Two durations are appended to ``data_collection['insert_1000_users']``,
    in this order: the time including connection setup, then the time for the
    inserts plus commit on the already-open connection. Both are printed.

    Errors are caught and printed rather than raised. The connection, once
    opened, is closed in every case; closing without a commit discards any
    rows inserted before the failure.
    """
    conn = None
    try:
        # Parameterised insert into the 'users' table
        insert_query = '''
        INSERT INTO users (first_name, last_name) VALUES (%s, %s);
        '''

        # Build the rows before starting any timer so only database work is
        # measured.
        values = [(f"first_name_{i}", f"last_name_{i}") for i in range(1000)]

        start_time_connection = time.perf_counter()
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the database successfully")

        # Create a cursor object
        cur = conn.cursor()
        start_time = time.perf_counter()
        for value in values:
            cur.execute(insert_query, value)

        conn.commit()
        end_time = time.perf_counter()

        elapsed_time = end_time - start_time
        elapsed_time_2 = end_time - start_time_connection

        # Order matters to readers of data_collection: with-connection first.
        data_collection['insert_1000_users'].append(elapsed_time_2)
        data_collection['insert_1000_users'].append(elapsed_time)

        print(f"Time taken to insert users table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to insert users table with established connections : {elapsed_time:.6f} seconds")

        print("Values inserted successfully into users table")

        cur.close()

    except Exception as e:
        print(f"Unable to connect to the database or insert values: {e}")

    finally:
        # Release the server session even if an insert above failed.
        if conn is not None:
            conn.close()
            print("Connection closed")


# insert items


In [16]:
# def measure_insert_items():
#     try:
#         # Insert values into the 'item_data' table
#         insert_query = '''
#         INSERT INTO items (item_name) VALUES (%s);
#         '''
#         item_values = []

#         for i in range(1000):
#             item_values.append((f"item_name_{i}",))
        
#         start_time_connection = time.time() 
#         # Connect to the PostgreSQL server and 'users' database
#         conn = psycopg2.connect(
#             host=host,
#             port=port,
#             dbname=dbname,
#             user=user,
#             password=password
#         )
#         print("Connected to the database successfully")

#         # Create a cursor object
#         cur = conn.cursor()
#         start_time = time.time()
#         for value in item_values:
#             cur.execute(insert_query, value)

#         conn.commit()
#         end_time = time.time()
#         elapsed_time = end_time - start_time; 
#         elapsed_time_2 = end_time - start_time_connection; 
        
#         data_collection['insert_1000_items'].append(elapsed_time_2)
#         data_collection['insert_1000_items'].append(elapsed_time)
        
#         print(f"Time taken to insert items table and establishing connections : {elapsed_time_2:.6f} seconds")
#         print(f"Time taken to insert items table with established connections : {elapsed_time:.6f} seconds")
        
#         print("Values inserted successfully into items table")

#         # Close the cursor and connection
#         cur.close()
#         conn.close()
#         print("Connection closed")

#     except Exception as e:
#         print(f"Unable to connect to the database or insert values: {e}")


In [18]:
# Assuming data_collection is a dictionary with lists as values
# data_collection = {'insert_1000_items': []}

def insert_item(value):
    """Insert one row into ``items`` using a connection opened for this call.

    Args:
        value: Parameter sequence for the query, i.e. a 1-tuple
            ``(item_name,)``.

    The function opens its own connection, inserts, commits and closes, so it
    can be handed to a thread pool without sharing any database objects
    between workers. Errors are caught and printed rather than raised. The
    connection, once opened, is closed in every case.
    """
    conn = None
    try:
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        # Create a cursor object
        cur = conn.cursor()
        insert_query = '''
        INSERT INTO items (item_name) VALUES (%s);
        '''
        cur.execute(insert_query, value)
        conn.commit()
        cur.close()
    except Exception as e:
        print(f"Unable to connect to the database or insert values: {e}")
    finally:
        # This runs once per row across many threads, so a connection left
        # open on every failed insert would quickly pile up server sessions.
        if conn is not None:
            conn.close()

def measure_insert_items(num_items=1000, max_workers=16):
    """Insert generated rows into ``items`` concurrently and time the batch.

    Args:
        num_items: Number of rows to insert; rows are ``("item_name_<i>",)``
            for ``i`` in ``range(num_items)``. Defaults to 1000.
        max_workers: Size of the thread pool. Defaults to 16.

    Each row is inserted by ``insert_item`` on a worker thread. The elapsed
    time for the whole batch (including each worker's connection setup) is
    appended to ``data_collection['insert_<num_items>_items']`` and printed.
    Failures of individual inserts are reported by ``insert_item`` itself and
    do not stop the batch.
    """
    item_values = [(f"item_name_{i}",) for i in range(num_items)]

    start_time = time.perf_counter()
    # Leaving the with-block waits for every submitted insert to finish, so
    # the timer below covers the complete batch.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        executor.map(insert_item, item_values)
    elapsed_time = time.perf_counter() - start_time

    data_collection[f'insert_{num_items}_items'].append(elapsed_time)

    print(f"Time taken to insert {num_items} items: {elapsed_time:.6f} seconds")

# Call the function to measure insertion time
measure_insert_items()


Time taken to insert 1000 items: 25.387557 seconds


# insert users-items

In [18]:
def get_userid_itemid():
    """Fetch every ``user_id`` from ``users`` and every ``item_id`` from ``items``.

    Returns:
        A tuple ``(user_ids, item_ids)`` of two lists on success, or ``None``
        if connecting or querying failed (the error is printed, not raised).

    The connection, once opened, is closed in every case.
    """
    conn = None
    try:
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the  database successfully")

        # Create a cursor object
        cur = conn.cursor()

        # Retrieve all user IDs
        cur.execute("SELECT user_id FROM users;")
        user_ids = [row[0] for row in cur.fetchall()]

        # Retrieve all item IDs
        cur.execute("SELECT item_id FROM items;")
        item_ids = [row[0] for row in cur.fetchall()]

        cur.close()
        return user_ids, item_ids

    except Exception as e:
        # This function only reads ids; the message says so instead of the
        # copy-pasted "insert relationships" wording.
        print(f"Unable to connect to the database or fetch user/item ids: {e}")
        return None

    finally:
        # Runs before either return above takes effect, so the connection is
        # released on both the success and the failure path.
        if conn is not None:
            conn.close()
            print("Connection closed")


In [19]:
def measure_insert_user_items_table():
    """Link every user to up to 5 random items and record how long it takes.

    User and item ids are loaded with ``get_userid_itemid()``. For each user,
    5 distinct items are drawn with ``random.sample`` (fewer when the
    ``items`` table holds fewer than 5 rows) and inserted into
    ``users_items`` one ``execute`` call per pair, with a single commit at
    the end.

    Two durations are appended to
    ``data_collection['insert_5_items_per_users']``, in this order: the time
    including connection setup, then the time for the inserts plus commit on
    the already-open connection. Both are printed. Loading the ids is not
    part of either measurement.

    Errors are caught and printed rather than raised. The connection, once
    opened, is closed in every case.
    """
    conn = None
    try:
        ids = get_userid_itemid()
        if ids is None:
            # The helper returns None (after printing the cause) when it
            # cannot read the ids; stop here instead of failing on unpacking.
            print("Unable to connect to the database or insert relationships: user/item ids could not be loaded")
            return
        user_ids, item_ids = ids

        # random.sample raises ValueError when asked for more elements than
        # the population holds, so cap the sample size at what is available.
        items_per_user = min(5, len(item_ids))

        # Insert relationships into 'users_items' table
        insert_query = '''
        INSERT INTO users_items (user_id, item_id) VALUES (%s, %s);
        '''

        start_time_connection = time.perf_counter()
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the  database successfully")

        # Create a cursor object
        cur = conn.cursor()

        start_time = time.perf_counter()
        for user_id in user_ids:
            # Randomly select items for this user; sampling without
            # replacement keeps (user_id, item_id) pairs unique per user.
            random_items = random.sample(item_ids, items_per_user)
            for item_id in random_items:
                cur.execute(insert_query, (user_id, item_id))

        conn.commit()
        end_time = time.perf_counter()

        elapsed_time = end_time - start_time
        elapsed_time_2 = end_time - start_time_connection

        # Order matters to readers of data_collection: with-connection first.
        data_collection['insert_5_items_per_users'].append(elapsed_time_2)
        data_collection['insert_5_items_per_users'].append(elapsed_time)

        print(f"Time taken to insert user-items table and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to insert user-items table with established connections : {elapsed_time:.6f} seconds")

        print("Relationships inserted successfully into 'users_items' table")

        cur.close()

    except Exception as e:
        print(f"Unable to connect to the database or insert relationships: {e}")

    finally:
        # Release the server session even if an insert above failed.
        if conn is not None:
            conn.close()
            print("Connection closed")


# query with given firstname and lastname

In [21]:
def measure_query(first_name="first_name_4", last_name="last_name_4"):
    """Look up one user's item names by first/last name and time the query.

    Args:
        first_name: First name to match in ``users``. Defaults to
            ``"first_name_4"``, one of the generated benchmark users.
        last_name: Last name to match in ``users``. Defaults to
            ``"last_name_4"``.

    The query joins ``users``, ``users_items`` and ``items``. Two durations
    are appended to ``data_collection['query_one_user']``, in this order: the
    time including connection setup, then the time for execute plus
    ``fetchall`` on the already-open connection. Both are printed, followed
    by one line per matching item name.

    Errors are caught and printed rather than raised. The connection, once
    opened, is closed in every case.
    """
    conn = None
    try:
        # Define the SQL query; the names are passed as bound parameters
        query = '''
        SELECT i.item_name
        FROM users u
        JOIN users_items ui ON u.user_id = ui.user_id
        JOIN items i ON ui.item_id = i.item_id
        WHERE u.first_name = %s AND u.last_name = %s;
        '''
        start_time_connection = time.perf_counter()
        # Connect to the PostgreSQL server using the notebook-level parameters
        conn = psycopg2.connect(
            host=host,
            port=port,
            dbname=dbname,
            user=user,
            password=password
        )
        print("Connected to the 'users' database successfully")

        # Create a cursor object
        cur = conn.cursor()

        start_time = time.perf_counter()
        # Execute the SQL query and pull all rows so the measurement includes
        # transferring the result set.
        cur.execute(query, (first_name, last_name))
        items = cur.fetchall()
        end_time = time.perf_counter()

        elapsed_time = end_time - start_time
        elapsed_time_2 = end_time - start_time_connection

        # Order matters to readers of data_collection: with-connection first.
        data_collection['query_one_user'].append(elapsed_time_2)
        data_collection['query_one_user'].append(elapsed_time)

        print(f"Time taken to query table with three joins and establishing connections : {elapsed_time_2:.6f} seconds")
        print(f"Time taken to query table with three joins with established connections : {elapsed_time:.6f} seconds")

        # Printing happens after the timers stop so it is not measured.
        for item in items:
            print(item[0])

        cur.close()

    except Exception as e:
        print(f"Unable to connect to the database or execute query: {e}")

    finally:
        # Release the server session even if the query above failed.
        if conn is not None:
            conn.close()
            print("Connection closed")


# measure time function calls

In [19]:
measure_connection_time()

Time taken to connect to the database: 0.346120 seconds
Time taken to fetch the version data : 0.077714 seconds
PostgreSQL version: ('PostgreSQL 16.2 (Ubuntu 16.2-1ubuntu4) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 13.2.0-23ubuntu3) 13.2.0, 64-bit',)
Connection closed


In [20]:
measure_creating_users_table()

Time taken to create users table and establishing connections : 0.387311 seconds
Time taken to create users table with established connections : 0.118016 seconds
Table 'users' created successfully
Connection closed


In [21]:
measure_create_items_table()

Connected to the database successfully
Time taken to create items table and establishing connections : 0.397959 seconds
Time taken to create items table with established connections : 0.121450 seconds
Table 'items' created successfully
Connection closed


In [22]:
measure_create_users_items_table()

Connected to the database successfully
Time taken to create users-items table and establishing connections : 0.408182 seconds
Time taken to create users-items table with established connections : 0.124132 seconds
Table 'users_items' created successfully
Connection closed


In [None]:
measure_insert_users()

Connected to the 'users' database successfully
Time taken to insert users table and establishing connections : 34.137690 seconds
Time taken to insert users table with established connections : 33.860469 seconds
Values inserted successfully into users table
Connection closed


In [None]:
measure_insert_items()

Connected to the database successfully
Time taken to insert items table and establishing connections : 34.379728 seconds
Time taken to insert items table with established connections : 34.091855 seconds
Values inserted successfully into items table
Connection closed


In [None]:
measure_insert_user_items_table()

Connected to the  database successfully
Connection closed
Connected to the  database successfully
Time taken to insert user-items table and establishing connections : 189.434933 seconds
Time taken to insert user-items table table with established connections : 189.137250 seconds
Values inserted successfully into items table
Relationships inserted successfully into 'users_items' table
Connection closed


In [23]:
measure_query()

NameError: name 'measure_query' is not defined