In [27]:
%pip install pandas psycopg2-binary python-dotenv
import pandas as pd
import os
from io import StringIO
import psycopg2
from psycopg2.extras import execute_values
import time
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment so the database password never has to be written in the notebook.
load_dotenv()

# Resolves to None when POSTGRES_PASSWORD is not set anywhere.
db_password = os.getenv('POSTGRES_PASSWORD')

Note: you may need to restart the kernel to use updated packages.


In [21]:
def create_conn(host="localhost", port=5432, dbname="mydb", user="danny", password=None):
    """Open a new psycopg2 connection to the PostgreSQL database.

    Calling ``create_conn()`` with no arguments connects exactly as before
    (localhost:5432, database ``mydb``, user ``danny``). Every setting can now
    be overridden so the same helper works against other servers.

    Args:
        host: Database server host name.
        port: TCP port the server listens on.
        dbname: Name of the database to connect to.
        user: Role to authenticate as.
        password: Password for ``user``. When ``None`` (the default) the
            notebook-level ``db_password`` value is used instead.

    Returns:
        The connection object returned by ``psycopg2.connect``. The caller
        owns it and is responsible for closing it.
    """
    if password is None:
        # Fall back to the value loaded from the environment at notebook start.
        password = db_password

    return psycopg2.connect(
        host=host,
        port=port,
        dbname=dbname,
        user=user,
        password=password,
    )

In [22]:
# Path to the source CSV, relative to the directory the notebook is run from.
csv_file = "./db/data/product.csv"
# Load the whole file into a DataFrame; column dtypes are inferred by pandas.
df = pd.read_csv(csv_file)

# Optional: if the CSV has a column holding Unix epoch seconds, uncomment the
# line below (replacing 'date_column' with the real column name) to convert it
# to datetimes before the import.
# df["date_column"] = pd.to_datetime(df["date_column"], unit="s")

In [28]:
def _pg_column_type(series):
    """Pick a PostgreSQL column type for one DataFrame column from its dtype."""
    dtype = series.dtype
    dtype_checks = pd.api.types

    if dtype_checks.is_bool_dtype(dtype):
        return "BOOLEAN"
    if dtype_checks.is_integer_dtype(dtype):
        # Keep INTEGER for values that fit in 32 bits; widen only when needed.
        values = series.dropna()
        if len(values) and (values.min() < -(2 ** 31) or values.max() > 2 ** 31 - 1):
            return "BIGINT"
        return "INTEGER"
    if dtype_checks.is_float_dtype(dtype):
        return "DOUBLE PRECISION"
    if dtype_checks.is_datetime64_any_dtype(dtype):
        # Timezone-aware dtypes expose a non-None ``tz`` attribute.
        return "TIMESTAMPTZ" if getattr(dtype, "tz", None) is not None else "TIMESTAMP"
    # Strings, mixed and any other dtype are stored as text.
    return "VARCHAR"


def create_table_from_csv(df, table_name):
    """Recreate ``table_name`` from ``df`` and print the first five stored rows.

    The table is dropped if it exists, created with one column per DataFrame
    column, and filled through ``insert_data``. Drop, create and load run in a
    single transaction that is committed only after the load succeeds, so a
    failed import leaves any previous table untouched.

    Column types come from each column's pandas dtype rather than from the
    first row's values, so float and boolean columns, string columns whose
    first value is missing, and DataFrames with zero rows are all handled.

    Args:
        df: DataFrame to import. Its column names become the table's column
            names.
        table_name: Name of the table to (re)create.

    Raises:
        ValueError: If ``df`` has no columns.

    Note:
        ``table_name`` and the column names are interpolated into the SQL text
        unquoted, so they must be trusted, valid PostgreSQL identifiers.
    """
    if len(df.columns) == 0:
        raise ValueError("DataFrame has no columns; cannot create a table from it.")

    # Positional access keeps this working even if a column label is repeated.
    columns = [
        f"{header} {_pg_column_type(df.iloc[:, position])}"
        for position, header in enumerate(df.columns)
    ]

    conn = create_conn()
    try:
        with conn.cursor() as cur:
            cur.execute(f"DROP TABLE IF EXISTS {table_name};")
            cur.execute(f"CREATE TABLE {table_name} ({', '.join(columns)});")

            # Load the rows, then commit schema and data together.
            insert_data(df, table_name, cur)
            conn.commit()

            # Show a small sample so the import can be eyeballed.
            cur.execute(f"SELECT * FROM {table_name} LIMIT 5")
            for row in cur.fetchall():
                print(row)
    finally:
        # Always release the connection; closing without a commit discards
        # any pending work, which undoes a partially completed import.
        conn.close()

def insert_data(df, table_name, cur):
    """Bulk-load every row of ``df`` into ``table_name`` via PostgreSQL COPY.

    Args:
        df: DataFrame whose rows are loaded; its column names are used as the
            COPY column list.
        table_name: Target table, which must already exist.
        cur: Open cursor providing ``copy_expert``. Committing is left to the
            caller.
    """
    # Serialise the frame as header-less CSV text, wrapped in a file-like
    # object positioned at the start for COPY to read from.
    payload = StringIO(df.to_csv(index=False, header=False))

    column_list = ", ".join(df.columns)
    copy_sql = f"COPY {table_name} ({column_list}) FROM STDIN WITH CSV"
    cur.copy_expert(copy_sql, payload)
    

In [29]:
# Target table for the import
table_name = "products"

# Time the full drop / create / load cycle
start_time = time.time()
create_table_from_csv(df, table_name)
end_time = time.time()

# Report the elapsed wall-clock time
duration = end_time - start_time
print(f"Data import took {duration:.2f} seconds.")

(1, 'Camo Onesie', 'Blend in to your crowd', 'The So Fatigues will wake you up and fit you in. This high energy camo will have you blending in to even the wildest surroundings.', 'Jackets', 140)
(2, 'Bright Future Sunglasses', "You've got to wear shades", "Where you're going you might not need roads, but you definitely need some shades. Give those baby blues a rest and let the future shine bright on these timeless lenses.", 'Accessories', 69)
(3, 'Morning Joggers', 'Make yourself a morning person', "Whether you're a morning person or not.  Whether you're gym bound or not.  Everyone looks good in joggers.", 'Pants', 40)
(4, "Slacker's Slacks", 'Comfortable for everything, or nothing', "I'll tell you how great they are after I nap for a bit.", 'Pants', 65)
(5, 'Heir Force Ones', 'A sneaker dynasty', "Now where da boxes where I keep mine? You should peep mine, maybe once or twice but never three times. I'm just a sneaker pro, I love Pumas and shell toes, but can't nothin compare to a fres