In [10]:
import sqlite3
import random
import pandas as pd
from datetime import datetime, timedelta
import os

# Ensure the directory exists
#os.makedirs('Problem-1', exist_ok=True)

# Connect to SQLite database
#db_path = os.path.join(os.getcwd(), 'Problem-1', 'ecommerce.db')
conn = sqlite3.connect('ecommerce.db')
cursor = conn.cursor()

# Create tables
cursor.execute('''
CREATE TABLE IF NOT EXISTS Product (
    ProductID INTEGER PRIMARY KEY,
    Name TEXT,
    Category TEXT,
    IsDiscontinued BOOLEAN
)
''')

cursor.execute('''
CREATE TABLE IF NOT EXISTS Variant (
    VariantID INTEGER PRIMARY KEY,
    ProductID INTEGER,
    Name TEXT,
    IsDiscontinued BOOLEAN,
    FOREIGN KEY (ProductID) REFERENCES Product(ProductID)
)
''')

cursor.execute('''
CREATE TABLE IF NOT EXISTS Price (
    PriceID INTEGER PRIMARY KEY,
    VariantID INTEGER,
    Price REAL,
    EffectiveDate DATE,
    FOREIGN KEY (VariantID) REFERENCES Variant(VariantID)
)
''')

cursor.execute('''
CREATE TABLE IF NOT EXISTS Customer (
    CustomerID INTEGER PRIMARY KEY,
    Name TEXT,
    Email TEXT,
    Address TEXT
)
''')

cursor.execute('''
CREATE TABLE IF NOT EXISTS "Order" (
    OrderID INTEGER PRIMARY KEY,
    CustomerID INTEGER,
    OrderDate DATE,
    OrderAmount REAL,
    FOREIGN KEY (CustomerID) REFERENCES Customer(CustomerID)
)
''')

cursor.execute('''
CREATE TABLE IF NOT EXISTS OrderItem (
    OrderItemID INTEGER PRIMARY KEY,
    OrderID INTEGER,
    VariantID INTEGER,
    Quantity INTEGER,
    PriceAtPurchase REAL,
    FOREIGN KEY (OrderID) REFERENCES "Order"(OrderID),
    FOREIGN KEY (VariantID) REFERENCES Variant(VariantID)
)
''')

# Insert sample data
products = [
    ('T-shirt', 'Clothing', False),
    ('Jeans', 'Clothing', False),
    ('Milk', 'Groceries', False),
    ('Bread', 'Groceries', False),
    ('Laptop', 'Electronics', False),
    ('Phone', 'Electronics', False),
    ('Headphones', 'Electronics', False),
    ('Jacket', 'Clothing', False),
    ('Butter', 'Groceries', False),
    ('Cheese', 'Groceries', False)
]

variants = [
    (1, 'Red T-shirt', False),
    (1, 'Green T-shirt', False),
    (2, 'Blue Jeans', False),
    (3, 'Whole Milk', False),
    (4, 'White Bread', False),
    (5, 'Gaming Laptop', False),
    (6, 'Smartphone', False),
    (7, 'Wireless Headphones', False),
    (7, 'Wired Headphones', False),
    (8, 'Leather Jacket', False),
    (9, 'Salted Butter', False),
    (10, 'Cheddar Cheese', False)
]

customers = [
    ('John Doe', 'john@example.com', '123 Elm St'),
    ('Jane Smith', 'jane@example.com', '456 Oak St'),
    ('Alice Johnson', 'alice@example.com', '789 Pine St'),
    ('Bob Brown', 'bob@example.com', '101 Maple St'),
    ('Charlie Davis', 'charlie@example.com', '202 Birch St'),
    ('Diana Evans', 'diana@example.com', '303 Cedar St'),
    ('Eve Foster', 'eve@example.com', '404 Spruce St'),
    ('Frank Green', 'frank@example.com', '505 Willow St'),
    ('Grace Harris', 'grace@example.com', '606 Aspen St'),
    ('Hank Irving', 'hank@example.com', '707 Redwood St')
]

# Insert products
cursor.executemany('INSERT INTO Product (Name, Category, IsDiscontinued) VALUES (?, ?, ?)', products)

# Insert variants
cursor.executemany('INSERT INTO Variant (ProductID, Name, IsDiscontinued) VALUES (?, ?, ?)', variants)

# Insert customers
cursor.executemany('INSERT INTO Customer (Name, Email, Address) VALUES (?, ?, ?)', customers)

# Insert prices
for variant_id in range(1, 12):
    for _ in range(5):
        price = round(random.uniform(10, 100), 2)
        effective_date = datetime.now() - timedelta(days=random.randint(0, 730))
        cursor.execute('INSERT INTO Price (VariantID, Price, EffectiveDate) VALUES (?, ?, ?)', (variant_id, price, effective_date))

# Insert orders and order items
for _ in range(50):
    customer_id = random.randint(1, 10)
    order_date = datetime.now() - timedelta(days=random.randint(0, 730))
    cursor.execute('INSERT INTO "Order" (CustomerID, OrderDate, OrderAmount) VALUES (?, ?, ?)', (customer_id, order_date, 0.0))
    order_id = cursor.lastrowid

    order_amount = 0.0


    for _ in range(random.randint(1, 5)):
        variant_id = random.randint(1, 10)
        quantity = random.randint(1, 3)
        cursor.execute('SELECT Price FROM Price WHERE VariantID = ? ORDER BY EffectiveDate DESC LIMIT 1', (variant_id,))
        price_at_purchase = cursor.fetchone()[0]
        order_amount += quantity * price_at_purchase
        cursor.execute('INSERT INTO OrderItem (OrderID, VariantID, Quantity, PriceAtPurchase) VALUES (?, ?, ?, ?)', (order_id, variant_id, quantity, price_at_purchase))
        cursor.execute('UPDATE "Order" SET OrderAmount = ? WHERE OrderID = ?', (order_amount, order_id))

# Commit and close
conn.commit()
conn.close()

  cursor.execute('INSERT INTO Price (VariantID, Price, EffectiveDate) VALUES (?, ?, ?)', (variant_id, price, effective_date))
  cursor.execute('INSERT INTO "Order" (CustomerID, OrderDate, OrderAmount) VALUES (?, ?, ?)', (customer_id, order_date, 0.0))


In [11]:
# Convert to DataFrames
conn = sqlite3.connect('ecommerce.db')
products_df = pd.read_sql_query('SELECT * FROM Product', conn)
variants_df = pd.read_sql_query('SELECT * FROM Variant', conn)
prices_df = pd.read_sql_query('SELECT * FROM Price', conn)
customers_df = pd.read_sql_query('SELECT * FROM Customer', conn)
orders_df = pd.read_sql_query('SELECT * FROM "Order"', conn)
order_items_df = pd.read_sql_query('SELECT * FROM OrderItem', conn)
conn.close()

In [12]:
# Save to CSV for review or further use
products_df.to_csv("csv-files/products.csv", index=False)
variants_df.to_csv("csv-files/variants.csv", index=False)
prices_df.to_csv("csv-files/prices.csv", index=False)
customers_df.to_csv("csv-files/customers.csv", index=False)
orders_df.to_csv("csv-files/orders.csv", index=False)
order_items_df.to_csv("csv-files/order_items.csv", index=False)

print("Sample data generated and saved to CSV files.")

Sample data generated and saved to CSV files.


In [13]:
products_df.head()

Unnamed: 0,ProductID,Name,Category,IsDiscontinued
0,1,T-shirt,Clothing,0
1,2,Jeans,Clothing,0
2,3,Milk,Groceries,0
3,4,Bread,Groceries,0
4,5,Laptop,Electronics,0


In [14]:
variants_df.head()

Unnamed: 0,VariantID,ProductID,Name,IsDiscontinued
0,1,1,Red T-shirt,0
1,2,1,Green T-shirt,0
2,3,2,Blue Jeans,0
3,4,3,Whole Milk,0
4,5,4,White Bread,0


In [15]:
prices_df.head()

Unnamed: 0,PriceID,VariantID,Price,EffectiveDate
0,1,1,92.9,2024-01-15 00:46:26.496552
1,2,1,42.15,2023-01-06 00:46:26.496552
2,3,1,47.13,2022-08-26 00:46:26.496552
3,4,1,16.5,2022-11-18 00:46:26.496552
4,5,1,27.13,2022-12-11 00:46:26.496552


In [16]:
customers_df.head()

Unnamed: 0,CustomerID,Name,Email,Address
0,1,John Doe,john@example.com,123 Elm St
1,2,Jane Smith,jane@example.com,456 Oak St
2,3,Alice Johnson,alice@example.com,789 Pine St
3,4,Bob Brown,bob@example.com,101 Maple St
4,5,Charlie Davis,charlie@example.com,202 Birch St


In [17]:
orders_df.head()

Unnamed: 0,OrderID,CustomerID,OrderDate,OrderAmount
0,1,2,2023-05-01 00:46:26.497567,75.48
1,2,2,2023-08-20 00:46:26.498574,219.88
2,3,8,2023-04-16 00:46:26.498574,462.0
3,4,5,2023-11-26 00:46:26.498574,368.46
4,5,9,2024-02-24 00:46:26.498574,641.91


In [18]:
order_items_df.head()

Unnamed: 0,OrderItemID,OrderID,VariantID,Quantity,PriceAtPurchase
0,1,1,6,2,37.74
1,2,2,2,2,10.82
2,3,2,3,2,31.69
3,4,2,2,2,10.82
4,5,2,6,2,37.74
