# In [None]:
import oracledb

import random
from faker import Faker

# --- Oracle DB Config ---
# NOTE(review): credentials are stored in plain text in this file; consider
# loading them from the environment or a secrets store before sharing it.
DB_USER = "scott"
DB_PASSWORD = "tiger1"
DB_DSN = "localhost:1521/orcl"  # host:port/service_name

# --- Data volume: how many rows to generate for each table ---
NUM_USERS = 10000
NUM_PRODUCTS = 5000
NUM_BRANDS = 1000
NUM_MANUFACTURERS = 1000
# NOTE(review): NUM_CATEGORIES is not referenced by any code visible in this
# file section — confirm whether a category loader is still planned.
NUM_CATEGORIES = 3000
NUM_RATINGS = 1000
NUM_ORDERS = 200000

# Single shared Faker instance used by all generate_* functions.
fake = Faker()

def get_connection():
    """Open a connection to the Oracle database configured at module level.

    The connection parameters are taken from the ``DB_USER``, ``DB_PASSWORD``
    and ``DB_DSN`` constants so that the configuration lives in one place.

    Returns:
        The open ``oracledb`` connection, or ``None`` when connecting fails.
        The failure is printed rather than raised, so callers must check for
        ``None`` before using the result.
    """
    try:
        conn = oracledb.connect(
            user=DB_USER,
            password=DB_PASSWORD,
            dsn=DB_DSN,
        )
    except Exception as e:
        # Deliberately broad: this is the script's connection boundary and the
        # contract is "None on any failure", not an exception.
        print("❌ Connection failed:", e)
        return None

    print("✅ Connected to Oracle")
    return conn


def execute_insert(cursor, query, data, table_name):
    """Bulk-insert ``data`` via ``cursor.executemany`` and report the outcome.

    The insert is best-effort: a failure is printed and reported through the
    return value instead of being raised, so the caller decides whether to
    continue. No commit is issued here.

    Args:
        cursor: Cursor object exposing ``executemany(query, data)``.
        query: INSERT statement containing bind placeholders.
        data: Sequence of row tuples; must support ``len()`` because the row
            count is included in the success message.
        table_name: Table name used only in the printed status messages.

    Returns:
        ``True`` if the batch executed without raising, ``False`` otherwise.
    """
    try:
        cursor.executemany(query, data)
        print(f"✅ Inserted into {table_name}: {len(data)} rows")
    except Exception as e:
        # Keep going on failure, but let the caller see that it happened.
        print(f"❌ Insert failed for {table_name}:", e)
        return False
    return True

def generate_users(n):
    """Build ``n`` fake rows for the AppUser table.

    Each row is a tuple in the bind order of the AppUser INSERT:
    ``(username, email, is_active, created_ts, last_updated_by,
    last_updated_ts)``.

    ``created_ts`` and ``last_updated_ts`` are drawn independently from the
    last two years, so the update time may precede the creation time.

    Args:
        n: Number of rows to generate.

    Returns:
        A list of ``n`` row tuples.
    """
    rows = []
    for position in range(n):
        username = fake.user_name()
        email = fake.email()
        # 80 is passed to Faker as the chance (in percent) of getting True.
        is_active = fake.boolean(80)
        created_ts = fake.date_time_between(start_date="-2y", end_date="now")
        # 1-based position in this batch.
        # NOTE(review): presumably meant to match a user_id that the database
        # assigns sequentially from 1 — confirm against the table definition.
        last_updated_by = position + 1
        last_updated_ts = fake.date_time_between(start_date="-2y", end_date="now")
        rows.append(
            (username, email, is_active, created_ts, last_updated_by, last_updated_ts)
        )
    return rows

def generate_brands(n, user_ids):
    """Build ``n`` fake rows for the Brand table.

    Each row is ``(name, country, created_ts, last_updated_by,
    last_updated_ts)``; ``last_updated_by`` is picked at random from
    ``user_ids``. The two timestamps are drawn independently from the last
    two years.

    Args:
        n: Number of rows to generate.
        user_ids: Non-empty sequence of user ids to choose the updater from
            (``random.choice`` raises ``IndexError`` on an empty sequence).

    Returns:
        A list of ``n`` row tuples.
    """
    rows = []
    for _ in range(n):
        name = fake.company()
        country = fake.country()
        created_ts = fake.date_time_between(start_date="-2y", end_date="now")
        updated_by = random.choice(user_ids)
        updated_ts = fake.date_time_between(start_date="-2y", end_date="now")
        rows.append((name, country, created_ts, updated_by, updated_ts))
    return rows

def generate_manufacturers(n, user_ids):
    """Build ``n`` fake rows for the Manufacturer table.

    Each row is ``(name, type, created_ts, last_updated_by,
    last_updated_ts)``. ``type`` is a single random Faker word and
    ``last_updated_by`` is picked at random from ``user_ids``. The two
    timestamps are drawn independently from the last two years.

    Args:
        n: Number of rows to generate.
        user_ids: Non-empty sequence of user ids to choose the updater from.

    Returns:
        A list of ``n`` row tuples.
    """
    rows = []
    for _ in range(n):
        name = fake.company()
        kind = fake.word()
        created_ts = fake.date_time_between(start_date="-2y", end_date="now")
        updated_by = random.choice(user_ids)
        updated_ts = fake.date_time_between(start_date="-2y", end_date="now")
        rows.append((name, kind, created_ts, updated_by, updated_ts))
    return rows

def generate_products(n, user_ids):
    """Build ``n`` fake rows for the Product table.

    Each row is ``(name, description, price, created_ts, last_updated_by,
    last_updated_ts)``. Prices are uniform in [10.0, 1000.0], rounded to two
    decimals. The two timestamps are drawn independently from the last two
    years.

    Args:
        n: Number of rows to generate.
        user_ids: Non-empty sequence of user ids to choose the updater from.

    Returns:
        A list of ``n`` row tuples.
    """
    rows = []
    for _ in range(n):
        sentence = fake.sentence(nb_words=4)
        # Drop the final character (presumably the sentence's trailing period).
        name = sentence[:-1]
        description = fake.paragraph(nb_sentences=2)
        price = round(random.uniform(10.0, 1000.0), 2)
        created_ts = fake.date_time_between(start_date="-2y", end_date="now")
        updated_by = random.choice(user_ids)
        updated_ts = fake.date_time_between(start_date="-2y", end_date="now")
        rows.append((name, description, price, created_ts, updated_by, updated_ts))
    return rows

def generate_sellers(user_ids):
    """Build one fake Seller row for every id in ``user_ids``.

    Each row is ``(user_id, first_time_sold_timestamp, created_ts,
    last_updated_by, last_updated_ts)``. All three timestamps are drawn
    independently from the last year, and ``last_updated_by`` is a random
    member of ``user_ids``.

    Args:
        user_ids: Sequence of user ids; every one becomes a seller.

    Returns:
        A list with ``len(user_ids)`` row tuples, in the same order.
    """

    def within_last_year():
        return fake.date_time_between(start_date="-1y", end_date="now")

    rows = []
    for user_id in user_ids:
        first_sold_ts = within_last_year()
        created_ts = within_last_year()
        updated_by = random.choice(user_ids)
        updated_ts = within_last_year()
        rows.append((user_id, first_sold_ts, created_ts, updated_by, updated_ts))
    return rows

def generate_buyers(user_ids):
    """Build one fake Buyer row for every id in ``user_ids``.

    Each row is ``(user_id, first_time_purchased_timestamp, created_ts,
    last_updated_by, last_updated_ts)``. All three timestamps are drawn
    independently from the last year, and ``last_updated_by`` is a random
    member of ``user_ids``.

    Args:
        user_ids: Sequence of user ids; every one becomes a buyer.

    Returns:
        A list with ``len(user_ids)`` row tuples, in the same order.
    """

    def within_last_year():
        return fake.date_time_between(start_date="-1y", end_date="now")

    rows = []
    for user_id in user_ids:
        first_purchase_ts = within_last_year()
        created_ts = within_last_year()
        updated_by = random.choice(user_ids)
        updated_ts = within_last_year()
        rows.append((user_id, first_purchase_ts, created_ts, updated_by, updated_ts))
    return rows

def generate_ratings(n, product_ids, user_ids):
    """Build ``n`` fake rows for the Ratings table.

    Each row is ``(product_id, rating, created_ts, last_updated_by,
    last_updated_ts)``. The product and the updating user are picked at
    random; the rating is uniform in [1.0, 5.0], rounded to two decimals.
    The two timestamps are drawn independently from the last year.

    Args:
        n: Number of rows to generate.
        product_ids: Non-empty sequence of product ids to rate.
        user_ids: Non-empty sequence of user ids to choose the updater from.

    Returns:
        A list of ``n`` row tuples.
    """
    rows = []
    for _ in range(n):
        product_id = random.choice(product_ids)
        rating = round(random.uniform(1.0, 5.0), 2)
        created_ts = fake.date_time_between(start_date="-1y", end_date="now")
        updated_by = random.choice(user_ids)
        updated_ts = fake.date_time_between(start_date="-1y", end_date="now")
        rows.append((product_id, rating, created_ts, updated_by, updated_ts))
    return rows

def generate_orders(n, buyer_ids):
    """Build ``n`` fake rows for the Orders table.

    Each row is ``(buyer_id, order_ts, total_price, created_ts)``. The buyer
    is picked at random, the total is uniform in [50.0, 1000.0] rounded to
    two decimals, and the two timestamps are drawn independently from the
    last year.

    Args:
        n: Number of rows to generate.
        buyer_ids: Non-empty sequence of buyer ids to assign orders to.

    Returns:
        A list of ``n`` row tuples.
    """
    rows = []
    for _ in range(n):
        buyer_id = random.choice(buyer_ids)
        order_ts = fake.date_time_between(start_date="-1y", end_date="now")
        total_price = round(random.uniform(50.0, 1000.0), 2)
        created_ts = fake.date_time_between(start_date="-1y", end_date="now")
        rows.append((buyer_id, order_ts, total_price, created_ts))
    return rows

def generate_order_items(order_ids, product_ids, seller_ids):
    """Build as many fake OrderItem rows as there are entries in ``order_ids``.

    Each row is ``(order_id, product_id, seller_id, quantity, base_price,
    tax, created_ts)``. The order id of every row is drawn at random from
    ``order_ids`` rather than taken in sequence, so an order may end up with
    several items or with none.

    Args:
        order_ids: Sequence of order ids; its length sets the row count.
        product_ids: Non-empty sequence of product ids.
        seller_ids: Non-empty sequence of seller ids.

    Returns:
        A list of ``len(order_ids)`` row tuples.
    """
    item_count = len(order_ids)
    rows = []
    for _ in range(item_count):
        order_id = random.choice(order_ids)
        product_id = random.choice(product_ids)
        seller_id = random.choice(seller_ids)
        quantity = random.randint(1, 5)
        base_price = round(random.uniform(10.0, 200.0), 2)
        tax = round(random.uniform(1.0, 30.0), 2)
        created_ts = fake.date_time_between(start_date="-1y", end_date="now")
        rows.append(
            (order_id, product_id, seller_id, quantity, base_price, tax, created_ts)
        )
    return rows

def generate_clickstreams(user_ids, product_ids, order_ids):
    """Build fake Clickstream rows: 5 to 15 events for every user.

    Each row is ``(user_id, event_type, product_id, order_id, timestamp,
    created_ts)``. A "purchase" event references a random order and has no
    product; "view" and "add_to_cart" events reference a random product and
    have no order. The same timestamp is used for both time columns.

    Args:
        user_ids: Iterable of user ids to generate events for.
        product_ids: Non-empty sequence of product ids.
        order_ids: Non-empty sequence of order ids.

    Returns:
        A flat list of event tuples, grouped by user in input order.
    """
    event_types = ["view", "add_to_cart", "purchase"]
    rows = []
    for user_id in user_ids:
        event_count = random.randint(5, 15)
        for _ in range(event_count):
            kind = random.choice(event_types)
            happened_at = fake.date_time_between(start_date="-1y", end_date="now")
            if kind == "purchase":
                product_id = None
                order_id = random.choice(order_ids)
            else:
                product_id = random.choice(product_ids)
                order_id = None
            rows.append((user_id, kind, product_id, order_id, happened_at, happened_at))
    return rows

def _fetch_ids(cursor, query, table_name):
    """Run a single-column SELECT and return its values as a non-empty list.

    Raises:
        RuntimeError: If the query returns no rows. The generators pick ids
            with ``random.choice``, which would otherwise fail later with an
            uninformative ``IndexError``.
    """
    cursor.execute(query)
    ids = [row[0] for row in cursor.fetchall()]
    if not ids:
        raise RuntimeError(
            f"No rows found in {table_name}; cannot generate dependent data"
        )
    return ids


def _load_all(conn, cur):
    """Generate and insert every table in dependency order, committing per step."""
    # Step 1: Insert Users
    users = generate_users(NUM_USERS)
    execute_insert(cur, '''
        INSERT INTO AppUser (username, email, is_active, created_ts, last_updated_by, last_updated_ts)
        VALUES (:1, :2, :3, :4, :5, :6)
    ''', users, "AppUser")
    conn.commit()
    user_ids = _fetch_ids(cur, "SELECT user_id FROM AppUser", "AppUser")

    # Step 2: Brands and Manufacturers
    execute_insert(cur, '''
        INSERT INTO Brand (name, country, created_ts, last_updated_by, last_updated_ts)
        VALUES (:1, :2, :3, :4, :5)
    ''', generate_brands(NUM_BRANDS, user_ids), "Brand")

    execute_insert(cur, '''
        INSERT INTO Manufacturer (name, type, created_ts, last_updated_by, last_updated_ts)
        VALUES (:1, :2, :3, :4, :5)
    ''', generate_manufacturers(NUM_MANUFACTURERS, user_ids), "Manufacturer")
    conn.commit()

    # Step 3: Products
    products = generate_products(NUM_PRODUCTS, user_ids)
    execute_insert(cur, '''
        INSERT INTO Product (name, description, price, created_ts, last_updated_by, last_updated_ts)
        VALUES (:1, :2, :3, :4, :5, :6)
    ''', products, "Product")
    conn.commit()
    product_ids = _fetch_ids(cur, "SELECT product_id FROM Product", "Product")

    # Step 4: Sellers, Buyers
    execute_insert(cur, '''
        INSERT INTO Seller (user_id, first_time_sold_timestamp, created_ts, last_updated_by, last_updated_ts)
        VALUES (:1, :2, :3, :4, :5)
    ''', generate_sellers(user_ids), "Seller")
    execute_insert(cur, '''
        INSERT INTO Buyer (user_id, first_time_purchased_timestamp, created_ts, last_updated_by, last_updated_ts)
        VALUES (:1, :2, :3, :4, :5)
    ''', generate_buyers(user_ids), "Buyer")
    conn.commit()
    seller_ids = _fetch_ids(cur, "SELECT seller_id FROM Seller", "Seller")
    buyer_ids = _fetch_ids(cur, "SELECT buyer_id FROM Buyer", "Buyer")

    # Step 5: Ratings
    execute_insert(cur, '''
        INSERT INTO Ratings (product_id, rating, created_ts, last_updated_by, last_updated_ts)
        VALUES (:1, :2, :3, :4, :5)
    ''', generate_ratings(NUM_RATINGS, product_ids, user_ids), "Ratings")
    conn.commit()

    # Step 6: Orders & OrderItems
    orders = generate_orders(NUM_ORDERS, buyer_ids)
    execute_insert(cur, '''
        INSERT INTO Orders (buyer_id, order_ts, total_price, created_ts)
        VALUES (:1, :2, :3, :4)
    ''', orders, "Orders")
    conn.commit()
    order_ids = _fetch_ids(cur, "SELECT order_id FROM Orders", "Orders")

    execute_insert(cur, '''
        INSERT INTO OrderItem (order_id, product_id, seller_id, quantity, base_price, tax, created_ts)
        VALUES (:1, :2, :3, :4, :5, :6, :7)
    ''', generate_order_items(order_ids, product_ids, seller_ids), "OrderItem")
    conn.commit()

    # Step 7: Clickstream
    execute_insert(cur, '''
        INSERT INTO Clickstream (user_id, event_type, product_id, order_id, timestamp, created_ts)
        VALUES (:1, :2, :3, :4, :5, :6)
    ''', generate_clickstreams(user_ids, product_ids, order_ids), "Clickstream")
    conn.commit()


def main():
    """Connect to Oracle, then generate and load fake data for every table.

    Tables are loaded in dependency order: users, brands and manufacturers,
    products, sellers and buyers, ratings, orders and order items, and finally
    clickstream events. Generated ids are read back from the database after
    each parent table is loaded so that child rows reference existing keys.

    Returns early without doing anything when the connection cannot be
    opened. The cursor and the connection are always closed, even if a step
    raises.

    Raises:
        RuntimeError: If a parent table is empty when its ids are read back.
    """
    conn = get_connection()
    if conn is None:
        # get_connection() has already printed the reason.
        return

    try:
        cur = conn.cursor()
        try:
            _load_all(conn, cur)
        finally:
            cur.close()
    finally:
        conn.close()

    print("🎉 All data generated and loaded successfully.")

# Run the loader only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
