In [3]:
import pandas as pd
from faker import Faker
import random
from sqlalchemy import create_engine, text

# Connection URL for the `sales_db` database on the local SQL Server Express
# instance. `trusted_connection=yes` uses Windows authentication, so no
# credentials appear in the URL. The host name is specific to this machine.
connection_url = (
    "mssql+pyodbc://DESKTOP-MHNR294\\SQLEXPRESS/sales_db"
    "?driver=ODBC+Driver+17+for+SQL+Server"
    "&trusted_connection=yes"
)
engine = create_engine(connection_url)

# Shared Faker instance used by the data-generation cells below.
fake = Faker()


In [None]:
# Generate 100 synthetic customers with IDs 101-200. Each record gets a fake
# name and city, a randomly chosen gender, and a join date within the last
# two years.
customers = [
    {
        "customer_id": customer_id,
        "name": fake.name(),
        "gender": random.choice(["Male", "Female"]),
        "city": fake.city(),
        "join_date": fake.date_between(start_date='-2y', end_date='today'),
    }
    for customer_id in range(101, 201)
]

customers_df = pd.DataFrame(customers)

# Append the rows to dbo.Customers; the DataFrame index is not written.
customers_df.to_sql(
    name="Customers", con=engine, schema="dbo", if_exists="append", index=False
)




✅ 100 customers inserted


In [None]:
# Static lookup table of the four product categories (IDs 1-4).
categories = [
    {"category_id": 1, "category_name": "Electronics"},
    {"category_id": 2, "category_name": "Clothing"},
    {"category_id": 3, "category_name": "Groceries"},
    {"category_id": 4, "category_name": "Home"}
]

categories_df = pd.DataFrame(categories)

# Target the dbo schema explicitly, the same way the Customers load does.
# Without `schema`, pandas resolves the table against the connection's
# default schema, which may not be dbo for every login.
categories_df.to_sql(
    "Categories",
    engine,
    schema="dbo",
    if_exists="append",
    index=False
)




✅ Categories inserted


In [None]:
# Generate 20 synthetic products (IDs 1-20).
products = []

for product_id in range(1, 21):
    # Sale price between 20 and 500; cost is 50-80% of that price.
    price = round(random.uniform(20, 500), 2)
    cost = round(price * random.uniform(0.5, 0.8), 2)

    products.append({
        "product_id": product_id,
        "product_name": fake.word().capitalize(),
        # 1-4 matches the category IDs defined in the Categories cell.
        "category_id": random.randint(1, 4),
        "price": price,
        "cost": cost
    })

products_df = pd.DataFrame(products)

# Target the dbo schema explicitly, the same way the Customers load does.
# Without `schema`, pandas resolves the table against the connection's
# default schema, which may not be dbo for every login.
products_df.to_sql(
    "Products",
    engine,
    schema="dbo",
    if_exists="append",
    index=False
)




✅ 20 products inserted


In [None]:
# Generate 1000 synthetic orders (IDs 1-1000).
#
# Customer IDs are drawn from the customers this notebook actually generated
# (customers_df) rather than a hard-coded 1-100 range. The Customers cell
# creates IDs 101-200, so a fixed 1-100 range would reference customers that
# this notebook never inserts.
customer_ids = customers_df["customer_id"].tolist()

orders = []

for order_id in range(1, 1001):
    orders.append({
        "order_id": order_id,
        "customer_id": random.choice(customer_ids),
        # NOTE(review): order_date is drawn independently of the customer's
        # join_date, so an order can predate the date its customer joined.
        "order_date": fake.date_between(start_date='-1y', end_date='today'),
        # Placeholder; the real total is written once order details exist.
        "total_amount": 0
    })

orders_df = pd.DataFrame(orders)

# Target the dbo schema explicitly, the same way the Customers load does.
orders_df.to_sql(
    "Orders",
    engine,
    schema="dbo",
    if_exists="append",
    index=False
)




✅ 1000 orders inserted


In [None]:
# Generate 1-3 line items for every order.
#
# Build the product_id -> price map once up front instead of re-filtering
# products_df with a boolean mask for every line item.
price_by_product = products_df.set_index("product_id")["price"].to_dict()
product_ids = list(price_by_product)

order_details = []
detail_id = 1

# 1-1000 is the same order ID range the Orders cell inserts.
for order_id in range(1, 1001):
    # random.sample picks distinct products, so an order never lists the
    # same product twice.
    selected_products = random.sample(product_ids, random.randint(1, 3))

    for pid in selected_products:
        order_details.append({
            "order_detail_id": detail_id,
            "order_id": order_id,
            "product_id": pid,
            "quantity": random.randint(1, 4),
            # The unit price is the product's generated price.
            "unit_price": price_by_product[pid]
        })

        detail_id += 1

order_details_df = pd.DataFrame(order_details)

# Target the dbo schema explicitly, the same way the Customers load does.
order_details_df.to_sql(
    "Order_Details",
    engine,
    schema="dbo",
    if_exists="append",
    index=False
)


✅ Order details inserted


In [13]:
# Per-order total = sum(quantity * unit_price) over that order's line items.
# This is computed with vectorised column arithmetic and a plain groupby sum
# rather than groupby().apply(lambda ...), which is slower and emits a
# deprecation warning on recent pandas versions.
order_totals = (
    order_details_df
    .assign(
        line_total=order_details_df["quantity"] * order_details_df["unit_price"]
    )
    .groupby("order_id", as_index=False)["line_total"]
    .sum()
    .rename(columns={"line_total": "total_amount"})
    # Float products such as 3 * 19.99 carry binary noise; amounts are money,
    # so round to two decimals.
    .round({"total_amount": 2})
)

update_stmt = text("""
    UPDATE Orders
    SET total_amount = :total
    WHERE order_id = :order_id
""")

# Convert numpy scalars to plain Python types so the DB driver can bind them.
update_params = [
    {"total": float(row.total_amount), "order_id": int(row.order_id)}
    for row in order_totals.itertuples(index=False)
]

# Passing the whole parameter list to a single execute() call lets SQLAlchemy
# run it as one executemany instead of one round trip per order.
# engine.begin() commits on success and rolls back if an exception is raised.
# Skip the call when there is nothing to update, because an empty parameter
# list would leave the statement's bind parameters unset.
if update_params:
    with engine.begin() as conn:
        conn.execute(update_stmt, update_params)

