In [2]:
import pandas as pd 
import numpy as np 
from datetime import timedelta 

In [10]:
# Build a synthetic customer table: one row per customer with a signup date,
# a region and a segment code. The global NumPy seed is fixed first, and the
# three np.random.choice calls below must stay in this order (dates, regions,
# segments) so that the same table is produced on every run.
np.random.seed(42)

n_customers = 600  # number of customers to generate
customer_ids = range(1, n_customers + 1)  # ids run 1..n_customers inclusive

# Signup dates are sampled (with replacement) from every day of 2021-2023.
signup_window = pd.date_range("2021-01-01", "2023-12-31")
signup_dates = pd.to_datetime(np.random.choice(signup_window, n_customers))

# Regions are drawn with unequal weights; the weights sum to 1.
region_names = ["North", "South", "East", "West"]
region_weights = [0.3, 0.25, 0.25, 0.2]
regions = np.random.choice(region_names, n_customers, p=region_weights)

# Segment codes are drawn the same way, with their own weights.
segment_names = ["HR_HL", "HR_LL", "LR_HL", "LR_LL"]
segment_weights = [0.2, 0.2, 0.3, 0.3]
segments = np.random.choice(segment_names, n_customers, p=segment_weights)

Customers = pd.DataFrame(
    {
        "customer_id": customer_ids,
        "signup_date": signup_dates,
        "region": regions,
        "customer_segment": segments,
    }
)

    



In [16]:
# Write the customer table to CSV; index=False leaves the row index out of the file.
# NOTE(review): this is an absolute, machine-specific path — the target folder
# must already exist on the machine running the notebook.
customers_csv_path = "C:/Users/Abhi/Desktop/Projects/customer-loyalty-mysql/Data/Customers.csv"
Customers.to_csv(customers_csv_path, index=False)


In [18]:
# Preview the first five customer rows as a quick sanity check.
Customers.head(n=5)

Unnamed: 0,customer_id,signup_date,region,customer_segment
0,1,2023-05-11,East,HR_LL
1,2,2023-11-11,West,LR_HL
2,3,2021-05-02,West,LR_HL
3,4,2022-04-12,North,LR_LL
4,5,2021-11-27,West,HR_HL


In [22]:
# Reload the customer table from disk. The global NumPy seed is reset first so
# that random draws made after this point start from a known state.
np.random.seed(42)

# NOTE(review): absolute, machine-specific path — the file must already exist.
customers_csv_path = "C:/Users/Abhi/Desktop/Projects/customer-loyalty-mysql/Data/Customers.csv"

# read_csv (called without parse_dates) loads signup_date as text, so it is
# converted back to datetimes before the table is used for date arithmetic.
Customers = pd.read_csv(customers_csv_path).assign(
    signup_date=lambda frame: pd.to_datetime(frame["signup_date"])
)

Customers.head(n=5)

Unnamed: 0,customer_id,signup_date,region,customer_segment
0,1,2023-05-11,East,HR_LL
1,2,2023-11-11,West,LR_HL
2,3,2021-05-02,West,LR_HL
3,4,2022-04-12,North,LR_LL
4,5,2021-11-27,West,HR_HL


In [24]:
# Build a synthetic product catalogue. Random draws come from the global NumPy
# state, so the call order below (categories, cost, markup) must not change if
# the generated values are to stay the same.
n_products = 40  # number of products to generate
product_ids = range(1, n_products + 1)  # ids run 1..n_products inclusive

# Each product gets one category, all categories equally likely.
category_names = ["Electronics", "Clothing", "Home", "Sports", "Books"]
categories = np.random.choice(category_names, n_products)

# Cost is uniform in [200, 3000); the selling price applies a uniform
# 1.2x-1.8x markup to the (already rounded) cost. Both are kept to 2 decimals.
cost_price = np.round(np.random.uniform(200, 3000, n_products), 2)
markup = np.random.uniform(1.2, 1.8, n_products)
selling_price = np.round(cost_price * markup, 2)

Products = pd.DataFrame(
    {
        "product_id": product_ids,
        "category": categories,
        "cost_price": cost_price,
        "selling_price": selling_price,
    }
)


In [26]:
# Write the product catalogue to CSV; index=False leaves the row index out of the file.
# NOTE(review): this is an absolute, machine-specific path — the target folder
# must already exist on the machine running the notebook.
products_csv_path = "C:/Users/Abhi/Desktop/Projects/customer-loyalty-mysql/Data/Products.csv"
Products.to_csv(products_csv_path, index=False)

In [120]:
# Preview the first five product rows as a quick sanity check.
Products.head(n=5)

Unnamed: 0,product_id,category,cost_price,selling_price
0,1,Sports,2856.88,4826.06
1,2,Books,2903.77,4716.05
2,3,Home,2463.51,4033.76
3,4,Books,1052.92,1750.76
4,5,Books,473.48,589.21


In [86]:
# Simulate an order history for every customer. Draws come from the global
# NumPy state, so the order of the np.random calls inside the loops is kept
# fixed: order count, then per order: day offset, discount coin flip, and
# (only when discounted) the discount amount.

# Per-segment behaviour: (maximum orders per customer, probability that an
# individual order carries a discount).
segment_behavior = {
    "HR_HL": (25, 0.05),   # many orders, low discount
    "HR_LL": (8, 0.30),    # few orders, high discount
    "LR_HL": (18, 0.10),   # frequent small orders
    "LR_LL": (3, 0.20)     # one-time buyers
}

order_rows = []  # one [order_id, customer_id, order_date, discount] list per order
order_id = 1     # running id, unique across all customers

customer_fields = zip(
    Customers["customer_id"],
    Customers["signup_date"],
    Customers["customer_segment"],
)
for customer_id, signup_date, segment in customer_fields:
    max_orders, discount_prob = segment_behavior[segment]
    # randint's upper bound is exclusive, so this yields 1..max_orders orders.
    num_orders = np.random.randint(1, max_orders + 1)

    for _ in range(num_orders):
        # Each order falls 1..699 days after the customer's signup date.
        days_after_signup = np.random.randint(1, 700)
        order_date = signup_date + timedelta(days=days_after_signup)

        # A discounted order gets an amount uniform in [50, 500); others get 0.
        is_discounted = np.random.rand() < discount_prob
        discount = np.random.uniform(50, 500) if is_discounted else 0

        order_rows.append([order_id, customer_id, order_date, round(discount, 2)])
        order_id += 1

orders = pd.DataFrame(
    order_rows,
    columns=["order_id", "customer_id", "order_date", "discount_amount"],
)



In [92]:
# Write the orders table to CSV; index=False leaves the row index out of the file.
# NOTE(review): this is an absolute, machine-specific path — the target folder
# must already exist on the machine running the notebook.
orders_csv_path = "C:/Users/Abhi/Desktop/Projects/customer-loyalty-mysql/Data/orders.csv"
orders.to_csv(orders_csv_path, index=False)

In [118]:
# Preview the first five order rows as a quick sanity check.
orders.head(n=5)

Unnamed: 0,order_id,customer_id,order_date,discount_amount
0,1,1,2024-04-22,335.09
1,2,1,2024-09-01,0.0
2,3,1,2024-07-15,0.0
3,4,1,2023-08-09,126.08
4,5,1,2024-05-05,254.19


In [110]:
# Simulate the line items of every order. Draws come from the global NumPy
# state (DataFrame.sample without random_state uses it too), so the call order
# is kept fixed: item count, product sample, then one quantity per product.
item_rows = []     # one [order_item_id, order_id, product_id, quantity, price] list per item
order_item_id = 1  # running id, unique across all orders

for current_order_id in orders["order_id"]:
    # randint's upper bound is exclusive: each order holds 1..4 products.
    n_items = np.random.randint(1, 5)
    # sample() draws without replacement, so products within an order are distinct.
    selected_products = Products.sample(n_items)

    product_fields = zip(
        selected_products["product_id"],
        selected_products["selling_price"],
    )
    for product_id, unit_price in product_fields:
        quantity = np.random.randint(1, 4)  # 1..3 units of this product

        item_rows.append(
            [order_item_id, current_order_id, product_id, quantity, unit_price]
        )
        order_item_id += 1

order_items = pd.DataFrame(
    item_rows,
    columns=["order_item_id", "order_id", "product_id", "quantity", "price"],
)

In [112]:
# Write the order-items table to CSV; index=False leaves the row index out of the file.
# NOTE(review): this is an absolute, machine-specific path — the target folder
# must already exist on the machine running the notebook.
order_items_csv_path = "C:/Users/Abhi/Desktop/Projects/customer-loyalty-mysql/Data/order_items.csv"
order_items.to_csv(order_items_csv_path, index=False)

In [116]:
# Preview the first five order-item rows as a quick sanity check.
order_items.head(n=5)

Unnamed: 0,order_item_id,order_id,product_id,quantity,price
0,1,1,22,3,3115.88
1,2,2,17,2,1064.42
2,3,2,13,1,2867.03
3,4,2,14,3,1756.98
4,5,2,19,1,3858.84
