In [3]:
from faker import Faker
import pandas as pd
import random
import numpy as np

In [4]:
fake = Faker("en_IN")
Faker.seed(42)
random.seed(42)
np.random.seed(42)

In [5]:
# Parameters
NUM_CUSTOMERS = 100
NUM_PRODUCTS = 50
NUM_ORDERS = 700
NUM_FEEDBACK = 350

In [6]:
customers = []
for i in range(1, NUM_CUSTOMERS + 1):
    customers.append({
        "customer_id": i,
        "customer_name": fake.name(),  
        "customer_type": random.choice(["Retailer", "Wholesaler", "Key Account"]),
        "region": fake.state()  
    })

In [7]:
customers_df = pd.DataFrame(customers)
print(customers_df.head())

   customer_id  customer_name customer_type          region
0            1  Aryan Maharaj   Key Account         Haryana
1            2   Liam Chaudry      Retailer          Sikkim
2            3    Pahal Balay      Retailer  Andhra Pradesh
3            4     Tejas Kaul   Key Account         Mizoram
4            5   Rushil Saini    Wholesaler         Haryana


In [16]:
customers_df.to_csv("customers.csv", index=False)

In [6]:
# -------------------- Generate Products --------------------
products = []
for i in range(1, NUM_PRODUCTS + 1):
    products.append({
        "product_id": i,
        "product_name": fake.word().capitalize() + " Product",
        "product_weight_kg": round(random.uniform(1.0, 50.0), 2)
    })
products_df = pd.DataFrame(products)

In [7]:
products_df.head()

Unnamed: 0,product_id,product_name,product_weight_kg
0,1,Boy Product,49.49
1,2,Pick Product,32.36
2,3,Leader Product,28.29
3,4,Hair Product,34.55
4,5,Shoulder Product,42.3


In [17]:
products_df.to_csv("products.csv", index=False)

In [8]:
# -------------------- Generate Orders --------------------
orders = []
for i in range(1, NUM_ORDERS + 1):
    orders.append({
        "order_id": i,
        "customer_id": random.randint(1, NUM_CUSTOMERS),
        "order_date": fake.date_between(start_date='-1y', end_date='today'),
        "total_quantity": random.randint(1, 100)
    })
orders_df = pd.DataFrame(orders)

In [9]:
orders_df.head()

Unnamed: 0,order_id,customer_id,order_date,total_quantity
0,1,93,2024-08-29,34
1,2,65,2024-12-21,98
2,3,23,2024-07-18,65
3,4,14,2024-10-27,81
4,5,39,2024-05-14,82


In [18]:
orders_df.to_csv("orders.csv", index=False)

In [10]:
# -------------------- Generate Order_Product --------------------
order_product = []
for i in range(1, NUM_ORDERS + 1):
    num_items = random.randint(1, 4)
    selected_products = random.sample(range(1, NUM_PRODUCTS + 1), num_items)
    for pid in selected_products:
        order_product.append({
            "order_id": i,
            "product_id": pid,
            "quantity_ordered": random.randint(1, 10)
        })
order_product_df = pd.DataFrame(order_product)

In [11]:
order_product_df.head()

Unnamed: 0,order_id,product_id,quantity_ordered
0,1,37,2
1,1,14,9
2,2,38,4
3,2,14,6
4,3,39,5


In [19]:
order_product_df.to_csv("order_product.csv", index=False)

In [12]:
# -------------------- Generate Deliveries --------------------
deliveries = []
for order in orders:
    delivery_date = fake.date_between(start_date=order["order_date"], end_date='+10d')
    deliveries.append({
        "delivery_id": order["order_id"],
        "order_id": order["order_id"],
        "delivery_date": delivery_date,
        "delivery_status": random.choice(["On Time", "Delayed"]),
        "is_full_delivery": random.choice([True, False]),
        "warehouse_id": f"WH_{random.randint(1, 10):03d}",
        "distance_from_hub_km": round(random.uniform(50, 300), 2),
        "flood_affected": random.choice([True, False])
    })
deliveries_df = pd.DataFrame(deliveries)

In [13]:
deliveries_df.head()

Unnamed: 0,delivery_id,order_id,delivery_date,delivery_status,is_full_delivery,warehouse_id,distance_from_hub_km,flood_affected
0,1,1,2025-03-27,Delayed,True,WH_002,90.5,True
1,2,2,2025-03-20,Delayed,False,WH_005,227.61,False
2,3,3,2024-12-25,Delayed,False,WH_006,236.97,False
3,4,4,2024-11-06,Delayed,True,WH_003,263.55,True
4,5,5,2024-07-25,On Time,False,WH_002,102.64,False


In [20]:
deliveries_df.to_csv("deliveries.csv", index=False)

In [14]:
# -------------------- Generate Feedback --------------------
feedbacks = []
feedback_order_ids = random.sample(range(1, NUM_ORDERS + 1), NUM_FEEDBACK)
for i in range(1, NUM_FEEDBACK + 1):
    order_id = feedback_order_ids[i-1]
    feedbacks.append({
        "feedback_id": i,
        "customer_id": orders_df.loc[orders_df['order_id'] == order_id, 'customer_id'].values[0],
        "order_id": order_id,
        "feedback_date": fake.date_between(start_date=orders_df.loc[orders_df['order_id'] == order_id, 'order_date'].values[0], end_date='today'),
        "satisfaction_score": random.randint(1, 5),
        "complaint_type": random.choice(["Late Delivery", "Incomplete Order", "Damaged Goods", "None"]),
        "renewal_status": random.choice(["Renewed", "Not Renewed"])
    })
feedback_df = pd.DataFrame(feedbacks)

In [15]:
feedback_df.head()

Unnamed: 0,feedback_id,customer_id,order_id,feedback_date,satisfaction_score,complaint_type,renewal_status
0,1,53,163,2025-03-15,1,Damaged Goods,Renewed
1,2,36,619,2024-11-10,4,Incomplete Order,Renewed
2,3,46,589,2024-11-24,2,Late Delivery,Renewed
3,4,84,318,2025-01-09,5,Damaged Goods,Not Renewed
4,5,33,602,2025-03-23,4,,Not Renewed


In [21]:
feedback_df.to_csv("feedback.csv", index=False)