In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# --- CONFIGURATION ---
NUM_ROWS = 2000                     # number of synthetic order rows to generate
START_DATE = datetime(2023, 1, 1)   # earliest possible order date
DATE_RANGE_DAYS = 700               # order dates span START_DATE .. START_DATE + 700 days (inclusive)
LOSS_PROBABILITY = 0.10             # chance that a row is recorded as a loss
LOSS_RATE = -0.10                   # profit as a fraction of sales for a loss-making row

# --- LISTS OF DATA ---
regions = ['North', 'South', 'East', 'West']
categories = {
    'Technology': ['Laptop', 'Smartphone', 'Monitor', 'USB Hub', 'Headphones'],
    'Furniture': ['Chair', 'Desk', 'Bookshelf', 'Sofa', 'Table'],
    'Office Supplies': ['Binder', 'Paper', 'Pen Set', 'Stapler', 'Folder']
}
customers = ['Chandan', 'Amit', 'Sarah', 'John', 'Priya', 'Rahul', 'Emily', 'David', 'Sneha', 'Michael']

# Per-category pricing rules:
#   (min unit price, max unit price), (min profit margin, max profit margin)
category_pricing = {
    'Technology': ((200, 2000), (0.10, 0.30)),      # 10-30% profit
    'Furniture': ((100, 1000), (0.05, 0.20)),       # 5-20% profit
    'Office Supplies': ((5, 50), (0.30, 0.50)),     # 30-50% profit
}
# Any category without its own entry is priced like Office Supplies, which is
# what the catch-all branch of the previous if/elif chain did.
default_pricing = category_pricing['Office Supplies']

# The category names never change while generating, so build the list once
# instead of once per row.
category_names = list(categories)

data = []

print("Generating data... please wait.")

for i in range(1, NUM_ROWS + 1):
    # 1. Random date between START_DATE and START_DATE + DATE_RANGE_DAYS
    #    (both ends inclusive, because random.randint includes its upper bound).
    date = START_DATE + timedelta(days=random.randint(0, DATE_RANGE_DAYS))

    # 2. Random Region & Customer
    region = random.choice(regions)
    customer = random.choice(customers)

    # 3. Random Category & Product (the product always belongs to the category)
    cat_name = random.choice(category_names)
    prod_name = random.choice(categories[cat_name])

    # 4. Math for Sales & Profit
    quantity = random.randint(1, 10)

    # Unit price and margin are drawn from the ranges configured for the category.
    (price_low, price_high), (margin_low, margin_high) = category_pricing.get(
        cat_name, default_pricing
    )
    base_price = random.randint(price_low, price_high)
    profit_margin = random.uniform(margin_low, margin_high)

    sales = round(base_price * quantity, 2)
    # Occasionally throw in a negative profit (loss) to make it realistic
    if random.random() < LOSS_PROBABILITY:
        profit = round(sales * LOSS_RATE, 2)
    else:
        profit = round(sales * profit_margin, 2)

    # Field order: id, date, customer, region, category, product, sales, quantity, profit
    data.append([i, date, customer, region, cat_name, prod_name, sales, quantity, profit])

# --- BUILD THE TABLE ---
# Column labels, listed in the same order as the fields of each row in `data`.
columns = [
    'order_id',
    'order_date',
    'customer_name',
    'region',
    'category',
    'product_name',
    'sales',
    'quantity',
    'profit',
]
df = pd.DataFrame(data=data, columns=columns)

# Write the table to disk; index=False keeps the row index out of the file.
df.to_csv('my_generated_data.csv', index=False)

# Report what was produced, then preview the first rows of the table.
print(
    f"SUCCESS! generated {NUM_ROWS} rows.",
    "File saved as: my_generated_data.csv",
    sep="\n",
)
print(df.head())

Generating data... please wait.
SUCCESS! generated 2000 rows.
File saved as: my_generated_data.csv
   order_id order_date customer_name region         category product_name  \
0         1 2023-04-24       Michael  North       Technology      USB Hub   
1         2 2023-06-27          John   West       Technology   Smartphone   
2         3 2024-09-23          Amit   West        Furniture         Sofa   
3         4 2024-08-24         Sneha  North       Technology   Smartphone   
4         5 2024-09-20         Rahul  South  Office Supplies        Paper   

   sales  quantity   profit  
0  19930        10  5736.64  
1   5620        10  1418.41  
2    171         1    27.18  
3   9976         8  2004.28  
4     38         2    17.25  
