In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seed NumPy's global generator so every run produces the same catalogue
np.random.seed(42)

# Catalogue layout: each department maps to the categories it contains
categories = {
    "Electronics": ["Phones", "Computers", "TVs"],
    "Clothing": ["Men", "Women", "Kids"],
    "Home": ["Furniture", "Appliances", "Kitchen"],
    "Toys": ["Outdoor", "Educational", "Games"],
    "Grocery": ["Fresh", "Canned", "Beverages"],
}
departments = list(categories)  # department names, in the mapping's insertion order

# Products are numbered 1..num_products
num_products = 10000
product_ids = np.arange(1, num_products + 1)


def _make_product(prod_id):
    """Draw one random product record for the given numeric product id.

    The random draws happen in a fixed order (department, category,
    sub-category number, cost, stock); together with the seed set above,
    that order determines the generated values.

    Returns a dict with the descriptive columns, the unit cost and the
    "100_stock" quantity.
    """
    dept = np.random.choice(departments)
    category = np.random.choice(categories[dept])
    subcategory = f"{category}_SubCat_{np.random.randint(1, 10)}"
    cost = round(np.random.uniform(5, 500), 2)
    stock_100 = np.random.randint(50, 5000)

    category_code = category[:3].upper()
    return {
        "department": dept,
        "category": category,
        "category_id": f"{dept[:3].upper()}_{category_code}",
        "subcategory": subcategory,
        # Last character of the sub-category name, i.e. its number
        # (randint(1, 10) only yields the single digits 1-9).
        "subcategory_id": f"{category_code}_{subcategory[-1]}",
        "product": f"{category}_Product_{prod_id}",
        "product_id": f"SKU_{prod_id}",
        "cost": cost,
        "100_stock": stock_100,
    }


# One record per product id, generated in id order
product_data = [_make_product(prod_id) for prod_id in product_ids]


In [None]:
# Store directory: store code (string) -> store / region name.
# NOTE(review): codes "104" and "114" both name Asturias - confirm whether
# that duplication is intended.
tiendas = {
    "100": "Dist. Center",
    "101": "Madrid",
    "102": "Andalucia",
    "103": "Aragon",
    "104": "Asturias",
    "105": "Canarias",
    "106": "Galicia",
    "107": "Murcia",
    "108": "Cataluña",
    "109": "País Vasco",
    "110": "Ceuta",
    "111": "La Rioja",
    "112": "Castilla-La Mancha",
    "113": "Castilla y Leon",
    "114": "Principado de Asturias",
    "115": "Baleares",
    "116": "Cantabria",
    "117": "Comunidad Valenciana",
    "118": "Melilla",
    "119": "Madrid_Las Rozas",
    "120": "Madrid_SS de los Reyes",
}

In [4]:

# Turn the list of product records into a DataFrame (one row per product)
df = pd.DataFrame(product_data)

# Code "100" already has its "100_stock" column from the product records, so
# generated columns are added for every other code in `tiendas`.
# The codes are compared with `!=`: the previous `id not in('100')` was a
# substring test against the string '100' (the parentheses do not make a
# tuple) and only worked because no other code is a substring of "100".
DIST_CENTER_ID = "100"
store_ids = [store_id for store_id in tiendas if store_id != DIST_CENTER_ID]

# Add random stock, transfer and sales quantities for each store.
# Draw order (stock, transfer, sales per store) is kept stable so a seeded
# run keeps producing the same values.
n_rows = len(df)
for store_id in store_ids:
    df[f"{store_id}_stock"] = np.random.randint(0, 300, size=n_rows)
    df[f"{store_id}_transfer"] = np.random.randint(0, 100, size=n_rows)
    df[f"{store_id}_sales"] = np.random.randint(0, 200, size=n_rows)

# Preview the first rows of the resulting frame
df.head()

Unnamed: 0,department,category,category_id,subcategory,subcategory_id,product,product_id,cost,100_stock,101_stock,...,117_sales,118_stock,118_transfer,118_sales,119_stock,119_transfer,119_sales,120_stock,120_transfer,120_sales
0,Toys,Outdoor,TOY_OUT,Outdoor_SubCat_8,OUT_8,Outdoor_Product_1,SKU_1,301.34,516,95,...,132,7,65,17,19,51,123,209,60,183
1,Home,Kitchen,HOM_KIT,Kitchen_SubCat_8,KIT_8,Kitchen_Product_2,SKU_2,170.19,2969,44,...,120,291,25,127,6,68,77,177,55,84
2,Home,Appliances,HOM_APP,Appliances_SubCat_5,APP_5,Appliances_Product_3,SKU_3,485.11,2483,290,...,2,188,61,166,87,68,43,61,15,22
3,Toys,Outdoor,TOY_OUT,Outdoor_SubCat_1,OUT_1,Outdoor_Product_4,SKU_4,155.6,4167,257,...,199,57,3,119,21,32,143,195,87,131
4,Grocery,Fresh,GRO_FRE,Fresh_SubCat_1,FRE_1,Fresh_Product_5,SKU_5,264.76,2608,258,...,136,156,9,14,148,64,89,190,70,108


In [5]:
# Persist the generated dataset. pandas' default index=True is spelled out:
# the row index is written as the first, unnamed CSV column.
df.to_csv("df_retail.csv", index=True)