In [2]:
!pip install faker pandas


Collecting faker
  Downloading Faker-18.13.0-py3-none-any.whl (1.7 MB)
     ---------------------------------------- 1.7/1.7 MB 12.1 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-18.13.0


In [2]:
import pandas as pd
from faker import Faker
import random

# Initialize Faker
fake = Faker()

# Number of rows for each dataset
num_rows = 1000

# Generate Customers Dataset
customers_data = []
for i in range(1, num_rows + 1):
    customers_data.append([
        1000 + i,  # customer_id
        random.randint(1950, 2005),  # year_birth
        random.choice(["High School", "Graduate", "PhD", "Associate", "Bachelor", "Master"]),  # education
        random.choice(["Single", "Married", "Divorced", "Widowed"]),  # marital_status
        random.randint(20000, 120000),  # income
        random.randint(0, 3),  # kid_home
        random.randint(0, 3),  # teen_home
        fake.date_between(start_date="-20y", end_date="today"),  # dt_customer (registration date)
        random.randint(1, 30),  # recency (days since last purchase)
        random.randint(0, 500),  # mnt_wines
        random.randint(0, 200),  # mnt_fruits
        random.randint(0, 300),  # mnt_meat_products
        random.randint(0, 150),  # mnt_fish_products
        random.randint(0, 100),  # mnt_sweet_products
        random.randint(0, 100),  # mnt_gold_prods
        random.randint(0, 10),  # num_deals_purchases
        random.randint(0, 10),  # num_web_purchases
        random.randint(0, 10),  # num_catalog_purchases
        random.randint(0, 10),  # num_store_purchases
        random.randint(0, 10),  # num_web_visits_month
        random.randint(50, 100)  # loyalty_score
    ])

customers_df = pd.DataFrame(customers_data, columns=[
    "customer_id", "year_birth", "education", "marital_status", "income",
    "kid_home", "teen_home", "dt_customer", "recency", "mnt_wines",
    "mnt_fruits", "mnt_meat_products", "mnt_fish_products", "mnt_sweet_products",
    "mnt_gold_prods", "num_deals_purchases", "num_web_purchases",
    "num_catalog_purchases", "num_store_purchases", "num_web_visits_month",
    "loyalty_score"
])

# Generate Sales Transactions Dataset
sales_data = []
for i in range(1, num_rows + 1):
    sales_data.append([
        5000 + i,  # transaction_id
        f"CA-2024-{i:04d}",  # order_id
        fake.date_between(start_date="-2y", end_date="today"),  # order_date
        fake.date_between(start_date="-2y", end_date="today"),  # ship_date
        random.choice(["Standard", "Express", "Same Day"]),  # ship_mode
        random.randint(1001, 2000),  # customer_id
        random.choice(["Consumer", "Corporate", "Home Office"]),  # segment
        "USA",  # country
        fake.city(),  # city
        fake.state(),  # state
        f"P{random.randint(3001, 4000)}",  # product_id
        random.choice(["Electronics", "Furniture", "Office Supplies"]),  # category
        random.choice(["Laptops", "Chairs", "Smartphones", "Tables", "Printers"]),  # sub_category
        fake.word().capitalize() + " " + random.choice(["Laptop", "Desk", "Phone", "Printer", "Monitor"]),  # product_name
        round(random.uniform(50, 2500), 2),  # sales amount
        random.randint(1, 5),  # quantity
        round(random.uniform(0, 20), 2),  # discount
        round(random.uniform(50, 2500), 2),  # total_amount
        random.choice(["Credit Card", "Debit Card", "PayPal", "Bank Transfer"]),  # payment_method
        random.choice(["Delivered", "In Transit", "Shipped", "Cancelled"])  # shipping_status
    ])

sales_df = pd.DataFrame(sales_data, columns=[
    "transaction_id", "order_id", "order_date", "ship_date", "ship_mode",
    "customer_id", "segment", "country", "city", "state",
    "product_id", "category", "sub_category", "product_name", "sales",
    "quantity", "discount", "total_amount", "payment_method", "shipping_status"
])

# Generate Products Dataset
products_data = []
for i in range(1, num_rows + 1):
    products_data.append([
        f"P{3000 + i}",  # product_id
        fake.word().capitalize() + " " + random.choice(["Laptop", "Desk", "Phone", "Printer", "Monitor"]),  # product_name
        random.choice(["Electronics", "Furniture", "Office Supplies"]),  # category
        random.choice(["Laptops", "Chairs", "Smartphones", "Tables", "Printers"]),  # sub_category
        random.choice(["Apple", "Dell", "HP", "Lenovo", "IKEA", "Samsung"]),  # brand
        round(random.uniform(50, 2500), 2),  # price
        random.randint(10, 500),  # stock_quantity
        random.randint(0, 30),  # discount
        round(random.uniform(1, 5), 1),  # rating
        random.choice(["Best Buy", "Amazon", "Walmart", "Target", "Newegg"])  # supplier
    ])

products_df = pd.DataFrame(products_data, columns=[
    "product_id", "product_name", "category", "sub_category", "brand",
    "price", "stock_quantity", "discount", "rating", "supplier"
])

# Save as CSV files locally
customers_df.to_csv("customers_synthetic.csv", index=False)
sales_df.to_csv("sales_synthetic.csv", index=False)
products_df.to_csv("products_synthetic.csv", index=False)

print("✅ Synthetic datasets generated and saved as CSV files:")
print("- customers_synthetic.csv")
print("- sales_synthetic.csv")
print("- products_synthetic.csv")


✅ Synthetic datasets generated and saved as CSV files:
- customers_synthetic.csv
- sales_synthetic.csv
- products_synthetic.csv


In [6]:
import pandas as pd

# Expand Pandas display settings for better visualization
pd.set_option('display.max_rows', 20)  # Adjust the number of rows shown
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 1000)  # Avoid column truncation
pd.set_option('display.colheader_justify', 'center')

# Load Customers Data
customers_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\customers_synthetic.csv")

# Load Products Data
products_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\products_synthetic.csv")

# Load Sales Transactions Data
sales_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\sales_synthetic.csv")

# Display first 10 rows of each dataset with all columns
print("📌 Customers Dataset:")
display(customers_df.head(10))

print("\n📌 Products Dataset:")
display(products_df.head(10))

print("\n📌 Sales Transactions Dataset:")
display(sales_df.head(10))


📌 Customers Dataset:


Unnamed: 0,customer_id,year_birth,education,marital_status,income,kid_home,teen_home,dt_customer,recency,mnt_wines,mnt_fruits,mnt_meat_products,mnt_fish_products,mnt_sweet_products,mnt_gold_prods,num_deals_purchases,num_web_purchases,num_catalog_purchases,num_store_purchases,num_web_visits_month,loyalty_score
0,1001,2002,Associate,Married,58545,2,3,2005-07-12,21,318,144,173,131,89,92,8,8,5,8,0,51
1,1002,1977,Master,Single,105408,3,3,2018-09-19,11,422,193,243,8,46,29,7,1,0,5,8,80
2,1003,1973,Master,Divorced,116851,1,1,2019-02-07,1,141,145,128,110,4,67,4,4,8,6,5,60
3,1004,1984,Master,Married,78974,3,2,2014-06-28,8,131,32,209,97,14,8,7,10,9,4,8,87
4,1005,1988,Graduate,Divorced,110761,1,1,2007-07-17,3,57,56,273,135,25,100,4,6,7,4,3,59
5,1006,1963,High School,Married,113796,2,0,2017-10-23,6,249,81,188,48,55,99,2,7,3,7,10,54
6,1007,1978,Bachelor,Married,118203,2,3,2008-04-28,5,386,15,163,14,26,91,1,3,3,1,0,85
7,1008,1988,High School,Widowed,30909,2,3,2008-03-01,8,329,114,77,127,90,7,1,6,3,1,8,80
8,1009,1990,PhD,Single,79314,1,1,2025-02-27,26,383,21,260,36,94,26,9,9,1,4,7,98
9,1010,1972,Associate,Divorced,58952,2,2,2006-09-14,13,111,100,20,57,93,76,6,7,5,0,5,51



📌 Products Dataset:


Unnamed: 0,product_id,product_name,category,sub_category,brand,price,stock_quantity,discount,rating,supplier
0,P3001,Fight Desk,Furniture,Smartphones,HP,2208.51,248,24,2.9,Target
1,P3002,Threat Monitor,Electronics,Laptops,Lenovo,694.67,16,13,1.6,Walmart
2,P3003,No Laptop,Office Supplies,Laptops,IKEA,2110.28,290,3,4.6,Amazon
3,P3004,Buy Monitor,Office Supplies,Laptops,Dell,763.3,127,3,4.8,Newegg
4,P3005,Describe Laptop,Office Supplies,Printers,HP,394.72,147,12,1.8,Best Buy
5,P3006,Instead Phone,Office Supplies,Chairs,HP,1704.11,423,2,3.4,Target
6,P3007,Which Monitor,Electronics,Chairs,Samsung,2308.4,314,29,4.7,Walmart
7,P3008,Less Laptop,Furniture,Printers,IKEA,1503.83,130,25,3.1,Walmart
8,P3009,Idea Desk,Furniture,Smartphones,Dell,253.8,131,12,2.0,Newegg
9,P3010,Civil Desk,Office Supplies,Tables,HP,1738.22,336,11,4.0,Target



📌 Sales Transactions Dataset:


Unnamed: 0,transaction_id,order_id,order_date,ship_date,ship_mode,customer_id,segment,country,city,state,product_id,category,sub_category,product_name,sales,quantity,discount,total_amount,payment_method,shipping_status
0,5001,CA-2024-0001,2025-01-19,2024-03-10,Same Day,1556,Consumer,USA,Port David,Illinois,P3470,Furniture,Chairs,Other Phone,2202.06,1,10.41,1654.07,Bank Transfer,Shipped
1,5002,CA-2024-0002,2025-02-13,2023-07-23,Same Day,1526,Consumer,USA,Johnsonhaven,Indiana,P3066,Electronics,Smartphones,Those Phone,2254.52,4,0.89,1677.33,Bank Transfer,Cancelled
2,5003,CA-2024-0003,2024-01-17,2023-10-24,Standard,1029,Consumer,USA,West Cindy,Wyoming,P3115,Furniture,Tables,Serve Monitor,482.08,2,12.81,2041.51,Debit Card,Shipped
3,5004,CA-2024-0004,2023-07-30,2024-03-10,Standard,1747,Corporate,USA,Shannonbury,Colorado,P3105,Furniture,Printers,Different Printer,1230.33,3,8.83,764.96,Credit Card,In Transit
4,5005,CA-2024-0005,2023-05-23,2023-11-17,Express,1224,Home Office,USA,Port Travisbury,Georgia,P3224,Office Supplies,Laptops,Cell Phone,722.98,3,10.91,2494.89,PayPal,Delivered
5,5006,CA-2024-0006,2024-08-07,2024-03-09,Same Day,1965,Consumer,USA,Moranfort,California,P3895,Office Supplies,Laptops,Election Monitor,1756.57,3,8.6,193.41,Bank Transfer,Cancelled
6,5007,CA-2024-0007,2023-08-03,2023-07-20,Express,1700,Home Office,USA,Port Daniellefurt,Virginia,P3602,Furniture,Chairs,Gun Monitor,2264.59,3,0.98,1222.93,PayPal,Delivered
7,5008,CA-2024-0008,2023-07-28,2023-06-22,Standard,1249,Corporate,USA,Hopkinsburgh,Arizona,P3183,Electronics,Smartphones,That Phone,167.09,4,10.16,1737.84,Bank Transfer,Shipped
8,5009,CA-2024-0009,2023-03-28,2023-09-30,Standard,1174,Corporate,USA,West Kevinport,Michigan,P3311,Furniture,Smartphones,Want Printer,2183.7,1,17.79,670.76,PayPal,In Transit
9,5010,CA-2024-0010,2023-05-26,2024-08-24,Standard,1487,Home Office,USA,Melissaton,New York,P3122,Electronics,Printers,Area Desk,1681.9,2,2.44,1924.22,Debit Card,Delivered


In [8]:
import pandas as pd
from IPython.core.display import display, HTML

# Expand Pandas display settings
pd.set_option('display.max_rows', 20)  # Adjust the number of rows shown
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 1000)  # Avoid column truncation
pd.set_option('display.colheader_justify', 'center')

# Update file paths to reflect new names
customers_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\customers_walmart.csv")
products_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\products_walmart.csv")
sales_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\sales_walmart.csv")

# Function to style and display datasets
def display_dataframe(title, df):
    html = f"<h3 style='color:blue; font-size:18px;'>{title}</h3>"
    display(HTML(html))
    display(df.head(10).style.set_table_styles([
        {'selector': 'th', 'props': [('background-color', '#404040'), ('color', 'white'), ('font-weight', 'bold')]},
        {'selector': 'td', 'props': [('border', '1px solid black')]}
    ]))

# Display each dataset with styling
display_dataframe("📌 Customers Walmart Dataset", customers_df)
display_dataframe("📌 Products Walmart Dataset", products_df)
display_dataframe("📌 Sales Walmart Dataset", sales_df)


Unnamed: 0,customer_id,year_birth,education,marital_status,income,kid_home,teen_home,dt_customer,recency,mnt_wines,mnt_fruits,mnt_meat_products,mnt_fish_products,mnt_sweet_products,mnt_gold_prods,num_deals_purchases,num_web_purchases,num_catalog_purchases,num_store_purchases,num_web_visits_month,loyalty_score
0,1001,2002,Associate,Married,58545,2,3,2005-07-12,21,318,144,173,131,89,92,8,8,5,8,0,51
1,1002,1977,Master,Single,105408,3,3,2018-09-19,11,422,193,243,8,46,29,7,1,0,5,8,80
2,1003,1973,Master,Divorced,116851,1,1,2019-02-07,1,141,145,128,110,4,67,4,4,8,6,5,60
3,1004,1984,Master,Married,78974,3,2,2014-06-28,8,131,32,209,97,14,8,7,10,9,4,8,87
4,1005,1988,Graduate,Divorced,110761,1,1,2007-07-17,3,57,56,273,135,25,100,4,6,7,4,3,59
5,1006,1963,High School,Married,113796,2,0,2017-10-23,6,249,81,188,48,55,99,2,7,3,7,10,54
6,1007,1978,Bachelor,Married,118203,2,3,2008-04-28,5,386,15,163,14,26,91,1,3,3,1,0,85
7,1008,1988,High School,Widowed,30909,2,3,2008-03-01,8,329,114,77,127,90,7,1,6,3,1,8,80
8,1009,1990,PhD,Single,79314,1,1,2025-02-27,26,383,21,260,36,94,26,9,9,1,4,7,98
9,1010,1972,Associate,Divorced,58952,2,2,2006-09-14,13,111,100,20,57,93,76,6,7,5,0,5,51


Unnamed: 0,product_id,product_name,category,sub_category,brand,price,stock_quantity,discount,rating,supplier
0,P3001,Fight Desk,Furniture,Smartphones,HP,2208.51,248,24,2.9,Target
1,P3002,Threat Monitor,Electronics,Laptops,Lenovo,694.67,16,13,1.6,Walmart
2,P3003,No Laptop,Office Supplies,Laptops,IKEA,2110.28,290,3,4.6,Amazon
3,P3004,Buy Monitor,Office Supplies,Laptops,Dell,763.3,127,3,4.8,Newegg
4,P3005,Describe Laptop,Office Supplies,Printers,HP,394.72,147,12,1.8,Best Buy
5,P3006,Instead Phone,Office Supplies,Chairs,HP,1704.11,423,2,3.4,Target
6,P3007,Which Monitor,Electronics,Chairs,Samsung,2308.4,314,29,4.7,Walmart
7,P3008,Less Laptop,Furniture,Printers,IKEA,1503.83,130,25,3.1,Walmart
8,P3009,Idea Desk,Furniture,Smartphones,Dell,253.8,131,12,2.0,Newegg
9,P3010,Civil Desk,Office Supplies,Tables,HP,1738.22,336,11,4.0,Target


Unnamed: 0,transaction_id,order_id,order_date,ship_date,ship_mode,customer_id,segment,country,city,state,product_id,category,sub_category,product_name,sales,quantity,discount,total_amount,payment_method,shipping_status
0,5001,CA-2024-0001,2025-01-19,2024-03-10,Same Day,1556,Consumer,USA,Port David,Illinois,P3470,Furniture,Chairs,Other Phone,2202.06,1,10.41,1654.07,Bank Transfer,Shipped
1,5002,CA-2024-0002,2025-02-13,2023-07-23,Same Day,1526,Consumer,USA,Johnsonhaven,Indiana,P3066,Electronics,Smartphones,Those Phone,2254.52,4,0.89,1677.33,Bank Transfer,Cancelled
2,5003,CA-2024-0003,2024-01-17,2023-10-24,Standard,1029,Consumer,USA,West Cindy,Wyoming,P3115,Furniture,Tables,Serve Monitor,482.08,2,12.81,2041.51,Debit Card,Shipped
3,5004,CA-2024-0004,2023-07-30,2024-03-10,Standard,1747,Corporate,USA,Shannonbury,Colorado,P3105,Furniture,Printers,Different Printer,1230.33,3,8.83,764.96,Credit Card,In Transit
4,5005,CA-2024-0005,2023-05-23,2023-11-17,Express,1224,Home Office,USA,Port Travisbury,Georgia,P3224,Office Supplies,Laptops,Cell Phone,722.98,3,10.91,2494.89,PayPal,Delivered
5,5006,CA-2024-0006,2024-08-07,2024-03-09,Same Day,1965,Consumer,USA,Moranfort,California,P3895,Office Supplies,Laptops,Election Monitor,1756.57,3,8.6,193.41,Bank Transfer,Cancelled
6,5007,CA-2024-0007,2023-08-03,2023-07-20,Express,1700,Home Office,USA,Port Daniellefurt,Virginia,P3602,Furniture,Chairs,Gun Monitor,2264.59,3,0.98,1222.93,PayPal,Delivered
7,5008,CA-2024-0008,2023-07-28,2023-06-22,Standard,1249,Corporate,USA,Hopkinsburgh,Arizona,P3183,Electronics,Smartphones,That Phone,167.09,4,10.16,1737.84,Bank Transfer,Shipped
8,5009,CA-2024-0009,2023-03-28,2023-09-30,Standard,1174,Corporate,USA,West Kevinport,Michigan,P3311,Furniture,Smartphones,Want Printer,2183.7,1,17.79,670.76,PayPal,In Transit
9,5010,CA-2024-0010,2023-05-26,2024-08-24,Standard,1487,Home Office,USA,Melissaton,New York,P3122,Electronics,Printers,Area Desk,1681.9,2,2.44,1924.22,Debit Card,Delivered


In [10]:
import pandas as pd

# Define data dictionary for Customers Dataset
customers_dict = pd.DataFrame({
    "Column Name": ["customer_id", "year_birth", "education", "marital_status", "income",
                    "kid_home", "teen_home", "dt_customer", "recency", "mnt_wines",
                    "mnt_fruits", "mnt_meat_products", "mnt_fish_products", "mnt_sweet_products",
                    "mnt_gold_prods", "num_deals_purchases", "num_web_purchases",
                    "num_catalog_purchases", "num_store_purchases", "num_web_visits_month", "loyalty_score"],
    "Data Type": ["Integer (PK)", "Integer", "String", "String", "Integer",
                  "Integer", "Integer", "Date", "Integer", "Integer",
                  "Integer", "Integer", "Integer", "Integer",
                  "Integer", "Integer", "Integer",
                  "Integer", "Integer", "Integer", "Integer"],
    "Description": ["Unique customer identifier", "Customer’s birth year", "Education level", "Marital status", "Annual income in USD",
                    "Number of kids in household", "Number of teens in household", "Customer registration date", "Days since last purchase",
                    "Amount spent on wines", "Amount spent on fruits", "Amount spent on meat products", "Amount spent on fish products",
                    "Amount spent on sweet products", "Amount spent on gold products", "Number of purchases from deals",
                    "Number of web purchases", "Number of catalog purchases", "Number of in-store purchases", "Number of website visits in last month",
                    "Customer loyalty score"],
    "Example Value": [1001, 1985, "Master", "Married", 75000, 1, 0, "2020-05-15", 10, 200,
                      50, 100, 40, 20, 60, 5, 3, 2, 4, 6, 75]
})

# Define data dictionary for Products Dataset
products_dict = pd.DataFrame({
    "Column Name": ["product_id", "product_name", "category", "sub_category", "brand",
                    "price", "stock_quantity", "discount", "rating", "supplier"],
    "Data Type": ["String (PK)", "String", "String", "String", "String",
                  "Float", "Integer", "Float", "Float", "String"],
    "Description": ["Unique product identifier", "Product name", "Product category", "Product sub-category",
                    "Brand of the product", "Price in USD", "Available stock quantity", "Discount percentage",
                    "Average customer rating", "Supplier name"],
    "Example Value": ["P3001", "Dell XPS 15", "Electronics", "Laptops", "Dell",
                      1200.99, 50, 10.5, 4.5, "Best Buy"]
})

# Define data dictionary for Sales Transactions Dataset
sales_dict = pd.DataFrame({
    "Column Name": ["transaction_id", "order_id", "order_date", "ship_date", "ship_mode",
                    "customer_id", "segment", "country", "city", "state",
                    "product_id", "category", "sub_category", "product_name", "sales",
                    "quantity", "discount", "total_amount", "payment_method", "shipping_status"],
    "Data Type": ["Integer (PK)", "String", "Date", "Date", "String",
                  "Integer (FK)", "String", "String", "String", "String",
                  "String (FK)", "String", "String", "String", "Float",
                  "Integer", "Float", "Float", "String", "String"],
    "Description": ["Unique transaction identifier", "Unique order identifier", "Date of order placement", "Date of shipment",
                    "Shipping mode", "Customer ID (foreign key from Customers table)", "Customer segment",
                    "Country of purchase", "City of purchase", "State of purchase", "Product ID (foreign key from Products table)",
                    "Product category", "Product sub-category", "Product name", "Total sales amount",
                    "Quantity purchased", "Discount percentage", "Total amount after discount",
                    "Payment method used", "Current shipping status"],
    "Example Value": [5001, "CA-2024-001", "2024-01-10", "2024-01-15", "Standard",
                      1001, "Consumer", "USA", "New York", "New York",
                      "P3001", "Electronics", "Laptops", "Dell XPS 15", 1200.50,
                      1, 5, 1140.48, "Credit Card", "Delivered"]
})

# Set the file path where the data dictionary will be saved
data_dictionary_path = r"C:\Users\masood\Downloads\Data Warehouse Project\Data Dictionary\data_dictionary.xlsx"

# Save the data dictionary as an Excel file with multiple sheets
with pd.ExcelWriter(data_dictionary_path) as writer:
    customers_dict.to_excel(writer, sheet_name="Customers", index=False)
    products_dict.to_excel(writer, sheet_name="Products", index=False)
    sales_dict.to_excel(writer, sheet_name="Sales", index=False)

# Confirmation message
print(f"✅ Data dictionary successfully created: {data_dictionary_path}")


✅ Data dictionary successfully created: C:\Users\masood\Downloads\Data Warehouse Project\Data Dictionary\data_dictionary.xlsx


In [12]:
import pandas as pd

# Load renamed FlipKart datasets
customers_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\Customers_FlipKart.csv")
products_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\Products_FlipKart.csv")
sales_df = pd.read_csv(r"C:\Users\masood\Downloads\Data Warehouse Project\Raw Data Folder\Sales_FlipKart.csv")

# Display first few rows to verify
print("✅ Customers Data:")
display(customers_df.head())

print("✅ Products Data:")
display(products_df.head())

print("✅ Sales Data:")
display(sales_df.head())


✅ Customers Data:


Unnamed: 0,customer_id,year_birth,education,marital_status,income,kid_home,teen_home,dt_customer,recency,mnt_wines,mnt_fruits,mnt_meat_products,mnt_fish_products,mnt_sweet_products,mnt_gold_prods,num_deals_purchases,num_web_purchases,num_catalog_purchases,num_store_purchases,num_web_visits_month,loyalty_score
0,1001,2002,Associate,Married,58545,2,3,2005-07-12,21,318,144,173,131,89,92,8,8,5,8,0,51
1,1002,1977,Master,Single,105408,3,3,2018-09-19,11,422,193,243,8,46,29,7,1,0,5,8,80
2,1003,1973,Master,Divorced,116851,1,1,2019-02-07,1,141,145,128,110,4,67,4,4,8,6,5,60
3,1004,1984,Master,Married,78974,3,2,2014-06-28,8,131,32,209,97,14,8,7,10,9,4,8,87
4,1005,1988,Graduate,Divorced,110761,1,1,2007-07-17,3,57,56,273,135,25,100,4,6,7,4,3,59


✅ Products Data:


Unnamed: 0,product_id,product_name,category,sub_category,brand,price,stock_quantity,discount,rating,supplier
0,P3001,Fight Desk,Furniture,Smartphones,HP,2208.51,248,24,2.9,Target
1,P3002,Threat Monitor,Electronics,Laptops,Lenovo,694.67,16,13,1.6,Walmart
2,P3003,No Laptop,Office Supplies,Laptops,IKEA,2110.28,290,3,4.6,Amazon
3,P3004,Buy Monitor,Office Supplies,Laptops,Dell,763.3,127,3,4.8,Newegg
4,P3005,Describe Laptop,Office Supplies,Printers,HP,394.72,147,12,1.8,Best Buy


✅ Sales Data:


Unnamed: 0,transaction_id,order_id,order_date,ship_date,ship_mode,customer_id,segment,country,city,state,product_id,category,sub_category,product_name,sales,quantity,discount,total_amount,payment_method,shipping_status
0,5001,CA-2024-0001,2025-01-19,2024-03-10,Same Day,1556,Consumer,USA,Port David,Illinois,P3470,Furniture,Chairs,Other Phone,2202.06,1,10.41,1654.07,Bank Transfer,Shipped
1,5002,CA-2024-0002,2025-02-13,2023-07-23,Same Day,1526,Consumer,USA,Johnsonhaven,Indiana,P3066,Electronics,Smartphones,Those Phone,2254.52,4,0.89,1677.33,Bank Transfer,Cancelled
2,5003,CA-2024-0003,2024-01-17,2023-10-24,Standard,1029,Consumer,USA,West Cindy,Wyoming,P3115,Furniture,Tables,Serve Monitor,482.08,2,12.81,2041.51,Debit Card,Shipped
3,5004,CA-2024-0004,2023-07-30,2024-03-10,Standard,1747,Corporate,USA,Shannonbury,Colorado,P3105,Furniture,Printers,Different Printer,1230.33,3,8.83,764.96,Credit Card,In Transit
4,5005,CA-2024-0005,2023-05-23,2023-11-17,Express,1224,Home Office,USA,Port Travisbury,Georgia,P3224,Office Supplies,Laptops,Cell Phone,722.98,3,10.91,2494.89,PayPal,Delivered
