In [3]:
# We'll use the following columns to generate a synthetic dataset with 200,000 rows of data over a five-year period 
# for an online store dealing in sales of smartphones and accessories.


# 1. Date
# 2. Sales Amount
# 3. Product or Service
# 4. Quantity Sold
# 5. Price
# 6. Customer Demographics
# 7. Marketing Spend
# 8. Promotions and Discounts
# 9. Economic Indicators
# 10. Competitor Information

# Here's a Python script to generate this dataset

# Importing Libraries
import numpy as np
import pandas as pd
from faker import Faker
from datetime import timedelta, datetime

# Initialize Faker (used below only to produce synthetic city names)
fake = Faker()

# Define Parameters
SEED = 42  # fixed seed so a fresh "Restart & Run All" regenerates the same dataset
num_rows = 200000
start_date = datetime(2019,1,1)
end_date = datetime(2023,12,31)
date_range = pd.date_range(start = start_date, end = end_date, freq = 'D')

# Seed both sources of randomness: NumPy drives every numeric/categorical
# column, Faker drives the city names.
rng = np.random.default_rng(SEED)
fake.seed_instance(SEED)

# Generate Dates (sampled with replacement from the daily calendar)
dates = rng.choice(date_range.to_numpy(), num_rows)

# Generate Product Data
products = ['Iphone', 'Samsung', 'Sony', 'Windows', 'Earpods', 'Charger', 'Covers']
product = rng.choice(products, num_rows)

# Generate Prices: fixed unit price per product, looked up in one vectorized pass
price_dict = {
    'Iphone': 2100, 'Samsung': 1590, 'Sony': 1000, 'Windows': 980, 'Earpods': 60, 'Charger': 14, 'Covers': 25
}
price = pd.Series(product).map(price_dict).to_numpy()

# Generate quantity (upper bound is exclusive, so values are 1..6)
quantity = rng.integers(1, 7, num_rows)

# Generate Sales Amount (gross: quantity * unit price, discounts are NOT subtracted)
sales_amount = quantity * price

# Generate customer demographics (ages are 18..69, upper bound exclusive)
age = rng.integers(18, 70, num_rows)
gender = rng.choice(['Male', 'Female'], num_rows)
location = [fake.city() for _ in range(num_rows)]

# Generate amount spent on marketing
marketing = rng.uniform(.5, 1, num_rows)

# Generate Promotions and discount
# Each promotion label maps to the fraction of the unit price taken off.
# 'Buy One Get One Free' carries no monetary discount in this dataset.
# NOTE(review): the literal label 'None' is in pandas.read_csv's default NA
# list in recent pandas versions, so it may come back as NaN when the CSV is
# re-loaded; read with keep_default_na=False if the label must survive.
promotion_rates = {
    'None': 0.0,
    '10% Off': 0.10,
    '20% Off': 0.20,
    'Buy One Get One Free': 0.0,
}
promotions = rng.choice(list(promotion_rates), num_rows)
discounts = pd.Series(promotions).map(promotion_rates).to_numpy() * price

# Generate Economic Indicators
gdp_growth = rng.uniform(1.5, 3.5, num_rows)
inflation_rate = rng.uniform(1, 3, num_rows)

# Generate Competitor information
# Competitor price is our unit price scaled by a per-row factor in [0.9, 1.1).
competitor_prices = price * rng.uniform(0.9, 1.1, num_rows)
competitor_launch = rng.choice([True, False], num_rows, p=[0.1, 0.9])

# Create DataFrame
# NOTE(review): the 'Product ' key has a trailing space; it is kept as-is
# because the column name is part of the saved CSV's schema -- confirm whether
# downstream code expects 'Product ' or 'Product' before renaming.
data = {
    'Date': dates,
    'Product ': product,
    'Quantity Sold': quantity,
    'Price': price,
    'Sales Amount': sales_amount,
    'Customer Age': age,
    'Customer Gender': gender,
    'Customer Location': location,
    'Marketing Cost': marketing,
    'Promotions and Discounts': promotions,
    'Discount Amount': discounts,
    'GDP Growth': gdp_growth,
    'Inflation Rate': inflation_rate,
    'Competitor Price': competitor_prices,
    'Competitor Launch': competitor_launch
}

df = pd.DataFrame(data)

# Sanity checks on the generated frame before it is written out
assert len(df) == num_rows
assert not df.isna().any().any()
assert (df['Sales Amount'] == df['Quantity Sold'] * df['Price']).all()

# Save to csv
df.to_csv('phones_and_accessories_sales_data.csv', index=False)

df


Unnamed: 0,Date,Product,Quantity Sold,Price,Sales Amount,Customer Age,Customer Gender,Customer Location,Marketing Cost,Promotions and Discounts,Discount Amount,GDP Growth,Inflation Rate,Competitor Price,Competitor Launch
0,2022-03-29,Charger,2,14,28,50,Male,Ramseyfurt,0.664894,,0.0,3.240903,2.557529,14.757094,False
1,2022-03-30,Covers,1,25,25,43,Female,Floresland,0.778824,,0.0,2.541638,1.280799,26.569694,False
2,2022-01-11,Charger,6,14,84,30,Female,Laurastad,0.986713,,0.0,2.372445,2.759502,15.028323,True
3,2021-02-28,Earpods,6,60,360,26,Male,South Jeffrey,0.643637,,0.0,3.134899,2.195626,61.430625,False
4,2020-11-13,Windows,5,980,4900,21,Female,Sabrinaton,0.877316,Buy One Get One Free,0.0,1.728475,2.668554,1064.825643,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,2022-11-14,Sony,5,1000,5000,47,Male,Port Joanborough,0.846039,,0.0,1.589290,1.835124,911.502066,True
199996,2022-10-14,Earpods,5,60,300,64,Female,Turnertown,0.693099,Buy One Get One Free,0.0,2.215004,1.456387,56.268889,False
199997,2019-01-06,Charger,2,14,28,44,Female,North Karen,0.805530,Buy One Get One Free,0.0,3.496444,2.708249,14.251086,True
199998,2019-08-08,Earpods,2,60,120,40,Female,Joannatown,0.860348,Buy One Get One Free,0.0,3.151685,1.637726,58.585310,False
