In [1]:
import pandas as pd

import numpy as np

from datetime import datetime, timedelta

In [2]:
# Generate mock sales data

np.random.seed(42)

start_date = datetime(2020, 1, 1)

num_months = 48 # 4 years of monthly data

date_rng = pd.date_range(start_date, periods=num_months, freq='MS') # MS for Month Start



sales_data = pd.DataFrame(date_rng, columns=['Date'])



# Base sales with a slight upward trend

base_sales = 10000 + np.arange(num_months) * 50



# Seasonal component (higher in Q4, lower in Q1)

seasonal_component = 2000 * np.sin(2 * np.pi * (sales_data['Date'].dt.month - 1) / 12 + np.pi/2) \

+ 1500 * np.sin(2 * np.pi * (sales_data['Date'].dt.month - 1) / 6)



# Random noise

noise = np.random.normal(0, 500, num_months)



sales_data['SalesAmount'] = (base_sales + seasonal_component + noise).astype(int)

sales_data['SalesAmount'] = sales_data['SalesAmount'].clip(lower=2000) # Ensure sales are positive



# Add promotional flags (randomly)

sales_data['Promotion'] = np.random.choice([0, 1], num_months, p=[0.8, 0.2])

sales_data.loc[sales_data['Promotion'] == 1, 'SalesAmount'] *= np.random.uniform(1.1, 1.3) # Increase sales during promotion



# Add holiday flags (e.g., December)

sales_data['HolidayMonth'] = (sales_data['Date'].dt.month == 12).astype(int)

sales_data.loc[sales_data['HolidayMonth'] == 1, 'SalesAmount'] *= np.random.uniform(1.15, 1.4) # Increase sales during holiday month



sales_data['SalesAmount'] = sales_data['SalesAmount'].astype(int)



# Save to CSV

sales_data.to_csv('retail_sales_mock_data.csv', index=False)

print("Mock retail sales data generated: retail_sales_mock_data.csv")

print(sales_data.head())



Mock retail sales data generated: retail_sales_mock_data.csv
        Date  SalesAmount  Promotion  HolidayMonth
0 2020-01-01        12248          0             0
1 2020-02-01        11712          0             0
2 2020-03-01        11423          0             0
3 2020-04-01        14030          1             0
4 2020-05-01         9082          0             0


 16623.34031037 14619.8465219 ]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  sales_data.loc[sales_data['Promotion'] == 1, 'SalesAmount'] *= np.random.uniform(1.1, 1.3) # Increase sales during promotion
