In [1]:
import os
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker

In [2]:
# Initialize Faker and seed every random source for reproducibility.
# Faker draws from its own shared generator, so seeding `random` and NumPy
# alone leaves the Faker-produced fields (dates, words, UUIDs) different on
# every run; Faker.seed() pins them as well.
SEED = 42
faker = Faker()
Faker.seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

In [3]:
# Fixed lookup values that the row generator samples from
country_codes = ["DE", "FR", "RU", "IT", "ES", "PL"]  # Example EU and Russian countries
product_codes = [f"P{n:04d}" for n in range(1, 51)]  # P0001 .. P0050 (50 unique products)
routes = [f"R{n:03d}" for n in range(1, 21)]  # R001 .. R020 (20 delivery routes)
currencies = ["EUR", "RUB", "USD"]
# Season name -> calendar month numbers belonging to it
seasons = {
    "Winter": [12, 1, 2],
    "Spring": [3, 4, 5],
    "Summer": [6, 7, 8],
    "Autumn": [9, 10, 11],
}

In [4]:
# Helper function to calculate season
def get_season(month):
    """Return the name of the first entry in `seasons` whose month list contains `month`.

    Returns 'Unknown' when no entry lists the given month.
    """
    matching_names = (name for name, month_numbers in seasons.items() if month in month_numbers)
    return next(matching_names, 'Unknown')

In [6]:
from datetime import date, timedelta

In [7]:
# Bounds of the order-date window passed to the generator, as datetime.date objects
start_date, end_date = (
    date(year=2009, month=1, day=1),
    date(year=2011, month=12, day=31),
)


In [8]:
# Generate fake data: one list per order, fields in the order they are appended below
num_rows = 2421
data = []
# Short order codes already issued. Faker's `.unique` proxy only guarantees that
# the *full* UUID is unique; after truncating to 8 characters two rows could
# still share a code, so the prefixes are de-duplicated explicitly here.
seen_order_codes = set()

for _ in range(num_rows):
    order_date = faker.date_between(start_date=start_date, end_date=end_date)
    # Lead time of 30-180 days (both bounds inclusive), i.e. roughly 1-6 months
    requested_delivery_date = order_date + timedelta(days=random.randint(30, 180))
    country_code = random.choice(country_codes)
    product_code = random.choice(product_codes)
    description = f"{faker.word()} {faker.word()}"
    order_type = "VO"  # Vendor Order, constant
    # Unique short ID: redraw on the (rare) occasion a prefix was already used
    customer_order_code = faker.uuid4()[:8]
    while customer_order_code in seen_order_codes:
        customer_order_code = faker.uuid4()[:8]
    seen_order_codes.add(customer_order_code)
    value = round(random.uniform(50, 500), 2)  # Price between 50 and 500
    currency = random.choice(currencies)
    items = random.randint(1, 100)  # Quantity between 1 and 100
    route = random.choice(routes)
    month = order_date.month
    season = get_season(month)  # season is derived from the order month

    data.append([
        order_date, requested_delivery_date, country_code, product_code, description, order_type,
        customer_order_code, value, currency, items, route, season
    ])

In [10]:
# Build the DataFrame from the generated rows; the headers are listed in the
# same order as the fields of each row in `data`
columns = [
    "Order Date",
    "Requested Delivery Date",
    "Customer Country Code",
    "Product Code",
    "Description",
    "Order Type",
    "Customer Order Code",
    "Value",
    "Currency",
    "Items",
    "Route",
    "Season",
]
df = pd.DataFrame(data=data, columns=columns)

In [14]:
# Save as CSV
# The preferred location is an absolute, machine-specific path. When its folder
# does not exist (e.g. the notebook is run on another machine) fall back to the
# same file name in the current working directory instead of letting to_csv
# fail on a missing directory.
preferred_output_file = "C:/MBAN- Schulich/MBAN- Sem 2/Predictive Modeling/Hackathon main/fake_xyz_apparel_data.csv"
if os.path.isdir(os.path.dirname(preferred_output_file)):
    output_file = preferred_output_file
else:
    output_file = os.path.basename(preferred_output_file)
df.to_csv(output_file, index=False)  # index=False: do not write the row index as a column