In [1]:
from pathlib import Path

# Make sure the output folder for the generated orders file exists;
# an already existing folder is left untouched.
Path('orders').mkdir(parents=False, exist_ok=True)

In [2]:
import pandas as pd
import datetime
import random

In [3]:
# First and last calendar day of the simulated sales history (midnight timestamps).
sales_start_date = datetime.datetime(year=2020, month=1, day=1)
sales_end_date = datetime.datetime(year=2021, month=12, day=31)

In [4]:
# Load the tab-separated retail price list.
retail_prices = pd.read_csv(
    './normalized/05_retail_prices.csv',
    sep='\t',
)

# Items priced at or below one third of the median retail price (rounded to
# cents) are treated as "cheap" by the order generator.
median = retail_prices.loc[:, 'Retail price (USD)'].median()
price_for_cheap_items = round(median / 3, ndigits=2)

In [5]:
# Bounds for the number of orders generated per day. They are fed to
# random.randint, which only accepts integers (floats raise TypeError since
# Python 3.12), so every bound is rounded with round(x) -- which returns an
# int -- rather than round(x, 0), which returns a float such as 6.0.
sugg_operations_per_day = 8

# Regular day: from 30% below the suggested value up to five times it.
min_sugg_operations_per_day = round(sugg_operations_per_day * (1 - 0.3))
max_sugg_operations_per_day = sugg_operations_per_day * 5

# "New year" range: regular bounds raised by 35% (min) and 45% (max).
min_sugg_operations_per_day_new_year = round(min_sugg_operations_per_day * (1 + 0.35))
max_sugg_operations_per_day_new_year = round(max_sugg_operations_per_day * (1 + 0.45))

# "January" range: regular bounds lowered by 60% (min) and 50% (max).
min_sugg_operations_per_day_january = round(min_sugg_operations_per_day * (1 - 0.6))
max_sugg_operations_per_day_january = round(max_sugg_operations_per_day * (1 - 0.5))

In [6]:
# Simulate one order line per sale for every day between sales_start_date and
# sales_end_date (both inclusive) and export them to ./orders/orders.csv.
orders_columns = ['Date', 'SalespointCode', 'ItemCode', 'Qty', 'DiscountCardID']

items = pd.read_csv('./normalized/04_items.csv', sep='\t')
salespoints = pd.read_excel('./data/SalesPoints.xlsx')
discount_cards = pd.read_csv('./data/Discount_cards.csv')

# Item code -> retail price, built once instead of filtering retail_prices for
# every single order. drop_duplicates keeps the first row per code, which is
# the row the former per-order `.iloc[0]` lookup selected. A code missing from
# the price list now raises KeyError (previously IndexError).
price_by_code = (
    retail_prices
    .drop_duplicates('Code')
    .set_index('Code')['Retail price (USD)']
    .to_dict()
)

# Rows are collected in a plain list and converted to a DataFrame once at the
# end; growing a DataFrame with pd.concat on every order copies all previous
# rows each time (quadratic) and makes the dtypes depend on how pandas treats
# the initial empty frame.
order_records = []

sales_date = sales_start_date

while sales_date <= sales_end_date:

    # Daily volume range: December 15-31 uses the "new year" bounds,
    # January 1-13 the "january" bounds, every other day the regular ones.
    if sales_date.month == 12 and sales_date.day > 14:
        orders_range = (min_sugg_operations_per_day_new_year, max_sugg_operations_per_day_new_year)
    elif sales_date.month == 1 and sales_date.day < 14:
        orders_range = (min_sugg_operations_per_day_january, max_sugg_operations_per_day_january)
    else:
        orders_range = (min_sugg_operations_per_day, max_sugg_operations_per_day)

    # random.randint requires integers (floats raise TypeError since Python
    # 3.12); the bounds may be integral floats when produced by round(x, 0).
    number_of_orders = random.randint(int(orders_range[0]), int(orders_range[1]))

    for i in range(0, number_of_orders):

        # Uniformly pick the item that was sold.
        item_line = random.randint(0, len(items) - 1)
        item_code = items['Code'].iloc[item_line]
        item_retail_price = price_by_code[item_code]

        # Quantity is 1 unless one of the independent draws below hits; later
        # hits override earlier ones. Bulk quantities (4 and more) are only
        # allowed for items priced at or below price_for_cheap_items.
        qty = 1

        if random.randint(1, 12) == 2:
            qty = 2
        if random.randint(1, 24) == 3:
            qty = 3
        if random.randint(1, 128) == 4 and item_retail_price <= price_for_cheap_items:
            qty = random.randint(4, 11)
        if random.randint(1, 256) == 5 and item_retail_price <= price_for_cheap_items:
            qty = random.randint(5, 15)
        if random.randint(1, 312) == 6 and item_retail_price <= price_for_cheap_items:
            qty = random.randint(6, 21)

        # Uniformly pick the sales point.
        salespoint_line = random.randint(0, len(salespoints) - 1)
        salespoint_code = salespoints['Sales Point'].iloc[salespoint_line]

        # Two-stage draw: an upper bound from 1..25, then a value from
        # 1..upper bound; a random discount card is attached when that value
        # is below 4. 0 marks "no card", so a real card whose id equals 0
        # would also be exported as empty.
        if random.randint(1, random.randint(1, 25)) < 4:
            discount_card_id = discount_cards['id'].iloc[random.randint(0, len(discount_cards) - 1)]
        else:
            discount_card_id = 0

        order_records.append({
            'Date': sales_date,
            'SalespointCode': salespoint_code,
            'ItemCode': item_code,
            'Qty': qty,
            'DiscountCardID': discount_card_id if discount_card_id != 0 else None})

    sales_date += datetime.timedelta(days=1)

orders = pd.DataFrame(order_records, columns=orders_columns)
# Keep the card ids in an object column so that the missing values cannot
# upcast integer ids to float in the exported file.
orders['DiscountCardID'] = pd.Series(
    [record['DiscountCardID'] for record in order_records], dtype=object)

# Sequential 1-based order id, exported as the first column.
orders['ID'] = orders.index + 1
orders_columns.insert(0, 'ID')
orders.to_csv('./orders/orders.csv', sep='\t', index=False, columns=orders_columns)