In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import gc
import warnings
warnings.filterwarnings('ignore')

# 1.0 Load submission data and real purchases in test period

In [None]:
# Name of the model under evaluation; it is used later on in the printed
# report, in the output file name and in the saved result row.
model_name = 'baseline_top12'

# Read the submission file that is evaluated in this notebook.
df_submission = pd.read_csv('../data/baseline_sample_submission.csv')

In [None]:
# Read the transaction log. The first column is parsed as a date and
# article_id is read as a string instead of being inferred as a number.
transactions_path = '../data/transactions_train.csv'
df_trans = pd.read_csv(
    transactions_path,
    parse_dates=[0],
    dtype={'article_id': 'string'},
)
df_trans.head()

In [None]:
# Pick the test week to evaluate.
test_week_no = 3

# Inclusive (start_date, end_date) range for each supported test week.
week_ranges = {
    1: ("2020-08-26", "2020-09-01"),
    2: ("2020-09-02", "2020-09-08"),
    3: ("2020-09-09", "2020-09-15"),
}
# Any other week number falls back to the range of week 3.
start_date, end_date = week_ranges.get(test_week_no, week_ranges[3])

# Keep only the purchases made inside the selected week (both bounds are
# inclusive), then drop the columns that are not used afterwards.
in_test_week = f't_dat >= "{start_date}" and t_dat <= "{end_date}"'
df_test_week = (
    df_trans
    .query(in_test_week)
    .copy()
    .drop(columns=['t_dat', 'sales_channel_id'])
)

print(f'Number of purchases in test-week are: {df_test_week.shape[0]}')

df_test_week.head()

In [None]:
# NOT NEEDED:
# Create wardrobe for testweek:
# df_wardrobe_week = df_test_week.groupby('customer_id')[['article_id','price']].aggregate(lambda x: list(x)).reset_index().copy()
# df_wardrobe_week.head()

In [None]:
# Turn each space-separated prediction string into a list of article ids and
# keep at most the first 12 entries (the original note says the truncation is
# only necessary for the market-basket submission).
df_submission['prediction_12'] = df_submission['prediction'].apply(
    lambda prediction: prediction.split(' ')[:12]
)

# The raw prediction string is no longer needed once the list exists.
df_submission = df_submission.drop(columns=['prediction'])

df_submission.head()

# 2.0 Collect all prices from forecasted products

In [None]:
# Collect every test-week purchase whose article is among the 12 articles
# forecasted for the purchasing customer, together with the price paid.

# Build the customer -> forecast lookup once. Running a DataFrame query for
# every single purchase row scans the whole submission each time and makes the
# cell quadratic; a dict lookup is constant time per purchase.
# Only customers that actually bought something in the test week are needed.
# drop_duplicates keeps the first row per customer, which is the same row the
# previous `.iloc[0]` lookup selected.
df_sub_buyers = (
    df_submission[df_submission['customer_id'].isin(df_test_week['customer_id'])]
    .drop_duplicates(subset='customer_id')
)
predictions_by_customer = dict(
    zip(df_sub_buyers['customer_id'], df_sub_buyers['prediction_12'])
)

price_list = []
article_list = []
purchases = zip(
    df_test_week['customer_id'],
    df_test_week['article_id'],
    df_test_week['price'],
)
for customer_id, article_id, price in tqdm(purchases, total=len(df_test_week)):
    # A customer without any submission row has no forecast, so the purchase
    # counts as a miss (previously this case raised an IndexError).
    if article_id in predictions_by_customer.get(customer_id, ()):
        price_list.append(price)
        article_list.append(article_id)


print(f'Number of found prices: {len(price_list)}')
print(f'Number of forecasted articles: {len(article_list)}')


# 3.0 Calculate turnover & hitrate in test week

In [None]:
print(f'This calculation regards the model "{model_name}" in the week between {start_date} and {end_date}.\n')
print(f'Number of purchased articles: {df_test_week.shape[0]}')

# Hit rate: share of test-week purchases that were among the forecasted
# articles of the purchasing customer.
sold_articles = df_test_week.shape[0]
hits = len(article_list)
hitrate = hits/sold_articles

# Turnover: total of all test-week prices, the part of it that belongs to
# correctly forecasted purchases, and the ratio of the two.
turnover_week = df_test_week['price'].sum()
forecasted_turnover = sum(price_list)
forecast_share = forecasted_turnover/turnover_week

print(f'The hitrate is: {round(hitrate*100, 2)} %')
print(f'Turnover in this week is €: {round(turnover_week, 2)}')
print(f'The forecasted turnover is €: {round(forecasted_turnover, 2)}')
print(f'\nWe are able to forecast the following share of turnover:\n{round(forecast_share*100, 4)} %')

# One result row for the CSV collection; the order of the values defines the
# column order of the saved row.
result_list = [
    model_name,
    start_date,
    end_date,
    sold_articles,
    hits,
    hitrate,
    turnover_week,
    forecasted_turnover,
    forecast_share,
]

# 4.0 Save price-list, article-list and results in csv

In [None]:
# Pair every correctly forecasted article with the price it was sold for and
# store the pairs as a CSV file named after the model and the test week.
forecast_rows = list(zip(article_list, price_list))
forecast_path = f'../data/forecast-results_{model_name}_{start_date}_{end_date}.csv'

df_forecast = pd.DataFrame(forecast_rows, columns=['article_id', 'price'])
df_forecast.to_csv(forecast_path, index=False)
df_forecast.head()


In [None]:
# Save results in csv
from csv import writer
def append_list_as_row(file_name, list_of_elem):
    """Append one row to a CSV file.

    Parameters
    ----------
    file_name : path of the CSV file; it is opened in append mode, so rows
        already in the file are kept and the file is created if it does not
        exist yet.
    list_of_elem : sequence of values written as the cells of the new row.
    """
    # newline='' leaves the line endings to the csv writer.
    with open(file_name, 'a+', newline='') as csv_file:
        writer(csv_file).writerow(list_of_elem)

In [None]:
# Add the result row of this run as a new last line of the shared results file.
results_csv = '../data/collection_forecast_results.csv'
append_list_as_row(results_csv, result_list)