# Team Captain - M5 Accuracy: Sales Prediction

In [0]:
import catboost
import numpy as np

import config
import prepare_data
import utility

In [0]:
# Restore the previously trained CatBoost regressor from
# 'catboost_regressor.cbm' under config.model_path. The path is converted
# with str() before being handed to load_model.
regressor = catboost.CatBoostRegressor()
regressor.load_model(str(config.model_path / 'catboost_regressor.cbm'))

In [0]:
# Load the calendar table ('calendar.csv' under config.m5_path); it is later
# passed to prepare_data.generate_prediction_data inside predict().
calendar = utility.read_calendar(config.m5_path / 'calendar.csv')

In [0]:
# Load the historical sales table ('sales_train_validation.csv' under
# config.m5_path). NOTE: calculate_rolling_mean() and predict() read this
# module-level name directly, so it must be defined before they are called.
sales_data = utility.read_sales_data(config.m5_path / 'sales_train_validation.csv')

In [0]:
# Load the sell-price table ('sell_prices.csv' under config.m5_path); it is
# later passed to prepare_data.generate_prediction_data inside predict().
prices = utility.read_prices(config.m5_path / 'sell_prices.csv')

In [0]:
from tqdm.notebook import tqdm
def calculate_rolling_mean(data, n_products, n_days, window_size=28, sales=None):
    """Add a rolling-mean sales feature column to ``data`` in place.

    The new column is named ``f'rmean{window_size}_sales'``. Rows of ``data``
    are treated as day-major: row ``d * n_products + i`` belongs to product
    ``i`` on the ``d``-th day, so ``data`` must have exactly
    ``n_products * n_days`` rows.

    For each product, the rolling mean (``min_periods=1``) is computed over
    the trailing ``n_days + window_size`` columns of the sales table, and the
    last ``n_days`` values are written to that product's rows.

    NOTE(review): the window always ends at the last column of the sales
    table, regardless of which calendar days ``data`` covers -- confirm this
    is the intended lag for every forecast horizon.

    Args:
        data: DataFrame receiving the new column (mutated in place).
        n_products: Number of leading rows of the sales table to use.
        n_days: Number of days covered by ``data``.
        window_size: Rolling window length, in days.
        sales: Sales table with one row per product and the daily sales in
            its trailing columns. Defaults to the module-level ``sales_data``.

    Raises:
        ValueError: If ``n_days`` is not positive or ``data`` does not have
            ``n_products * n_days`` rows.
    """
    if sales is None:
        sales = sales_data
    if n_days < 1:
        raise ValueError(f'n_days must be positive, got {n_days}')
    expected_rows = n_products * n_days
    if len(data) != expected_rows:
        raise ValueError(
            f'data has {len(data)} rows, expected '
            f'n_products * n_days = {expected_rows}'
        )

    # Slice the columns first so only the daily sales columns are involved,
    # then transpose so that time runs down the rows: one rolling pass then
    # covers every product at once instead of one pass per product.
    history = sales.iloc[:n_products, -(n_days + window_size):]
    rolled = (
        history.T
        .rolling(window_size, min_periods=1)
        .mean()
        .iloc[-n_days:]
    )

    # ``rolled`` is (n_days, n_products); flattening row by row yields the
    # day-major order of ``data``. Assigning the whole column in one step
    # avoids chained assignment (``data[col][...] = ...``), which is not
    # guaranteed to write through to ``data``.
    data[f'rmean{window_size}_sales'] = rolled.to_numpy().ravel()

In [0]:
def predict(start_day, n_days):
    """Predict non-negative, integer-valued sales for a block of days.

    Builds the feature table for days ``start_day`` .. ``start_day + n_days - 1``
    from the module-level ``sales_data``, ``calendar`` and ``prices``, adds the
    7-, 14- and 28-day rolling-mean features, and runs the module-level
    ``regressor`` on it.

    Args:
        start_day: First day index to predict (the run cell notes that the
            calendar is 0-based).
        n_days: Number of consecutive days to predict.

    Returns:
        A float array of shape ``(n_products, n_days)`` whose values are
        rounded to whole numbers and clamped to be >= 0.
    """
    days = range(start_day, start_day + n_days)
    n_products = len(sales_data)

    data = prepare_data.generate_prediction_data(sales_data, calendar, prices, days)
    # The return value is ignored, so cat2int presumably converts the
    # categorical columns in place -- TODO confirm against prepare_data.
    prepare_data.cat2int(data)
    # Column order (7, 14, 28) is kept as in the original feature layout.
    for window_size in (7, 14, 28):
        calculate_rolling_mean(data, n_products, n_days, window_size)

    # The flat prediction vector is read as day-major (one run of products
    # per day); reshape + transpose turns it into one row per product.
    predictions = regressor.predict(data).reshape(n_days, -1).T

    # Clamp before rounding: rounding first turns small negative values into
    # -0.0, which is not "< 0" and would leak into the output as negative zero.
    return np.round(np.maximum(predictions, 0))

In [0]:
# The calendar is 0-based, so the start_day arguments below are 0-based day
# positions. Each call predicts 28 consecutive days: 1941..1968 for the
# evaluation block and 1913..1940 for the validation block.
predictions_evaluation = predict(1941, 28)
predictions_validation = predict(1913, 28)

In [0]:
# Persist both prediction blocks together with the product ids taken from
# sales_data. NOTE(review): evaluation is passed before validation -- confirm
# this matches the parameter order of utility.save_predictions.
utility.save_predictions(
    predictions_evaluation,
    predictions_validation,
    sales_data.id
)