In [1]:
from utils import read_data, simple_predictors, evaluation, rollingCV, viz, knn, random_forest, parameters, predictions, regression
import pandas as pd
from IPython.display import display

In [2]:
# Shared helper objects and input data used by every prediction cell below.
RD = read_data.ReadData()
# NOTE(review): this rebinds the imported module name `viz` to an instance, so
# re-running this cell without restarting the kernel will likely fail on `viz.Viz`.
viz = viz.Viz()
Eval = evaluation.Evaluation()
# Source data set; the cells below read its `id` column and slice it.
hd = RD.read_Hd_Mobile()
# Number of folds: the prediction cells loop k = 1..K, and K is also passed to RollingCV.
K = 8
# The data is passed reversed (`hd[::-1]`). The meaning of the trailing 30, 14, 30
# arguments is defined by rollingCV.RollingCV and is not visible here — TODO document.
RCV = rollingCV.RollingCV(hd[::-1], K, 30, 14, 30)
SimplePredictors = simple_predictors.SimplePredictors(hd)
# The meaning of 60 is defined by knn.KNN and is not visible here — TODO document.
KNN = knn.KNN(Eval, 60)
RandomForest = random_forest.RandomForest(Eval)
Reg = regression.Regression()

In [3]:
# Parameter lookup and prediction helper objects used by the model cells.
Params = parameters.Parameters()
Predictions = predictions.Predictions()
# Distinct values of the `id` column of `hd`, in ascending order.
points_of_sales = list(hd.id.unique())
points_of_sales.sort()

### Mean sale value from last N sale days

In [4]:
# Baseline predictor: call SimplePredictors.avg(date, 3, id) for every row of every
# point of sale's test set, for each fold k = 1..K, and save the tagged results.
#
# Per-row results are gathered in plain lists and concatenated once per fold and
# once overall, instead of re-copying a growing DataFrame on every row (quadratic).
# Assumes SimplePredictors.avg returns a DataFrame — TODO confirm.
fold_frames = []
for k in range(1, K + 1):
    row_frames = []
    for point in points_of_sales:
        test_set = RCV.getKthTestSet(k, id=point)
        for _, row in test_set.iterrows():
            row_frames.append(SimplePredictors.avg(row.dzien_rozliczenia, 3, row.id))
    # An empty fold still yields an (empty) frame so the `k` column can be attached.
    pred = pd.concat(row_frames, ignore_index=True) if row_frames else pd.DataFrame()
    pred['k'] = k  # tag every prediction with the fold it belongs to
    fold_frames.append(pred)
mean_predictions = pd.concat(fold_frames, ignore_index=True) if fold_frames else pd.DataFrame()
# NOTE: the `predictions_mobile/` directory must already exist; to_csv does not create it.
mean_predictions.to_csv('predictions_mobile/mean_predictions.csv')

### Sale value from N days back

In [5]:
# Baseline predictor: call SimplePredictors.back_n_days(date, 7, id) for every row of
# every point of sale's test set, for each fold k = 1..K, and save the tagged results.
#
# Per-row results are gathered in plain lists and concatenated once per fold and
# once overall, instead of re-copying a growing DataFrame on every row (quadratic).
# Assumes SimplePredictors.back_n_days returns a DataFrame — TODO confirm.
fold_frames = []
for k in range(1, K + 1):
    row_frames = []
    for point in points_of_sales:
        test_set = RCV.getKthTestSet(k, id=point)
        for _, row in test_set.iterrows():
            row_frames.append(SimplePredictors.back_n_days(row.dzien_rozliczenia, 7, row.id))
    # An empty fold still yields an (empty) frame so the `k` column can be attached.
    pred = pd.concat(row_frames, ignore_index=True) if row_frames else pd.DataFrame()
    pred['k'] = k  # tag every prediction with the fold it belongs to
    fold_frames.append(pred)
n_back_predictions = pd.concat(fold_frames, ignore_index=True) if fold_frames else pd.DataFrame()
# NOTE: the `predictions_mobile/` directory must already exist; to_csv does not create it.
n_back_predictions.to_csv('predictions_mobile/n_back_predictions.csv')

### Mean value from the same days in the last N weeks

In [6]:
# Baseline predictor: call SimplePredictors.same_days_last_n_weeks_avg(date, 3, id) for
# every row of every point of sale's test set, for each fold k = 1..K, and save the
# tagged results.
#
# Per-row results are gathered in plain lists and concatenated once per fold and
# once overall, instead of re-copying a growing DataFrame on every row (quadratic).
# Assumes SimplePredictors.same_days_last_n_weeks_avg returns a DataFrame — TODO confirm.
fold_frames = []
for k in range(1, K + 1):
    row_frames = []
    for point in points_of_sales:
        test_set = RCV.getKthTestSet(k, id=point)
        for _, row in test_set.iterrows():
            row_frames.append(
                SimplePredictors.same_days_last_n_weeks_avg(row.dzien_rozliczenia, 3, row.id)
            )
    # An empty fold still yields an (empty) frame so the `k` column can be attached.
    pred = pd.concat(row_frames, ignore_index=True) if row_frames else pd.DataFrame()
    pred['k'] = k  # tag every prediction with the fold it belongs to
    fold_frames.append(pred)
n_weeks_predictions = pd.concat(fold_frames, ignore_index=True) if fold_frames else pd.DataFrame()
# NOTE: the `predictions_mobile/` directory must already exist; to_csv does not create it.
n_weeks_predictions.to_csv('predictions_mobile/n_weeks_predictions.csv')

### EWMA (exponentially weighted moving average)

In [7]:
# Baseline predictor: call SimplePredictors.EWMA(date, 3, id) for every row of every
# point of sale's test set, for each fold k = 1..K, and save the tagged results.
#
# Per-row results are gathered in plain lists and concatenated once per fold and
# once overall, instead of re-copying a growing DataFrame on every row (quadratic).
# Assumes SimplePredictors.EWMA returns a DataFrame — TODO confirm.
fold_frames = []
for k in range(1, K + 1):
    row_frames = []
    for point in points_of_sales:
        test_set = RCV.getKthTestSet(k, id=point)
        for _, row in test_set.iterrows():
            row_frames.append(SimplePredictors.EWMA(row.dzien_rozliczenia, 3, row.id))
    # An empty fold still yields an (empty) frame so the `k` column can be attached.
    pred = pd.concat(row_frames, ignore_index=True) if row_frames else pd.DataFrame()
    pred['k'] = k  # tag every prediction with the fold it belongs to
    fold_frames.append(pred)
ewma_predictions = pd.concat(fold_frames, ignore_index=True) if fold_frames else pd.DataFrame()
# NOTE: the `predictions_mobile/` directory must already exist; to_csv does not create it.
ewma_predictions.to_csv('predictions_mobile/ewma_predictions.csv')

### KNN (all points)

In [8]:
# KNN over all points of sale: for each fold k = 1..K, fetch the train/test split,
# look up the fold's "knn" parameters, predict, and tag the result with the fold
# number and the test set's `dzien_rozliczenia` values.
#
# The looked-up parameters are stored in `knn_params` rather than `parameters`:
# rebinding `parameters` would shadow the imported `parameters` module, so the cell
# that calls `parameters.Parameters()` could no longer be re-run.
fold_frames = []
for k in range(1, K + 1):
    train = RCV.getKthTrainSet(k, isML=True, isRegression=True)
    test = RCV.getKthTestSet(k, isML=True, isRegression=True)
    knn_params = Params.get_parameters_mobile("knn", k)
    pred = KNN.knn(train, test, knn_params)
    pred['k'] = k  # tag every prediction with the fold it belongs to
    # `.values` copies by position, so `pred` is assumed to be row-aligned with `test`.
    pred['dzien_rozliczenia'] = test['dzien_rozliczenia'].values
    fold_frames.append(pred)
# Concatenate all folds once instead of re-copying the accumulated frame per fold.
knn_all_predictions = pd.concat(fold_frames, ignore_index=True) if fold_frames else pd.DataFrame()
# NOTE: the `predictions_mobile/` directory must already exist; to_csv does not create it.
knn_all_predictions.to_csv('predictions_mobile/knn_all_predictions.csv')

### Random Forest (all points)

In [9]:
# Random forest over all points of sale: for each fold k = 1..K, fetch the train/test
# split, look up the fold's "rf" parameters, predict, and tag the result with the
# fold number and the test set's `dzien_rozliczenia` values.
#
# The looked-up parameters are stored in `rf_params` rather than `parameters`:
# rebinding `parameters` would shadow the imported `parameters` module, so the cell
# that calls `parameters.Parameters()` could no longer be re-run.
fold_frames = []
for k in range(1, K + 1):
    train = RCV.getKthTrainSet(k, isML=True, isRegression=True)
    test = RCV.getKthTestSet(k, isML=True, isRegression=True)
    rf_params = Params.get_parameters_mobile("rf", k)
    pred = RandomForest.randForest(train, test, rf_params)
    pred['k'] = k  # tag every prediction with the fold it belongs to
    # `.values` copies by position, so `pred` is assumed to be row-aligned with `test`.
    pred['dzien_rozliczenia'] = test['dzien_rozliczenia'].values
    fold_frames.append(pred)
# Concatenate all folds once instead of re-copying the accumulated frame per fold.
rf_all_predictions = pd.concat(fold_frames, ignore_index=True) if fold_frames else pd.DataFrame()
# NOTE: the `predictions_mobile/` directory must already exist; to_csv does not create it.
rf_all_predictions.to_csv('predictions_mobile/rf_all_predictions.csv')