# Import Files & Libraries

In [None]:
from google.colab import drive
from google.colab import files
drive.mount("/content/gdrive", force_remount=True)
!cp /content/gdrive/MyDrive/Decathlon/project.zip /content/
!unzip -q /content/project.zip
!rm /content/project.zip
!mkdir /content/results

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from model import curvegen
from dataset import dataset
from sklearn.neighbors import KNeighborsRegressor
import sklearn
import scipy
import tqdm
import os
from importlib import reload
# Re-execute the project modules so that the objects imported above match the
# current source files (presumably to pick up edits without a kernel restart).
for _module in (curvegen, dataset):
    reload(_module)

# Load Data

Loads the average annual turnover time-series dataset from JSON files and preprocesses the data.

In [76]:
def _read_annual_dataset(json_path):
    """Create an Annual_construction_dataset and fill it from a JSON file.

    Parameters
    ----------
    json_path : str
        Path of the JSON file handed to ``load_from_json``.

    Returns
    -------
    dataset.Annual_construction_dataset
        The freshly loaded dataset object.
    """
    loaded=dataset.Annual_construction_dataset()
    with open(json_path, 'r') as f:
        loaded.load_from_json(f)
    return loaded

annual_ds_train=_read_annual_dataset('data_annual_train.json')
annual_ds_val=_read_annual_dataset('data_annual_val.json')
# Both splits are prepared with the same flag.
# NOTE(review): the meaning of the False argument is defined in the dataset
# module and is not visible here - confirm before changing it.
annual_ds_train.set_data_for_training(False)
annual_ds_val.set_data_for_training(False)

# Model

Fits the models on the training set and computes the cosine, L1 and L2 losses on the validation set.

In [None]:
def _split_samples(samples):
    """Turn a sequence of samples into ``(X, Y)`` numpy arrays.

    Element 1 of every sample goes into X (model input) and element 0 into
    Y (regression target), in the order the samples are given.
    """
    samples=list(samples)
    inputs=np.array([entry[1] for entry in samples])
    targets=np.array([entry[0] for entry in samples])
    return inputs,targets

# Only the samples stored under this key of samples_per_dep are used.
# NOTE(review): "dep" presumably identifies a department - confirm.
dep=127
X_train,Y_train=_split_samples(annual_ds_train.samples_per_dep[dep])
X_val,Y_val=_split_samples(annual_ds_val.samples_per_dep[dep])
def custom_distance(x,y,w):
    """Weighted distance between two feature vectors split into three segments.

    Both vectors are cut at the same positions: elements [0, 8), [8, 37) and
    [37, end). A Euclidean distance (Minkowski with p=2) is computed for each
    segment separately and the three are combined as

        w[0] * d1 / sqrt(2) + w[1] * d2 / sqrt(2) + d3 + w[2]

    Note that w[2] is added as a constant offset; it does not scale d3.

    Parameters
    ----------
    x, y : sequence of numbers
        The two vectors to compare; they must have the same length.
    w : sequence of three numbers
        w[0] and w[1] scale the first two segment distances, w[2] is the
        additive offset.

    Returns
    -------
    float
        The combined distance.
    """
    # NOTE(review): the 1/sqrt(2) factor presumably rescales one-hot encoded
    # segments so that their distance lies in [0, 1] - confirm against the
    # feature layout produced by the dataset module.
    first_end=8
    second_end=first_end+29
    bounds=((0,first_end),(first_end,second_end),(second_end,None))
    seg_d=[
        scipy.spatial.distance.minkowski(x[lo:hi], y[lo:hi], p=2, w=None)
        for lo,hi in bounds
    ]
    inv_sqrt2=1/np.sqrt(2)
    return w[0]*seg_d[0]*inv_sqrt2+w[1]*seg_d[1]*inv_sqrt2+seg_d[2]+w[2]
# Grid search: for every neighbour count k in 1..9 and every combination of
# the three distance weights, fit a distance-weighted KNN regressor on the
# training arrays and score its predictions on the validation arrays.
# One dict per configuration is appended to `results`.
results=[]
gt=Y_val
weight_grid=np.linspace(0,1,4)  # four evenly spaced values from 0 to 1
for k in tqdm.tqdm(range(1,10)):
    for w1 in weight_grid:
        for w2 in weight_grid:
            for w3 in weight_grid:
                metric_weights=np.array([w1,w2,w3])
                knn=KNeighborsRegressor(
                    n_neighbors=k,
                    weights='distance',
                    n_jobs=-1,
                    metric=lambda a,b:custom_distance(a,b,metric_weights),
                )
                knn.fit(X_train, Y_train)
                pred=knn.predict(X_val)
                # Rescale every predicted row with its own max/min before scoring.
                for n,curve in enumerate(pred):
                    pred[n]=dataset.norm(curve,np.max(curve),np.min(curve))
                # Only the diagonal of the pairwise matrix is needed: the
                # similarity of prediction i with ground truth i.
                pairwise_cos=sklearn.metrics.pairwise.cosine_similarity(pred,gt)
                cosine_loss=1-np.mean(np.diag(pairwise_cos))
                l1_loss=sklearn.metrics.mean_absolute_error(pred,gt)
                l2_loss=sklearn.metrics.mean_squared_error(pred,gt)
                results.append({
                    "w":[w1,w2,w3],
                    "k":k,
                    "cos_loss":cosine_loss,
                    "l1_loss":l1_loss,
                    "l2_loss":l2_loss
                })


# Hyperparameter Search

In [None]:
# Report the grid-search configuration with the smallest cosine loss.
key="cos_loss"
loss_list=[result[key] for result in results]
min_loss_index=np.argmin(loss_list)  # first position of the minimum
best_param=results[min_loss_index]
min_loss=loss_list[min_loss_index]
print("w = {} \t || k = {} \t || loss = {}".format(best_param["w"],best_param["k"],min_loss))

In [None]:
# Report the grid-search configuration with the smallest L1 (mean absolute) loss.
key="l1_loss"
loss_list=[result[key] for result in results]
min_loss_index=np.argmin(loss_list)  # first position of the minimum
best_param=results[min_loss_index]
min_loss=loss_list[min_loss_index]
print("w = {} \t || k = {} \t || loss = {}".format(best_param["w"],best_param["k"],min_loss))

In [None]:
# Report the grid-search configuration with the smallest L2 (mean squared) loss.
key="l2_loss"
loss_list=[result[key] for result in results]
min_loss_index=np.argmin(loss_list)  # first position of the minimum
best_param=results[min_loss_index]
min_loss=loss_list[min_loss_index]
print("w = {} \t || k = {} \t || loss = {}".format(best_param["w"],best_param["k"],min_loss))

In [None]:
# Refit one KNN configuration (k=2, weights [1, 1, 0.0001]) and compare its
# prediction for a single validation sample with the ground truth.
n=190  # index of the validation sample to inspect
knn = KNeighborsRegressor(n_neighbors=2,weights='distance',n_jobs=-1,metric=lambda a,b:custom_distance(a,b,np.array([1,1,0.0001])))
knn.fit(X_train, Y_train)
pred=knn.predict(X_val)
gt=Y_val
# Normalise the predicted curve once and reuse it for the score and the plot.
# The name l1y is kept because the following cell overlays this curve.
l1y=dataset.norm(pred[n],np.max(pred[n]),np.min(pred[n]))
print(sklearn.metrics.pairwise.cosine_similarity(l1y.reshape(1, -1),gt[n].reshape(1, -1)))
fig, ax = plt.subplots()
ax.plot(l1y,color='red',label='prediction (k=2, w=[1, 1, 0.0001])')
ax.plot(gt[n],label='ground truth')
ax.set_title('Validation sample {}'.format(n))
ax.legend()
plt.show()

In [None]:
# Refit a second KNN configuration (k=4, weights [2/3, 0, 1]) and compare its
# prediction for the same validation sample with the ground truth and with
# l1y, the normalised prediction computed in the previous cell.
n=190  # index of the validation sample to inspect
knn = KNeighborsRegressor(n_neighbors=4,weights='distance',n_jobs=-1,metric=lambda a,b:custom_distance(a,b,np.array([2/3, 0.0, 1])))
knn.fit(X_train, Y_train)
pred=knn.predict(X_val)
gt=Y_val
# Normalise the predicted curve once and reuse it for the score and the plot.
normed_pred=dataset.norm(pred[n],np.max(pred[n]),np.min(pred[n]))
print(sklearn.metrics.pairwise.cosine_similarity(normed_pred.reshape(1, -1),gt[n].reshape(1, -1)))
fig, ax = plt.subplots()
ax.plot(normed_pred,color='red',label='prediction (k=4, w=[2/3, 0, 1])')
ax.plot(gt[n],label='ground truth')
ax.plot(l1y,color='green',label='l1y (prediction from the previous cell)')
ax.set_title('Validation sample {}'.format(n))
ax.legend()
plt.show()