# **Packages**

In [10]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import preprocessing, svm
from sklearn.linear_model import RANSACRegressor, LinearRegression
from sklearn.neighbors import KNeighborsRegressor

# **Import files**

In [11]:
x_te_url = 'https://raw.githubusercontent.com/MingchengHe/4AI3_Project/main/x_te.csv'
x_te = pd.read_csv(x_te_url)
x_tr_url = 'https://raw.githubusercontent.com/MingchengHe/4AI3_Project/main/x_tr.csv'
x_tr = pd.read_csv(x_tr_url)
y_te_url = 'https://raw.githubusercontent.com/MingchengHe/4AI3_Project/main/y_te.csv'
y_te = pd.read_csv(y_te_url)
y_tr_url = 'https://raw.githubusercontent.com/MingchengHe/4AI3_Project/main/y_tr.csv'
y_tr = pd.read_csv(y_tr_url)

te_final_url = 'https://raw.githubusercontent.com/MingchengHe/4AI3_Project/main/te_final.csv'
te_final = pd.read_csv(te_final_url)

sample_url = 'https://raw.githubusercontent.com/MingchengHe/4AI3_Project/main/sampleSubmission.csv'
sample = pd.read_csv(sample_url)

# **Conduct KNR regression**

Find the highest score by testing different hyperparameters

In [12]:
n_neighbors = [2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 35, 50, 75, 100]
n_jobs = [5]
algorithms = ['auto'] #, 'ball_tree', 'kd_tree', 'brute']
score_highest = 0.0

for n_neighbor in n_neighbors:
  for n_job in n_jobs:
    for algorithm in algorithms:
      KNR = KNeighborsRegressor(n_neighbors = n_neighbor,
                                n_jobs = n_job,
                                algorithm = algorithm)
      KNR.fit(x_tr, y_tr)
      score = KNR.score(x_te, y_te)
      print('n_neighbor:\t', n_neighbor, '\nn_job:\t', n_job,
            '\nAlgorithm:\t', algorithm, '\nScore:\t', score)
      if score > score_highest:
        score_highest = score
        parameter_best = [n_neighbor, n_job, algorithm, score]
      print('---')

n_neighbor:	 2 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.19705602474364814
---
n_neighbor:	 3 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.2636396579091659
---
n_neighbor:	 4 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.30039887129580123
---
n_neighbor:	 5 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3226089453242392
---
n_neighbor:	 6 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3358446527091953
---
n_neighbor:	 7 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.34377446287262836
---
n_neighbor:	 8 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3485122467366385
---
n_neighbor:	 9 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3522417861547317
---
n_neighbor:	 10 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3550849676787573
---
n_neighbor:	 15 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3622867433648951
---
n_neighbor:	 35 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.35351116476514655
---
n_neighbor:	 50 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3467822582707961
---
n_neighbor:	 75 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3368653549227445
---
n_n

# **Print the best hyper parameters**

In [13]:
print('Final best score:', '\nn_neighbor:\t', parameter_best[0],
      '\nn_job:\t', parameter_best[1],
      '\nAlgorithm:\t', parameter_best[2],
      '\nScore:\t', parameter_best[3])

Final best score: 
n_neighbor:	 15 
n_job:	 5 
Algorithm:	 auto 
Score:	 0.3622867433648951


# **Make the prediction**

In [14]:
KNR = KNeighborsRegressor(n_neighbors = n_neighbor,
                                n_jobs = n_job,
                                algorithm = algorithm)
KNR.fit(x_tr, y_tr)

prediction_array = KNR.predict(te_final)
prediction = pd.DataFrame(prediction_array)
prediction.columns = ['Weekly_Sales_cbrt']
prediction

Unnamed: 0,Weekly_Sales_cbrt
0,27.943352
1,27.731427
2,27.598905
3,27.381442
4,27.295968
...,...
115059,21.473116
115060,21.833773
115061,21.774782
115062,22.196827


In [15]:
prediction['Weekly_Sales'] = prediction['Weekly_Sales_cbrt'].pow(3)
prediction

Unnamed: 0,Weekly_Sales_cbrt,Weekly_Sales
0,27.943352,21819.033355
1,27.731427,21326.355852
2,27.598905,21022.073134
3,27.381442,20529.053658
4,27.295968,20337.404009
...,...,...
115059,21.473116,9901.140215
115060,21.833773,10408.457813
115061,21.774782,10324.319430
115062,22.196827,10936.356733


In [16]:
sample['Weekly_Sales'] = prediction['Weekly_Sales']
sample

Unnamed: 0,Id,Weekly_Sales
0,1_1_2012-11-02,21819.033355
1,1_1_2012-11-09,21326.355852
2,1_1_2012-11-16,21022.073134
3,1_1_2012-11-23,20529.053658
4,1_1_2012-11-30,20337.404009
...,...,...
115059,45_98_2013-06-28,9901.140215
115060,45_98_2013-07-05,10408.457813
115061,45_98_2013-07-12,10324.319430
115062,45_98_2013-07-19,10936.356733


In [17]:
sample.to_csv('KNR_Result.csv')