In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math
from math import sqrt
from sklearn.metrics import mean_squared_error
from scipy.stats import uniform, randint
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsRegressor

import time


<h1>Links to all the resources I used to build this notebook:<h1>

Filling in missing values and value encoding:
https://www.kaggle.com/code/jillanisofttech/job-salary-prediction-by-jst

KKNR syntax and values:
https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html

hyper parameter tuning:
https://medium.datadriveninvestor.com/k-nearest-neighbors-in-python-hyperparameters-tuning-716734bc557f

After realizing Grid Search CV would take ages to run, found a faster grid search here:
https://towardsdatascience.com/11-times-faster-hyperparameter-tuning-with-halvinggridsearch-232ed0160155

In [2]:
#Get dataset
df = pd.read_csv("Train_rev1_2.csv") 
df.head()

Unnamed: 0,Id,Title,FullDescription,LocationRaw,LocationNormalized,ContractType,ContractTime,Company,Category,SalaryRaw,SalaryNormalized,SourceName
0,12612628,Engineering Systems Analyst,Engineering Systems Analyst Dorking Surrey Sal...,"Dorking, Surrey, Surrey",Dorking,,permanent,Gregory Martin International,Engineering Jobs,20000 - 30000/annum 20-30K,25000,cv-library.co.uk
1,12612830,Stress Engineer Glasgow,Stress Engineer Glasgow Salary **** to **** We...,"Glasgow, Scotland, Scotland",Glasgow,,permanent,Gregory Martin International,Engineering Jobs,25000 - 35000/annum 25-35K,30000,cv-library.co.uk
2,12612844,Modelling and simulation analyst,Mathematical Modeller / Simulation Analyst / O...,"Hampshire, South East, South East",Hampshire,,permanent,Gregory Martin International,Engineering Jobs,20000 - 40000/annum 20-40K,30000,cv-library.co.uk
3,12613049,Engineering Systems Analyst / Mathematical Mod...,Engineering Systems Analyst / Mathematical Mod...,"Surrey, South East, South East",Surrey,,permanent,Gregory Martin International,Engineering Jobs,25000 - 30000/annum 25K-30K negotiable,27500,cv-library.co.uk
4,12613647,"Pioneer, Miser Engineering Systems Analyst","Pioneer, Miser Engineering Systems Analyst Do...","Surrey, South East, South East",Surrey,,permanent,Gregory Martin International,Engineering Jobs,20000 - 30000/annum 20-30K,25000,cv-library.co.uk


<h1>Cleaning and Encoding the whole dataset before splitting it up<h1>



In [3]:
#check missing values
df.isna().sum()

Id                         0
Title                      1
FullDescription            0
LocationRaw                0
LocationNormalized         0
ContractType          179326
ContractTime           63905
Company                32430
Category                   0
SalaryRaw                  0
SalaryNormalized           0
SourceName                 1
dtype: int64

In [4]:
#Turn all string values into category values
for label, content in df.items():
    if pd.api.types.is_string_dtype(content):
        df[label] = content.astype("category").cat.as_ordered()
    

In [5]:
#Filling in missing values and perform encoding
for label,content in df.items():
    if not pd.api.types.is_numeric_dtype(content):
        # Add binary column to indicate whether sample had missing value
        df[label+"is_missing"]=pd.isnull(content)
        # Turn categories into numbers and add+1
        df[label] = pd.Categorical(content).codes+1
df.head()

Unnamed: 0,Id,Title,FullDescription,LocationRaw,LocationNormalized,ContractType,ContractTime,Company,Category,SalaryRaw,...,Titleis_missing,FullDescriptionis_missing,LocationRawis_missing,LocationNormalizedis_missing,ContractTypeis_missing,ContractTimeis_missing,Companyis_missing,Categoryis_missing,SalaryRawis_missing,SourceNameis_missing
0,12612628,39227,68301,5178,751,0,2,7758,9,21344,...,False,False,False,False,True,False,False,False,False,False
1,12612830,121388,196351,6881,987,0,2,7758,9,32427,...,False,False,False,False,True,False,False,False,False,False
2,12612844,73486,117824,7630,1069,0,2,7758,9,21539,...,False,False,False,False,True,False,False,False,False,False
3,12613049,39228,68300,17183,2310,0,2,7758,9,31840,...,False,False,False,False,True,False,False,False,False,False
4,12613647,85435,157839,17183,2310,0,2,7758,9,21344,...,False,False,False,False,True,False,False,False,False,False


In [6]:
#check for missing values again
df.isna().sum()

Id                              0
Title                           0
FullDescription                 0
LocationRaw                     0
LocationNormalized              0
ContractType                    0
ContractTime                    0
Company                         0
Category                        0
SalaryRaw                       0
SalaryNormalized                0
SourceName                      0
Titleis_missing                 0
FullDescriptionis_missing       0
LocationRawis_missing           0
LocationNormalizedis_missing    0
ContractTypeis_missing          0
ContractTimeis_missing          0
Companyis_missing               0
Categoryis_missing              0
SalaryRawis_missing             0
SourceNameis_missing            0
dtype: int64

In [7]:
#Train-validation-test split

X = df.drop(columns = ['SalaryNormalized']).copy()
y = df['SalaryNormalized']
# Split the data in training and remaining dataset, train size = 60%
X_train, X_rem, y_train, y_rem = train_test_split(X,y, train_size=0.6)
# Split remaining 50/50 into validation and test sets
X_valid, X_test, y_valid, y_test = train_test_split(X_rem,y_rem, test_size=0.5)
print(X_train.shape), print(y_train.shape)
print(X_valid.shape), print(y_valid.shape)
print(X_test.shape), print(y_test.shape)

(146860, 21)
(146860,)
(48954, 21)
(48954,)
(48954, 21)
(48954,)


(None, None)

In [8]:
#Initialize basic KNN regression model:
KNNR = KNeighborsRegressor(n_neighbors=10)


In [9]:
#Fit model to training data
KNNR.fit(X_train,y_train)

KNeighborsRegressor(n_neighbors=10)

In [10]:
#make prediction on validation data
y_pred_valid = KNNR.predict(X_valid)

In [11]:
#analyze results of prediction
MAE = mean_absolute_error(y_valid,y_pred_valid)

r2 = r2_score(y_valid,y_pred_valid)
print(f'MAE validation: {MAE}\nr2 validation: {r2}')
print('\n')


MAE validation: 9913.842137516853
r2 validation: 0.27429773090495924




In [12]:
#If you're curious what the individual predictions look like
n=0
for value in y_valid:
    print(f'Predicted: {y_pred_valid[n]} --> Actual: {value} ')
    n+=1

Predicted: 29180.0 --> Actual: 27500 
Predicted: 21310.0 --> Actual: 17500 
Predicted: 25203.2 --> Actual: 42240 
Predicted: 70275.0 --> Actual: 11884 
Predicted: 32846.8 --> Actual: 43000 
Predicted: 20700.0 --> Actual: 17000 
Predicted: 45340.0 --> Actual: 55000 
Predicted: 42850.0 --> Actual: 39500 
Predicted: 26200.0 --> Actual: 24750 
Predicted: 44568.0 --> Actual: 65000 
Predicted: 17543.6 --> Actual: 19000 
Predicted: 19705.0 --> Actual: 31800 
Predicted: 24735.9 --> Actual: 18500 
Predicted: 41586.0 --> Actual: 45000 
Predicted: 47600.0 --> Actual: 35000 
Predicted: 32230.0 --> Actual: 15600 
Predicted: 27042.9 --> Actual: 24000 
Predicted: 17325.0 --> Actual: 13440 
Predicted: 20855.5 --> Actual: 17250 
Predicted: 41330.0 --> Actual: 37500 
Predicted: 25444.0 --> Actual: 22318 
Predicted: 20278.8 --> Actual: 16000 
Predicted: 24150.0 --> Actual: 23000 
Predicted: 40950.0 --> Actual: 42500 
Predicted: 18051.2 --> Actual: 15500 
Predicted: 20827.0 --> Actual: 15500 
Predicted: 2

Predicted: 33900.0 --> Actual: 32500 
Predicted: 36578.2 --> Actual: 23000 
Predicted: 43576.0 --> Actual: 47500 
Predicted: 25016.0 --> Actual: 18250 
Predicted: 32325.0 --> Actual: 19500 
Predicted: 35419.3 --> Actual: 44160 
Predicted: 20532.5 --> Actual: 16000 
Predicted: 20470.3 --> Actual: 21500 
Predicted: 23633.8 --> Actual: 15000 
Predicted: 25804.8 --> Actual: 25000 
Predicted: 21355.3 --> Actual: 30720 
Predicted: 38125.0 --> Actual: 47500 
Predicted: 26950.0 --> Actual: 27500 
Predicted: 29559.2 --> Actual: 29958 
Predicted: 35250.0 --> Actual: 32000 
Predicted: 55778.0 --> Actual: 65000 
Predicted: 54300.0 --> Actual: 42500 
Predicted: 19309.8 --> Actual: 26880 
Predicted: 30725.0 --> Actual: 37500 
Predicted: 27040.7 --> Actual: 35000 
Predicted: 38450.0 --> Actual: 33000 
Predicted: 35282.0 --> Actual: 30794 
Predicted: 34826.0 --> Actual: 25000 
Predicted: 21250.0 --> Actual: 20500 
Predicted: 38900.0 --> Actual: 29500 
Predicted: 47200.0 --> Actual: 55000 
Predicted: 4

Predicted: 56692.5 --> Actual: 50000 
Predicted: 59020.0 --> Actual: 42500 
Predicted: 23100.0 --> Actual: 25000 
Predicted: 39110.0 --> Actual: 35000 
Predicted: 43650.0 --> Actual: 37500 
Predicted: 38856.1 --> Actual: 51500 
Predicted: 34187.2 --> Actual: 33000 
Predicted: 29880.0 --> Actual: 26600 
Predicted: 41800.0 --> Actual: 37500 
Predicted: 28142.5 --> Actual: 27500 
Predicted: 31371.6 --> Actual: 29098 
Predicted: 60360.4 --> Actual: 50000 
Predicted: 42700.0 --> Actual: 17500 
Predicted: 29227.8 --> Actual: 30000 
Predicted: 62200.0 --> Actual: 25800 
Predicted: 35100.4 --> Actual: 25000 
Predicted: 54900.0 --> Actual: 80000 
Predicted: 38100.0 --> Actual: 32000 
Predicted: 45410.0 --> Actual: 22080 
Predicted: 26394.4 --> Actual: 26000 
Predicted: 35862.1 --> Actual: 26000 
Predicted: 25600.0 --> Actual: 19000 
Predicted: 52450.0 --> Actual: 50000 
Predicted: 34648.0 --> Actual: 18750 
Predicted: 34841.3 --> Actual: 32500 
Predicted: 28875.0 --> Actual: 20500 
Predicted: 5

Predicted: 28300.0 --> Actual: 27500 
Predicted: 27850.0 --> Actual: 17700 
Predicted: 35457.6 --> Actual: 42500 
Predicted: 22409.4 --> Actual: 18000 
Predicted: 33405.3 --> Actual: 25000 
Predicted: 25543.0 --> Actual: 33600 
Predicted: 37900.0 --> Actual: 40000 
Predicted: 34400.0 --> Actual: 59520 
Predicted: 46083.4 --> Actual: 31500 
Predicted: 37250.0 --> Actual: 35000 
Predicted: 51134.3 --> Actual: 52500 
Predicted: 33239.3 --> Actual: 18000 
Predicted: 32127.8 --> Actual: 30000 
Predicted: 37200.0 --> Actual: 40000 
Predicted: 53750.0 --> Actual: 50000 
Predicted: 36774.0 --> Actual: 30000 
Predicted: 18293.1 --> Actual: 15000 
Predicted: 21475.0 --> Actual: 20500 
Predicted: 35950.0 --> Actual: 40000 
Predicted: 32916.0 --> Actual: 24500 
Predicted: 29957.7 --> Actual: 26500 
Predicted: 25400.0 --> Actual: 21000 
Predicted: 38230.0 --> Actual: 52500 
Predicted: 29700.0 --> Actual: 84000 
Predicted: 27682.0 --> Actual: 15000 
Predicted: 28375.0 --> Actual: 72000 
Predicted: 3

Predicted: 35100.0 --> Actual: 27500 
Predicted: 14856.8 --> Actual: 11884 
Predicted: 47950.0 --> Actual: 41000 
Predicted: 27440.0 --> Actual: 10320 
Predicted: 35700.0 --> Actual: 36500 
Predicted: 44214.0 --> Actual: 26400 
Predicted: 26756.2 --> Actual: 11884 
Predicted: 39746.0 --> Actual: 32400 
Predicted: 43876.0 --> Actual: 70000 
Predicted: 43160.0 --> Actual: 35000 
Predicted: 29342.5 --> Actual: 23500 
Predicted: 27077.1 --> Actual: 29400 
Predicted: 19185.0 --> Actual: 19500 
Predicted: 27030.0 --> Actual: 35000 
Predicted: 35102.0 --> Actual: 27000 
Predicted: 25315.0 --> Actual: 17250 
Predicted: 24938.0 --> Actual: 21120 
Predicted: 54053.0 --> Actual: 50000 
Predicted: 46527.8 --> Actual: 75000 
Predicted: 28700.0 --> Actual: 28500 
Predicted: 45900.0 --> Actual: 45000 
Predicted: 50390.0 --> Actual: 24000 
Predicted: 40999.8 --> Actual: 40000 
Predicted: 22183.1 --> Actual: 18000 
Predicted: 49915.0 --> Actual: 62500 
Predicted: 44935.2 --> Actual: 37500 
Predicted: 4

Predicted: 25025.0 --> Actual: 24000 
Predicted: 42741.6 --> Actual: 35000 
Predicted: 21170.2 --> Actual: 21120 
Predicted: 26580.5 --> Actual: 26500 
Predicted: 34575.0 --> Actual: 24000 
Predicted: 50410.0 --> Actual: 32500 
Predicted: 23606.6 --> Actual: 16000 
Predicted: 32858.5 --> Actual: 15250 
Predicted: 26699.5 --> Actual: 30720 
Predicted: 36454.4 --> Actual: 30000 
Predicted: 47836.0 --> Actual: 50000 
Predicted: 50051.2 --> Actual: 45000 
Predicted: 28945.4 --> Actual: 27500 
Predicted: 38406.0 --> Actual: 42500 
Predicted: 38099.9 --> Actual: 35000 
Predicted: 22267.0 --> Actual: 11884 
Predicted: 48822.0 --> Actual: 82800 
Predicted: 28613.5 --> Actual: 19500 
Predicted: 34105.2 --> Actual: 12600 
Predicted: 38617.6 --> Actual: 45000 
Predicted: 19004.0 --> Actual: 8640 
Predicted: 46870.0 --> Actual: 35000 
Predicted: 48374.2 --> Actual: 42500 
Predicted: 22686.0 --> Actual: 17500 
Predicted: 24215.0 --> Actual: 24500 
Predicted: 44546.2 --> Actual: 47500 
Predicted: 22

Predicted: 32061.5 --> Actual: 32400 
Predicted: 48250.0 --> Actual: 52500 
Predicted: 48150.0 --> Actual: 50000 
Predicted: 24735.2 --> Actual: 16750 
Predicted: 55282.0 --> Actual: 50000 
Predicted: 54691.3 --> Actual: 65000 
Predicted: 24953.1 --> Actual: 17500 
Predicted: 58500.0 --> Actual: 57500 
Predicted: 31559.5 --> Actual: 19200 
Predicted: 37543.7 --> Actual: 14400 
Predicted: 29990.0 --> Actual: 19000 
Predicted: 40525.0 --> Actual: 42500 
Predicted: 30929.7 --> Actual: 59520 
Predicted: 40920.7 --> Actual: 50000 
Predicted: 19121.9 --> Actual: 24000 
Predicted: 45134.0 --> Actual: 33500 
Predicted: 55649.9 --> Actual: 55000 
Predicted: 52214.0 --> Actual: 22500 
Predicted: 23880.0 --> Actual: 14000 
Predicted: 33204.9 --> Actual: 28500 
Predicted: 21753.0 --> Actual: 20000 
Predicted: 33100.0 --> Actual: 32000 
Predicted: 49150.0 --> Actual: 40000 
Predicted: 21680.0 --> Actual: 15500 
Predicted: 59941.4 --> Actual: 73500 
Predicted: 34351.1 --> Actual: 65000 
Predicted: 4

Predicted: 31159.2 --> Actual: 15379 
Predicted: 38250.0 --> Actual: 37500 
Predicted: 21702.4 --> Actual: 13900 
Predicted: 37768.0 --> Actual: 14400 
Predicted: 33220.1 --> Actual: 55000 
Predicted: 29617.6 --> Actual: 27500 
Predicted: 25508.0 --> Actual: 18500 
Predicted: 37650.0 --> Actual: 34000 
Predicted: 21131.6 --> Actual: 15000 
Predicted: 19232.8 --> Actual: 18500 
Predicted: 36875.0 --> Actual: 35000 
Predicted: 21750.0 --> Actual: 19250 
Predicted: 37050.0 --> Actual: 62500 
Predicted: 35020.0 --> Actual: 30000 
Predicted: 33950.0 --> Actual: 35000 
Predicted: 52600.0 --> Actual: 35500 
Predicted: 23450.0 --> Actual: 18000 
Predicted: 45600.0 --> Actual: 42500 
Predicted: 21495.7 --> Actual: 23040 
Predicted: 51960.0 --> Actual: 16800 
Predicted: 43537.6 --> Actual: 15542 
Predicted: 44916.0 --> Actual: 26500 
Predicted: 30204.0 --> Actual: 15000 
Predicted: 25442.2 --> Actual: 23000 
Predicted: 39890.0 --> Actual: 27000 
Predicted: 46752.0 --> Actual: 21250 
Predicted: 3

Predicted: 57031.9 --> Actual: 17280 
Predicted: 53397.8 --> Actual: 47500 
Predicted: 40013.1 --> Actual: 35000 
Predicted: 39964.4 --> Actual: 45000 
Predicted: 18662.2 --> Actual: 11884 
Predicted: 21090.0 --> Actual: 15000 
Predicted: 42246.2 --> Actual: 50000 
Predicted: 55000.0 --> Actual: 22000 
Predicted: 36720.4 --> Actual: 36000 
Predicted: 31375.0 --> Actual: 30000 
Predicted: 33200.0 --> Actual: 31500 
Predicted: 38125.8 --> Actual: 55000 
Predicted: 19177.6 --> Actual: 21120 
Predicted: 50800.0 --> Actual: 47500 
Predicted: 40096.6 --> Actual: 37044 
Predicted: 28924.0 --> Actual: 80000 
Predicted: 34675.0 --> Actual: 30000 
Predicted: 30744.0 --> Actual: 39500 
Predicted: 24042.3 --> Actual: 13800 
Predicted: 42054.0 --> Actual: 35000 
Predicted: 20122.8 --> Actual: 18500 
Predicted: 30672.3 --> Actual: 35000 
Predicted: 34948.5 --> Actual: 42240 
Predicted: 48164.0 --> Actual: 47500 
Predicted: 28312.4 --> Actual: 36000 
Predicted: 46000.0 --> Actual: 45000 
Predicted: 3

Predicted: 44450.0 --> Actual: 13056 
Predicted: 33378.7 --> Actual: 56000 
Predicted: 31928.0 --> Actual: 26500 
Predicted: 24575.0 --> Actual: 30625 
Predicted: 34150.0 --> Actual: 32000 
Predicted: 23973.7 --> Actual: 30000 
Predicted: 42800.0 --> Actual: 38000 
Predicted: 35061.0 --> Actual: 30465 
Predicted: 39150.0 --> Actual: 39223 
Predicted: 32110.0 --> Actual: 30000 
Predicted: 35205.0 --> Actual: 60000 
Predicted: 38400.0 --> Actual: 40000 
Predicted: 30863.6 --> Actual: 25740 
Predicted: 36620.0 --> Actual: 28000 
Predicted: 33825.0 --> Actual: 31000 
Predicted: 38610.0 --> Actual: 65000 
Predicted: 20488.7 --> Actual: 17000 
Predicted: 24178.5 --> Actual: 42500 
Predicted: 30349.5 --> Actual: 18500 
Predicted: 34646.0 --> Actual: 30000 
Predicted: 43752.0 --> Actual: 35000 
Predicted: 40633.6 --> Actual: 28000 
Predicted: 34882.0 --> Actual: 32500 
Predicted: 22432.2 --> Actual: 35000 
Predicted: 60968.8 --> Actual: 8500 
Predicted: 35812.4 --> Actual: 34000 
Predicted: 40

Predicted: 32201.4 --> Actual: 37200 
Predicted: 49377.7 --> Actual: 44500 
Predicted: 32140.4 --> Actual: 28500 
Predicted: 20176.1 --> Actual: 14750 
Predicted: 26398.0 --> Actual: 36960 
Predicted: 21849.8 --> Actual: 25920 
Predicted: 27325.8 --> Actual: 21250 
Predicted: 37153.0 --> Actual: 40000 
Predicted: 32846.7 --> Actual: 34000 
Predicted: 22583.8 --> Actual: 60000 
Predicted: 37525.0 --> Actual: 37500 
Predicted: 38784.3 --> Actual: 37500 
Predicted: 36300.0 --> Actual: 32500 
Predicted: 27800.0 --> Actual: 28500 
Predicted: 17743.6 --> Actual: 35000 
Predicted: 36100.0 --> Actual: 37500 
Predicted: 28077.8 --> Actual: 22000 
Predicted: 24817.5 --> Actual: 22500 
Predicted: 17171.9 --> Actual: 18240 
Predicted: 52072.0 --> Actual: 35000 
Predicted: 71325.7 --> Actual: 15942 
Predicted: 40740.0 --> Actual: 40000 
Predicted: 29276.0 --> Actual: 29500 
Predicted: 49900.6 --> Actual: 62499 
Predicted: 40984.4 --> Actual: 26500 
Predicted: 26797.5 --> Actual: 15600 
Predicted: 4

Predicted: 22625.0 --> Actual: 21000 
Predicted: 49574.0 --> Actual: 7200 
Predicted: 34040.0 --> Actual: 31500 
Predicted: 35200.0 --> Actual: 30000 
Predicted: 46180.0 --> Actual: 25920 
Predicted: 39250.0 --> Actual: 40000 
Predicted: 22013.7 --> Actual: 19000 
Predicted: 50445.1 --> Actual: 62499 
Predicted: 24708.2 --> Actual: 12960 
Predicted: 49485.2 --> Actual: 14400 
Predicted: 50541.3 --> Actual: 15360 
Predicted: 46650.0 --> Actual: 17760 
Predicted: 28525.1 --> Actual: 13920 
Predicted: 25526.1 --> Actual: 27500 
Predicted: 45259.2 --> Actual: 47500 
Predicted: 47492.0 --> Actual: 70000 
Predicted: 33700.0 --> Actual: 72000 
Predicted: 29470.0 --> Actual: 28800 
Predicted: 17539.1 --> Actual: 21120 
Predicted: 35393.7 --> Actual: 23040 
Predicted: 26125.0 --> Actual: 20000 
Predicted: 29115.3 --> Actual: 26000 
Predicted: 43548.8 --> Actual: 15360 
Predicted: 27767.1 --> Actual: 45120 
Predicted: 61150.0 --> Actual: 82500 
Predicted: 44048.0 --> Actual: 32500 
Predicted: 39

Predicted: 30135.9 --> Actual: 31000 
Predicted: 29836.5 --> Actual: 42500 
Predicted: 35418.0 --> Actual: 52500 
Predicted: 44163.2 --> Actual: 90000 
Predicted: 25647.0 --> Actual: 22000 
Predicted: 35450.0 --> Actual: 42500 
Predicted: 35800.0 --> Actual: 19000 
Predicted: 38549.7 --> Actual: 37500 
Predicted: 26433.6 --> Actual: 25440 
Predicted: 38300.0 --> Actual: 39000 
Predicted: 17051.0 --> Actual: 19200 
Predicted: 28336.8 --> Actual: 26500 
Predicted: 32374.3 --> Actual: 71000 
Predicted: 36582.0 --> Actual: 31000 
Predicted: 25543.4 --> Actual: 21782 
Predicted: 29300.0 --> Actual: 28500 
Predicted: 39394.4 --> Actual: 35308 
Predicted: 38476.0 --> Actual: 38500 
Predicted: 34263.3 --> Actual: 43200 
Predicted: 22202.0 --> Actual: 15500 
Predicted: 47224.4 --> Actual: 51955 
Predicted: 52350.0 --> Actual: 40000 
Predicted: 28578.3 --> Actual: 23500 
Predicted: 42099.8 --> Actual: 32500 
Predicted: 40549.9 --> Actual: 34000 
Predicted: 20067.7 --> Actual: 17500 
Predicted: 2

Predicted: 37860.0 --> Actual: 43728 
Predicted: 43473.0 --> Actual: 16200 
Predicted: 33619.0 --> Actual: 12729 
Predicted: 25562.0 --> Actual: 32640 
Predicted: 35630.0 --> Actual: 40000 
Predicted: 26924.0 --> Actual: 23040 
Predicted: 29227.4 --> Actual: 18500 
Predicted: 44729.8 --> Actual: 37500 
Predicted: 57672.1 --> Actual: 50000 
Predicted: 35383.2 --> Actual: 50000 
Predicted: 37250.0 --> Actual: 32500 
Predicted: 35270.0 --> Actual: 30000 
Predicted: 30037.0 --> Actual: 32000 
Predicted: 40957.0 --> Actual: 38000 
Predicted: 30712.0 --> Actual: 25500 
Predicted: 43713.3 --> Actual: 20000 
Predicted: 19075.0 --> Actual: 16000 
Predicted: 46650.0 --> Actual: 45000 
Predicted: 24280.3 --> Actual: 18000 
Predicted: 27424.0 --> Actual: 43200 
Predicted: 53366.5 --> Actual: 62500 
Predicted: 34876.0 --> Actual: 29000 
Predicted: 42019.7 --> Actual: 57000 
Predicted: 24716.2 --> Actual: 35000 
Predicted: 52700.0 --> Actual: 55000 
Predicted: 51014.6 --> Actual: 13785 
Predicted: 2

Predicted: 45685.1 --> Actual: 21018 
Predicted: 21372.0 --> Actual: 14000 
Predicted: 30690.5 --> Actual: 28000 
Predicted: 27036.0 --> Actual: 10000 
Predicted: 24898.0 --> Actual: 25000 
Predicted: 35477.0 --> Actual: 27000 
Predicted: 24386.4 --> Actual: 14400 
Predicted: 35900.0 --> Actual: 39000 
Predicted: 35597.0 --> Actual: 62499 
Predicted: 43188.0 --> Actual: 55500 
Predicted: 50250.0 --> Actual: 45000 
Predicted: 27882.6 --> Actual: 33000 
Predicted: 26897.5 --> Actual: 28500 
Predicted: 19909.2 --> Actual: 16500 
Predicted: 21649.0 --> Actual: 21500 
Predicted: 23301.1 --> Actual: 39600 
Predicted: 21009.9 --> Actual: 14832 
Predicted: 19850.0 --> Actual: 22000 
Predicted: 39112.3 --> Actual: 34000 
Predicted: 50519.1 --> Actual: 62500 
Predicted: 43172.0 --> Actual: 80000 
Predicted: 22610.0 --> Actual: 22500 
Predicted: 40050.0 --> Actual: 38500 
Predicted: 48198.0 --> Actual: 30000 
Predicted: 37674.0 --> Actual: 36000 
Predicted: 19464.0 --> Actual: 19000 
Predicted: 2

Predicted: 35688.0 --> Actual: 84000 
Predicted: 48231.0 --> Actual: 56640 
Predicted: 57859.3 --> Actual: 47500 
Predicted: 46428.6 --> Actual: 43200 
Predicted: 33447.5 --> Actual: 35343 
Predicted: 28885.1 --> Actual: 25000 
Predicted: 22900.0 --> Actual: 20500 
Predicted: 31597.5 --> Actual: 12739 
Predicted: 37030.0 --> Actual: 30000 
Predicted: 40350.0 --> Actual: 36000 
Predicted: 23296.2 --> Actual: 11884 
Predicted: 49599.7 --> Actual: 44999 
Predicted: 18657.6 --> Actual: 21500 
Predicted: 48281.5 --> Actual: 27250 
Predicted: 26138.0 --> Actual: 27500 
Predicted: 22275.0 --> Actual: 54000 
Predicted: 35799.6 --> Actual: 30000 
Predicted: 31344.4 --> Actual: 30000 
Predicted: 34150.0 --> Actual: 34000 
Predicted: 27227.4 --> Actual: 16528 
Predicted: 25450.0 --> Actual: 27500 
Predicted: 34650.0 --> Actual: 30000 
Predicted: 19150.0 --> Actual: 20000 
Predicted: 18673.4 --> Actual: 14750 
Predicted: 57499.5 --> Actual: 62499 
Predicted: 24615.2 --> Actual: 14880 
Predicted: 6

Predicted: 52550.0 --> Actual: 38000 
Predicted: 19075.0 --> Actual: 16000 
Predicted: 20224.4 --> Actual: 8800 
Predicted: 44514.8 --> Actual: 90000 
Predicted: 29200.0 --> Actual: 27000 
Predicted: 24410.0 --> Actual: 24500 
Predicted: 43000.0 --> Actual: 40000 
Predicted: 34250.0 --> Actual: 32500 
Predicted: 37330.5 --> Actual: 45500 
Predicted: 36300.0 --> Actual: 10500 
Predicted: 47676.0 --> Actual: 46000 
Predicted: 39681.2 --> Actual: 38000 
Predicted: 30538.4 --> Actual: 55000 
Predicted: 40232.0 --> Actual: 90000 
Predicted: 18897.2 --> Actual: 14500 
Predicted: 79000.0 --> Actual: 90000 
Predicted: 48240.0 --> Actual: 62000 
Predicted: 37519.2 --> Actual: 17000 
Predicted: 43207.0 --> Actual: 40000 
Predicted: 40080.0 --> Actual: 18240 
Predicted: 23638.0 --> Actual: 20000 
Predicted: 34400.0 --> Actual: 31500 
Predicted: 32060.0 --> Actual: 61832 
Predicted: 29494.8 --> Actual: 27500 
Predicted: 31750.0 --> Actual: 30000 
Predicted: 30650.7 --> Actual: 25000 
Predicted: 39

Predicted: 22622.0 --> Actual: 25000 
Predicted: 42900.0 --> Actual: 32000 
Predicted: 28589.4 --> Actual: 32500 
Predicted: 36300.0 --> Actual: 39000 
Predicted: 20193.5 --> Actual: 26400 
Predicted: 22405.4 --> Actual: 21120 
Predicted: 43750.0 --> Actual: 21000 
Predicted: 35700.0 --> Actual: 30000 
Predicted: 32100.9 --> Actual: 21500 
Predicted: 30014.0 --> Actual: 13737 
Predicted: 41527.0 --> Actual: 38750 
Predicted: 25900.0 --> Actual: 30000 
Predicted: 45193.0 --> Actual: 15840 
Predicted: 21349.2 --> Actual: 67200 
Predicted: 46098.0 --> Actual: 65000 
Predicted: 18417.8 --> Actual: 20579 
Predicted: 42999.8 --> Actual: 65000 
Predicted: 55300.0 --> Actual: 55000 
Predicted: 45250.0 --> Actual: 37500 
Predicted: 47874.0 --> Actual: 21000 
Predicted: 55362.0 --> Actual: 22000 
Predicted: 39172.4 --> Actual: 25000 
Predicted: 29826.6 --> Actual: 22250 
Predicted: 19000.0 --> Actual: 20750 
Predicted: 57800.0 --> Actual: 55000 
Predicted: 59770.0 --> Actual: 75000 
Predicted: 4

Predicted: 41758.4 --> Actual: 50000 
Predicted: 24933.0 --> Actual: 28800 
Predicted: 36000.8 --> Actual: 36500 
Predicted: 41450.0 --> Actual: 37500 
Predicted: 36276.2 --> Actual: 26400 
Predicted: 23991.6 --> Actual: 27500 
Predicted: 32993.6 --> Actual: 25000 
Predicted: 32520.0 --> Actual: 52500 
Predicted: 57250.0 --> Actual: 60000 
Predicted: 22076.0 --> Actual: 24000 
Predicted: 40940.0 --> Actual: 27500 
Predicted: 26750.0 --> Actual: 30000 
Predicted: 21275.0 --> Actual: 20500 
Predicted: 39093.2 --> Actual: 35000 
Predicted: 38170.0 --> Actual: 26750 
Predicted: 29750.0 --> Actual: 27000 
Predicted: 18016.0 --> Actual: 16000 
Predicted: 27490.0 --> Actual: 40000 
Predicted: 42188.2 --> Actual: 61440 
Predicted: 44450.0 --> Actual: 35500 
Predicted: 46350.0 --> Actual: 35000 
Predicted: 38690.5 --> Actual: 39000 
Predicted: 23490.0 --> Actual: 19000 
Predicted: 39577.0 --> Actual: 27000 
Predicted: 29958.0 --> Actual: 32640 
Predicted: 26813.2 --> Actual: 28000 
Predicted: 1

Predicted: 43649.4 --> Actual: 84000 
Predicted: 32440.0 --> Actual: 35000 
Predicted: 42870.0 --> Actual: 35000 
Predicted: 49250.0 --> Actual: 62000 
Predicted: 19680.0 --> Actual: 19500 
Predicted: 34490.0 --> Actual: 40000 
Predicted: 30300.0 --> Actual: 31500 
Predicted: 62149.6 --> Actual: 87499 
Predicted: 28604.0 --> Actual: 23500 
Predicted: 30255.7 --> Actual: 49920 
Predicted: 28130.4 --> Actual: 28500 
Predicted: 44030.4 --> Actual: 37000 
Predicted: 25580.8 --> Actual: 23250 
Predicted: 12559.2 --> Actual: 11904 
Predicted: 28882.2 --> Actual: 26000 
Predicted: 21239.0 --> Actual: 18390 
Predicted: 24635.0 --> Actual: 32500 
Predicted: 23512.5 --> Actual: 21000 
Predicted: 28189.8 --> Actual: 13142 
Predicted: 25782.0 --> Actual: 25528 
Predicted: 31150.0 --> Actual: 61456 
Predicted: 33350.0 --> Actual: 37500 
Predicted: 26675.0 --> Actual: 25440 
Predicted: 38595.3 --> Actual: 39453 
Predicted: 34974.0 --> Actual: 26500 
Predicted: 27550.0 --> Actual: 30000 
Predicted: 2

Predicted: 17944.7 --> Actual: 17049 
Predicted: 27921.4 --> Actual: 52500 
Predicted: 34470.0 --> Actual: 32500 
Predicted: 39096.0 --> Actual: 65000 
Predicted: 24733.7 --> Actual: 32640 
Predicted: 22900.0 --> Actual: 23000 
Predicted: 23332.0 --> Actual: 14100 
Predicted: 23451.7 --> Actual: 18500 
Predicted: 22836.7 --> Actual: 17000 
Predicted: 31750.0 --> Actual: 30000 
Predicted: 13954.5 --> Actual: 18624 
Predicted: 52050.0 --> Actual: 45000 
Predicted: 47299.3 --> Actual: 42500 
Predicted: 48294.0 --> Actual: 75000 
Predicted: 22089.2 --> Actual: 25680 
Predicted: 26490.0 --> Actual: 21000 
Predicted: 39307.6 --> Actual: 42500 
Predicted: 13826.7 --> Actual: 16080 
Predicted: 26992.0 --> Actual: 27000 
Predicted: 17059.4 --> Actual: 16000 
Predicted: 21200.0 --> Actual: 18500 
Predicted: 38086.0 --> Actual: 40000 
Predicted: 53310.0 --> Actual: 98400 
Predicted: 20755.4 --> Actual: 19656 
Predicted: 42600.0 --> Actual: 42500 
Predicted: 35080.0 --> Actual: 28000 
Predicted: 2

Predicted: 31700.0 --> Actual: 28000 
Predicted: 44024.9 --> Actual: 57500 
Predicted: 21995.0 --> Actual: 18000 
Predicted: 21099.1 --> Actual: 21500 
Predicted: 30455.3 --> Actual: 32000 
Predicted: 26853.2 --> Actual: 46080 
Predicted: 31866.5 --> Actual: 19000 
Predicted: 41686.8 --> Actual: 35000 
Predicted: 27300.0 --> Actual: 28000 
Predicted: 42673.9 --> Actual: 15000 
Predicted: 42385.6 --> Actual: 45000 
Predicted: 46350.0 --> Actual: 42500 
Predicted: 21749.9 --> Actual: 16320 
Predicted: 29128.1 --> Actual: 29284 
Predicted: 21700.0 --> Actual: 33600 
Predicted: 31385.8 --> Actual: 28500 
Predicted: 36033.1 --> Actual: 53760 
Predicted: 26617.0 --> Actual: 30500 
Predicted: 32326.4 --> Actual: 51000 
Predicted: 27657.8 --> Actual: 20000 
Predicted: 46000.0 --> Actual: 50000 
Predicted: 30950.0 --> Actual: 30000 
Predicted: 33200.0 --> Actual: 45000 
Predicted: 52388.0 --> Actual: 55000 
Predicted: 27085.5 --> Actual: 16051 
Predicted: 23422.0 --> Actual: 22500 
Predicted: 3

Predicted: 35790.0 --> Actual: 40000 
Predicted: 53700.0 --> Actual: 32500 
Predicted: 33800.0 --> Actual: 30000 
Predicted: 38850.0 --> Actual: 37000 
Predicted: 32150.0 --> Actual: 35000 
Predicted: 34050.0 --> Actual: 32000 
Predicted: 47303.4 --> Actual: 39000 
Predicted: 37250.0 --> Actual: 37500 
Predicted: 22175.0 --> Actual: 15500 
Predicted: 30280.6 --> Actual: 30000 
Predicted: 39149.9 --> Actual: 40000 
Predicted: 21767.2 --> Actual: 20500 
Predicted: 26924.0 --> Actual: 24500 
Predicted: 38051.6 --> Actual: 35000 
Predicted: 27200.0 --> Actual: 26000 
Predicted: 20908.6 --> Actual: 26880 
Predicted: 31222.3 --> Actual: 20500 
Predicted: 24032.2 --> Actual: 20750 
Predicted: 46651.2 --> Actual: 12864 
Predicted: 39000.0 --> Actual: 17517 
Predicted: 25872.9 --> Actual: 10742 
Predicted: 36072.0 --> Actual: 14728 
Predicted: 27469.0 --> Actual: 23000 
Predicted: 59070.0 --> Actual: 62500 
Predicted: 33900.0 --> Actual: 75000 
Predicted: 27306.5 --> Actual: 35000 
Predicted: 3

Predicted: 31154.0 --> Actual: 22500 
Predicted: 28150.0 --> Actual: 25000 
Predicted: 28350.0 --> Actual: 27500 
Predicted: 60549.4 --> Actual: 87499 
Predicted: 29110.0 --> Actual: 52800 
Predicted: 31988.2 --> Actual: 35000 
Predicted: 42814.0 --> Actual: 19500 
Predicted: 32642.0 --> Actual: 29500 
Predicted: 23552.0 --> Actual: 24000 
Predicted: 45460.0 --> Actual: 62400 
Predicted: 42124.0 --> Actual: 35000 
Predicted: 42802.0 --> Actual: 45000 
Predicted: 37016.8 --> Actual: 22250 
Predicted: 24489.2 --> Actual: 18720 
Predicted: 31700.0 --> Actual: 90000 
Predicted: 42500.0 --> Actual: 37500 
Predicted: 44344.2 --> Actual: 42500 
Predicted: 43032.7 --> Actual: 57500 
Predicted: 44427.8 --> Actual: 52499 
Predicted: 25644.0 --> Actual: 12000 
Predicted: 23636.0 --> Actual: 16000 
Predicted: 41324.4 --> Actual: 50000 
Predicted: 46369.0 --> Actual: 60000 
Predicted: 42974.2 --> Actual: 45000 
Predicted: 26942.0 --> Actual: 24000 
Predicted: 33286.0 --> Actual: 30000 
Predicted: 3

Predicted: 30837.2 --> Actual: 11884 
Predicted: 27450.0 --> Actual: 30000 
Predicted: 32674.0 --> Actual: 28500 
Predicted: 33849.4 --> Actual: 12998 
Predicted: 30921.3 --> Actual: 37156 
Predicted: 42471.1 --> Actual: 37500 
Predicted: 42275.0 --> Actual: 28000 
Predicted: 19876.0 --> Actual: 11000 
Predicted: 31189.6 --> Actual: 27500 
Predicted: 51184.0 --> Actual: 34000 
Predicted: 43230.0 --> Actual: 45000 
Predicted: 32317.9 --> Actual: 20500 
Predicted: 37956.0 --> Actual: 13689 
Predicted: 37766.2 --> Actual: 46500 
Predicted: 32655.3 --> Actual: 30000 
Predicted: 38747.1 --> Actual: 13440 
Predicted: 30736.0 --> Actual: 37500 
Predicted: 12475.2 --> Actual: 23040 
Predicted: 48400.0 --> Actual: 26000 
Predicted: 32400.0 --> Actual: 32500 
Predicted: 19350.0 --> Actual: 19000 
Predicted: 32444.0 --> Actual: 16500 
Predicted: 34044.2 --> Actual: 45000 
Predicted: 16676.9 --> Actual: 21250 
Predicted: 42544.7 --> Actual: 14400 
Predicted: 19965.6 --> Actual: 24000 
Predicted: 2

Predicted: 29521.0 --> Actual: 27500 
Predicted: 25052.9 --> Actual: 57500 
Predicted: 46858.0 --> Actual: 18182 
Predicted: 57149.4 --> Actual: 87499 
Predicted: 33293.5 --> Actual: 37500 
Predicted: 46850.0 --> Actual: 34500 
Predicted: 35730.1 --> Actual: 43000 
Predicted: 42250.0 --> Actual: 52500 
Predicted: 29000.0 --> Actual: 28500 
Predicted: 37052.6 --> Actual: 30000 
Predicted: 26448.8 --> Actual: 36000 
Predicted: 27820.0 --> Actual: 29000 
Predicted: 27116.1 --> Actual: 24400 
Predicted: 22200.0 --> Actual: 20500 
Predicted: 34300.0 --> Actual: 30000 
Predicted: 25780.0 --> Actual: 150000 
Predicted: 43932.5 --> Actual: 36000 
Predicted: 23012.2 --> Actual: 22000 
Predicted: 26490.8 --> Actual: 32500 
Predicted: 23244.0 --> Actual: 18000 
Predicted: 23800.0 --> Actual: 17092 
Predicted: 47677.9 --> Actual: 43369 
Predicted: 54249.5 --> Actual: 45000 
Predicted: 28877.0 --> Actual: 74880 
Predicted: 35426.6 --> Actual: 58706 
Predicted: 27050.0 --> Actual: 28500 
Predicted: 

Predicted: 45150.0 --> Actual: 32500 
Predicted: 23950.0 --> Actual: 25000 
Predicted: 36800.0 --> Actual: 36480 
Predicted: 44385.1 --> Actual: 35000 
Predicted: 54700.0 --> Actual: 50000 
Predicted: 28910.0 --> Actual: 22500 
Predicted: 21138.5 --> Actual: 23500 
Predicted: 42158.2 --> Actual: 77500 
Predicted: 51280.4 --> Actual: 47500 
Predicted: 30681.2 --> Actual: 27500 
Predicted: 28832.8 --> Actual: 63000 
Predicted: 50439.9 --> Actual: 60000 
Predicted: 23225.0 --> Actual: 22000 
Predicted: 24049.5 --> Actual: 19000 
Predicted: 37989.4 --> Actual: 35500 
Predicted: 40500.0 --> Actual: 25920 
Predicted: 30200.0 --> Actual: 30750 
Predicted: 25876.3 --> Actual: 21000 
Predicted: 45021.1 --> Actual: 45000 
Predicted: 13531.0 --> Actual: 11856 
Predicted: 28112.7 --> Actual: 27000 
Predicted: 39600.0 --> Actual: 31000 
Predicted: 27037.5 --> Actual: 22000 
Predicted: 44550.0 --> Actual: 50000 
Predicted: 25855.5 --> Actual: 13440 
Predicted: 31459.3 --> Actual: 32500 
Predicted: 3

Predicted: 29535.5 --> Actual: 90000 
Predicted: 34438.0 --> Actual: 7750 
Predicted: 29610.0 --> Actual: 26500 
Predicted: 28251.6 --> Actual: 27500 
Predicted: 31010.0 --> Actual: 32500 
Predicted: 36740.4 --> Actual: 49920 
Predicted: 20275.0 --> Actual: 18000 
Predicted: 37224.0 --> Actual: 55000 
Predicted: 28220.0 --> Actual: 52800 
Predicted: 45022.0 --> Actual: 57500 
Predicted: 26342.0 --> Actual: 24000 
Predicted: 32706.5 --> Actual: 30500 
Predicted: 20975.0 --> Actual: 22500 
Predicted: 21177.5 --> Actual: 19518 
Predicted: 21216.0 --> Actual: 16138 
Predicted: 24334.4 --> Actual: 55000 
Predicted: 29050.0 --> Actual: 25000 
Predicted: 29771.2 --> Actual: 26880 
Predicted: 20600.0 --> Actual: 20900 
Predicted: 32630.6 --> Actual: 27000 
Predicted: 33481.8 --> Actual: 27500 
Predicted: 55980.0 --> Actual: 40000 
Predicted: 44300.0 --> Actual: 42500 
Predicted: 43350.0 --> Actual: 37500 
Predicted: 21724.8 --> Actual: 20375 
Predicted: 50850.0 --> Actual: 60000 
Predicted: 38

Predicted: 31136.4 --> Actual: 19680 
Predicted: 29201.1 --> Actual: 30000 
Predicted: 39504.5 --> Actual: 35400 
Predicted: 42393.7 --> Actual: 37000 
Predicted: 36837.0 --> Actual: 55000 
Predicted: 23950.0 --> Actual: 23000 
Predicted: 33355.9 --> Actual: 60000 
Predicted: 15693.0 --> Actual: 15360 
Predicted: 33632.0 --> Actual: 29500 
Predicted: 46550.0 --> Actual: 45000 
Predicted: 35050.0 --> Actual: 32500 
Predicted: 33939.2 --> Actual: 21120 
Predicted: 55075.3 --> Actual: 57500 
Predicted: 58870.0 --> Actual: 55000 
Predicted: 23877.5 --> Actual: 5942 
Predicted: 33880.8 --> Actual: 31500 
Predicted: 36248.0 --> Actual: 40000 
Predicted: 33082.7 --> Actual: 27500 
Predicted: 31296.0 --> Actual: 39360 
Predicted: 25065.6 --> Actual: 25000 
Predicted: 41494.0 --> Actual: 60000 
Predicted: 30608.0 --> Actual: 36000 
Predicted: 32600.0 --> Actual: 29000 
Predicted: 19148.7 --> Actual: 95000 
Predicted: 25595.0 --> Actual: 32500 
Predicted: 28188.1 --> Actual: 29500 
Predicted: 34

Predicted: 20850.9 --> Actual: 22500 
Predicted: 33826.0 --> Actual: 24000 
Predicted: 34356.4 --> Actual: 62500 
Predicted: 20942.9 --> Actual: 21000 
Predicted: 33754.0 --> Actual: 26500 
Predicted: 25536.0 --> Actual: 26000 
Predicted: 28574.5 --> Actual: 42500 
Predicted: 29553.2 --> Actual: 63000 
Predicted: 50295.2 --> Actual: 52500 
Predicted: 47955.0 --> Actual: 100000 
Predicted: 27309.8 --> Actual: 19000 
Predicted: 37784.0 --> Actual: 25000 
Predicted: 34893.1 --> Actual: 32500 
Predicted: 50470.0 --> Actual: 41000 
Predicted: 39806.2 --> Actual: 28000 
Predicted: 34155.0 --> Actual: 28500 
Predicted: 34461.0 --> Actual: 27500 
Predicted: 35350.0 --> Actual: 32500 
Predicted: 36687.6 --> Actual: 75000 
Predicted: 22941.0 --> Actual: 34212 
Predicted: 33700.0 --> Actual: 30000 
Predicted: 33850.0 --> Actual: 70000 
Predicted: 26925.0 --> Actual: 30000 
Predicted: 20550.0 --> Actual: 18000 
Predicted: 39342.9 --> Actual: 22500 
Predicted: 55988.0 --> Actual: 60000 
Predicted: 

Predicted: 35311.0 --> Actual: 30000 
Predicted: 38704.9 --> Actual: 14400 
Predicted: 47710.6 --> Actual: 55000 
Predicted: 39350.0 --> Actual: 42500 
Predicted: 56431.9 --> Actual: 15840 
Predicted: 21667.0 --> Actual: 21500 
Predicted: 25524.3 --> Actual: 52500 
Predicted: 24200.0 --> Actual: 26000 
Predicted: 31634.2 --> Actual: 12000 
Predicted: 38867.6 --> Actual: 30000 
Predicted: 24670.7 --> Actual: 21500 
Predicted: 38652.0 --> Actual: 12134 
Predicted: 29713.0 --> Actual: 30000 
Predicted: 48384.4 --> Actual: 50000 
Predicted: 37100.0 --> Actual: 34000 
Predicted: 38411.0 --> Actual: 25000 
Predicted: 22626.1 --> Actual: 22406 
Predicted: 49638.2 --> Actual: 45000 
Predicted: 29729.2 --> Actual: 30500 
Predicted: 28702.8 --> Actual: 20416 
Predicted: 24420.0 --> Actual: 30500 
Predicted: 41857.2 --> Actual: 40000 
Predicted: 36498.6 --> Actual: 26000 
Predicted: 64346.0 --> Actual: 35000 
Predicted: 23050.0 --> Actual: 17000 
Predicted: 38080.0 --> Actual: 32500 
Predicted: 2

Predicted: 31537.1 --> Actual: 31500 
Predicted: 31280.0 --> Actual: 25000 
Predicted: 40850.0 --> Actual: 42000 
Predicted: 41619.2 --> Actual: 47500 
Predicted: 27777.0 --> Actual: 17750 
Predicted: 33899.8 --> Actual: 32000 
Predicted: 30700.0 --> Actual: 29700 
Predicted: 33450.0 --> Actual: 35000 
Predicted: 16714.2 --> Actual: 12172 
Predicted: 28980.4 --> Actual: 47500 
Predicted: 24026.8 --> Actual: 27500 
Predicted: 18440.0 --> Actual: 17000 
Predicted: 24837.1 --> Actual: 21000 
Predicted: 48950.0 --> Actual: 45000 
Predicted: 34050.0 --> Actual: 27500 
Predicted: 24581.0 --> Actual: 15360 
Predicted: 42973.6 --> Actual: 45000 
Predicted: 32752.0 --> Actual: 34000 
Predicted: 53299.7 --> Actual: 50000 
Predicted: 23291.1 --> Actual: 57500 
Predicted: 24580.0 --> Actual: 25500 
Predicted: 46212.8 --> Actual: 14400 
Predicted: 40083.5 --> Actual: 13939 
Predicted: 20218.0 --> Actual: 31200 
Predicted: 26231.2 --> Actual: 23520 
Predicted: 23196.0 --> Actual: 18500 
Predicted: 4

Predicted: 43350.0 --> Actual: 55000 
Predicted: 29490.0 --> Actual: 32500 
Predicted: 36800.0 --> Actual: 37500 
Predicted: 27115.0 --> Actual: 25000 
Predicted: 13186.1 --> Actual: 13440 
Predicted: 53348.4 --> Actual: 55000 
Predicted: 18377.8 --> Actual: 16000 
Predicted: 27600.0 --> Actual: 42500 
Predicted: 19850.0 --> Actual: 30720 
Predicted: 27660.0 --> Actual: 22500 
Predicted: 25399.4 --> Actual: 25740 
Predicted: 55499.9 --> Actual: 45000 
Predicted: 39150.0 --> Actual: 36000 
Predicted: 32655.4 --> Actual: 35000 
Predicted: 22006.1 --> Actual: 13080 
Predicted: 26700.0 --> Actual: 30000 
Predicted: 35492.0 --> Actual: 27844 
Predicted: 23750.0 --> Actual: 22500 
Predicted: 30450.0 --> Actual: 29000 
Predicted: 27894.7 --> Actual: 17202 
Predicted: 37470.0 --> Actual: 38500 
Predicted: 33320.0 --> Actual: 20000 
Predicted: 45659.8 --> Actual: 35000 
Predicted: 25266.9 --> Actual: 18500 
Predicted: 19462.2 --> Actual: 15000 
Predicted: 25661.2 --> Actual: 23000 
Predicted: 3

Predicted: 56250.0 --> Actual: 58000 
Predicted: 45464.0 --> Actual: 62500 
Predicted: 26279.3 --> Actual: 22500 
Predicted: 46900.0 --> Actual: 45500 
Predicted: 30698.0 --> Actual: 17500 
Predicted: 21076.0 --> Actual: 18000 
Predicted: 35700.0 --> Actual: 35000 
Predicted: 32750.6 --> Actual: 32500 
Predicted: 16322.5 --> Actual: 12480 
Predicted: 30087.3 --> Actual: 13440 
Predicted: 26032.6 --> Actual: 8409 
Predicted: 18869.1 --> Actual: 33600 
Predicted: 26530.0 --> Actual: 31968 
Predicted: 29403.6 --> Actual: 29500 
Predicted: 19427.1 --> Actual: 18144 
Predicted: 46300.0 --> Actual: 47000 
Predicted: 42710.0 --> Actual: 48500 
Predicted: 45526.6 --> Actual: 50000 
Predicted: 53100.0 --> Actual: 50000 
Predicted: 24043.6 --> Actual: 15500 
Predicted: 27813.9 --> Actual: 11884 
Predicted: 42363.2 --> Actual: 40000 
Predicted: 27890.5 --> Actual: 64999 
Predicted: 21661.7 --> Actual: 16711 
Predicted: 20009.4 --> Actual: 21600 
Predicted: 33650.0 --> Actual: 31000 
Predicted: 26

Predicted: 26600.0 --> Actual: 25000 
Predicted: 22102.6 --> Actual: 20500 
Predicted: 37254.0 --> Actual: 35500 
Predicted: 26365.8 --> Actual: 21000 
Predicted: 47000.0 --> Actual: 62500 
Predicted: 32698.4 --> Actual: 32500 
Predicted: 20500.0 --> Actual: 22500 
Predicted: 37307.0 --> Actual: 23500 
Predicted: 53073.9 --> Actual: 30000 
Predicted: 25710.8 --> Actual: 25000 
Predicted: 35110.0 --> Actual: 24000 
Predicted: 22250.0 --> Actual: 26000 
Predicted: 21308.3 --> Actual: 15000 
Predicted: 19975.5 --> Actual: 21325 
Predicted: 32522.8 --> Actual: 34000 
Predicted: 42566.0 --> Actual: 55000 
Predicted: 27849.9 --> Actual: 22500 
Predicted: 35300.0 --> Actual: 29500 
Predicted: 61939.8 --> Actual: 48500 
Predicted: 44378.7 --> Actual: 50000 
Predicted: 43482.9 --> Actual: 19000 
Predicted: 25670.5 --> Actual: 32500 
Predicted: 31335.7 --> Actual: 31000 
Predicted: 20804.8 --> Actual: 18000 
Predicted: 19484.8 --> Actual: 12576 
Predicted: 29230.0 --> Actual: 28500 
Predicted: 3

Predicted: 21747.7 --> Actual: 13574 
Predicted: 18930.0 --> Actual: 22500 
Predicted: 27852.0 --> Actual: 19200 
Predicted: 22910.0 --> Actual: 24960 
Predicted: 38181.0 --> Actual: 87500 
Predicted: 33115.2 --> Actual: 25500 
Predicted: 29300.8 --> Actual: 12288 
Predicted: 28620.0 --> Actual: 26500 
Predicted: 41207.4 --> Actual: 45000 
Predicted: 27967.4 --> Actual: 32400 
Predicted: 38800.0 --> Actual: 42500 
Predicted: 28179.2 --> Actual: 39360 
Predicted: 34667.2 --> Actual: 25920 
Predicted: 27150.0 --> Actual: 24000 
Predicted: 31828.0 --> Actual: 27000 
Predicted: 18208.8 --> Actual: 35404 
Predicted: 31915.0 --> Actual: 32500 
Predicted: 41300.0 --> Actual: 32707 
Predicted: 38600.0 --> Actual: 45000 
Predicted: 19455.8 --> Actual: 14000 
Predicted: 23150.0 --> Actual: 18500 
Predicted: 28578.0 --> Actual: 47500 
Predicted: 19847.3 --> Actual: 17000 
Predicted: 52570.0 --> Actual: 45000 
Predicted: 37550.0 --> Actual: 35000 
Predicted: 37429.8 --> Actual: 35000 
Predicted: 3

Predicted: 49504.9 --> Actual: 37500 
Predicted: 43881.1 --> Actual: 40000 
Predicted: 31374.0 --> Actual: 30500 
Predicted: 47100.0 --> Actual: 62500 
Predicted: 29426.0 --> Actual: 34560 
Predicted: 37123.9 --> Actual: 47500 
Predicted: 32577.8 --> Actual: 25000 
Predicted: 31600.0 --> Actual: 37500 
Predicted: 32434.0 --> Actual: 23000 
Predicted: 44000.0 --> Actual: 42500 
Predicted: 25350.0 --> Actual: 24000 
Predicted: 25076.7 --> Actual: 15086 
Predicted: 24088.6 --> Actual: 21000 
Predicted: 27175.0 --> Actual: 20000 
Predicted: 26647.2 --> Actual: 25000 
Predicted: 42674.9 --> Actual: 42500 
Predicted: 48877.7 --> Actual: 57600 
Predicted: 27239.5 --> Actual: 30000 
Predicted: 34394.3 --> Actual: 50000 
Predicted: 26795.0 --> Actual: 24000 
Predicted: 17315.1 --> Actual: 14500 
Predicted: 34450.4 --> Actual: 26500 
Predicted: 48484.5 --> Actual: 12240 
Predicted: 29350.0 --> Actual: 22000 
Predicted: 42720.0 --> Actual: 18000 
Predicted: 36909.2 --> Actual: 47500 
Predicted: 5

Predicted: 31938.0 --> Actual: 22500 
Predicted: 19118.6 --> Actual: 16000 
Predicted: 36986.0 --> Actual: 42500 
Predicted: 46050.0 --> Actual: 65000 
Predicted: 27347.6 --> Actual: 15436 
Predicted: 31665.0 --> Actual: 27500 
Predicted: 47196.0 --> Actual: 50000 
Predicted: 47481.7 --> Actual: 42500 
Predicted: 19526.2 --> Actual: 19000 
Predicted: 34856.9 --> Actual: 40000 
Predicted: 37677.5 --> Actual: 28000 
Predicted: 40359.5 --> Actual: 52500 
Predicted: 48844.0 --> Actual: 40000 
Predicted: 67599.8 --> Actual: 26500 
Predicted: 36460.0 --> Actual: 42000 
Predicted: 29568.0 --> Actual: 18800 
Predicted: 21872.0 --> Actual: 15000 
Predicted: 51539.9 --> Actual: 62499 
Predicted: 20810.0 --> Actual: 15500 
Predicted: 21796.6 --> Actual: 21500 
Predicted: 45150.0 --> Actual: 52500 
Predicted: 52500.0 --> Actual: 69000 
Predicted: 39379.0 --> Actual: 36500 
Predicted: 40030.0 --> Actual: 21000 
Predicted: 28090.0 --> Actual: 47500 
Predicted: 23652.4 --> Actual: 70000 
Predicted: 3

Predicted: 27519.3 --> Actual: 27000 
Predicted: 36075.0 --> Actual: 26000 
Predicted: 31742.3 --> Actual: 36864 
Predicted: 29536.1 --> Actual: 59520 
Predicted: 33516.0 --> Actual: 28000 
Predicted: 39960.0 --> Actual: 37500 
Predicted: 20090.6 --> Actual: 36480 
Predicted: 22838.0 --> Actual: 32400 
Predicted: 46751.0 --> Actual: 37500 
Predicted: 14452.6 --> Actual: 17875 
Predicted: 22610.6 --> Actual: 19500 
Predicted: 25200.0 --> Actual: 25000 
Predicted: 23429.0 --> Actual: 21500 
Predicted: 33720.7 --> Actual: 15552 
Predicted: 37950.0 --> Actual: 40000 
Predicted: 20768.0 --> Actual: 15000 
Predicted: 13014.3 --> Actual: 11884 
Predicted: 50850.0 --> Actual: 40000 
Predicted: 40455.5 --> Actual: 13843 
Predicted: 42368.0 --> Actual: 39000 
Predicted: 19732.7 --> Actual: 20500 
Predicted: 35146.0 --> Actual: 29500 
Predicted: 44567.5 --> Actual: 80000 
Predicted: 26214.8 --> Actual: 32640 
Predicted: 50064.0 --> Actual: 90000 
Predicted: 28340.0 --> Actual: 22107 
Predicted: 5

Predicted: 20075.0 --> Actual: 20000 
Predicted: 19511.0 --> Actual: 21000 
Predicted: 32300.0 --> Actual: 33500 
Predicted: 22813.1 --> Actual: 37200 
Predicted: 60869.8 --> Actual: 90000 
Predicted: 15704.4 --> Actual: 28252 
Predicted: 46002.8 --> Actual: 31000 
Predicted: 28624.0 --> Actual: 21500 
Predicted: 30077.5 --> Actual: 20400 
Predicted: 17040.4 --> Actual: 14404 
Predicted: 24400.0 --> Actual: 29750 
Predicted: 41866.0 --> Actual: 22080 
Predicted: 32226.0 --> Actual: 26500 
Predicted: 22703.6 --> Actual: 22000 
Predicted: 55915.0 --> Actual: 20160 
Predicted: 21690.7 --> Actual: 24336 
Predicted: 23840.0 --> Actual: 24000 
Predicted: 17344.4 --> Actual: 21312 
Predicted: 33008.4 --> Actual: 20000 
Predicted: 28518.0 --> Actual: 24000 
Predicted: 27192.0 --> Actual: 52500 
Predicted: 20083.3 --> Actual: 10500 
Predicted: 45870.0 --> Actual: 15000 
Predicted: 34750.0 --> Actual: 40000 
Predicted: 23452.8 --> Actual: 32500 
Predicted: 43860.0 --> Actual: 32500 
Predicted: 4

Predicted: 30301.1 --> Actual: 18143 
Predicted: 52622.5 --> Actual: 14400 
Predicted: 34154.4 --> Actual: 55000 
Predicted: 31704.0 --> Actual: 32500 
Predicted: 56228.0 --> Actual: 75000 
Predicted: 34549.9 --> Actual: 90000 
Predicted: 40400.0 --> Actual: 35000 
Predicted: 25350.0 --> Actual: 27500 
Predicted: 31850.9 --> Actual: 53581 
Predicted: 41162.0 --> Actual: 41000 
Predicted: 28752.0 --> Actual: 26000 
Predicted: 49021.9 --> Actual: 13516 
Predicted: 34764.0 --> Actual: 40000 
Predicted: 44335.0 --> Actual: 87000 
Predicted: 27728.0 --> Actual: 23500 
Predicted: 36237.0 --> Actual: 10000 
Predicted: 50116.4 --> Actual: 80000 
Predicted: 26296.0 --> Actual: 40320 
Predicted: 34613.7 --> Actual: 27750 
Predicted: 30750.0 --> Actual: 12500 
Predicted: 27002.6 --> Actual: 27000 
Predicted: 18893.3 --> Actual: 14100 
Predicted: 27522.4 --> Actual: 32750 
Predicted: 48079.8 --> Actual: 14515 
Predicted: 51850.0 --> Actual: 38000 
Predicted: 34325.0 --> Actual: 30000 
Predicted: 3

Predicted: 16995.8 --> Actual: 18240 
Predicted: 45725.4 --> Actual: 150000 
Predicted: 21218.0 --> Actual: 19796 
Predicted: 27351.6 --> Actual: 10000 
Predicted: 24519.0 --> Actual: 16800 
Predicted: 30248.4 --> Actual: 20800 
Predicted: 34910.0 --> Actual: 26500 
Predicted: 43050.0 --> Actual: 42500 
Predicted: 41000.0 --> Actual: 37500 
Predicted: 23620.0 --> Actual: 15000 
Predicted: 25400.1 --> Actual: 13440 
Predicted: 24086.4 --> Actual: 30720 
Predicted: 22839.4 --> Actual: 16080 
Predicted: 26735.0 --> Actual: 24000 
Predicted: 27550.0 --> Actual: 24000 
Predicted: 24875.0 --> Actual: 31500 
Predicted: 26208.0 --> Actual: 22000 
Predicted: 39585.0 --> Actual: 42500 
Predicted: 30028.0 --> Actual: 28500 
Predicted: 26877.4 --> Actual: 13440 
Predicted: 21226.9 --> Actual: 23923 
Predicted: 25625.0 --> Actual: 24000 
Predicted: 34346.1 --> Actual: 138240 
Predicted: 43324.0 --> Actual: 36000 
Predicted: 41550.0 --> Actual: 37500 
Predicted: 41034.4 --> Actual: 30000 
Predicted:

In [14]:
#Let op! This took ~51 minutes to run on my 2015 macbook
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV

#Hyperparameter tuning, these are the params we wanna tune, understand what they all mean for the purpose of
#writing the report
leaf_size = list(range(1,50))
n_neighbors = list(range(1,30))
p=[1,2]

#Convert to dictionary
hyperparameters = dict(leaf_size=leaf_size, n_neighbors=n_neighbors, p=p)

#Create new KNN object
KNNR_2 = KNeighborsRegressor()


#Here a Halving Grid Search CV is used to find the best hyperparameters for the model
#It takes a long time but is much faster than classic Grid Search CV, at roughly equivalent performance
halving_cv = HalvingGridSearchCV(
    KNNR_2, hyperparameters, n_jobs=-1, cv=3, min_resources="exhaust", factor=3,verbose = 10)


print("HalvingGrid search...")
search_time_start = time.time()
best_model = halving_cv.fit(X_train,y_train)
print("Grid search time:", time.time() - search_time_start)
print('\n')

#Print The value of best Hyperparameters
print('Best leaf_size:', best_model.best_estimator_.get_params()['leaf_size'])
print('Best p:', best_model.best_estimator_.get_params()['p'])
print('Best n_neighbors:', best_model.best_estimator_.get_params()['n_neighbors'])

HalvingGrid search...
n_iterations: 8
n_required_iterations: 8
n_possible_iterations: 8
min_resources_: 67
max_resources_: 146860
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 2842
n_resources: 67
Fitting 3 folds for each of 2842 candidates, totalling 8526 fits
----------
iter: 1
n_candidates: 948
n_resources: 201
Fitting 3 folds for each of 948 candidates, totalling 2844 fits
----------
iter: 2
n_candidates: 316
n_resources: 603
Fitting 3 folds for each of 316 candidates, totalling 948 fits
----------
iter: 3
n_candidates: 106
n_resources: 1809
Fitting 3 folds for each of 106 candidates, totalling 318 fits
----------
iter: 4
n_candidates: 36
n_resources: 5427
Fitting 3 folds for each of 36 candidates, totalling 108 fits




----------
iter: 5
n_candidates: 12
n_resources: 16281
Fitting 3 folds for each of 12 candidates, totalling 36 fits
----------
iter: 6
n_candidates: 4
n_resources: 48843
Fitting 3 folds for each of 4 candidates, totalling 12 fits
----------
iter: 7
n_candidates: 2
n_resources: 146529
Fitting 3 folds for each of 2 candidates, totalling 6 fits
Grid search time: 3085.4615092277527
None


Best leaf_size: 9
Best p: 1
Best n_neighbors: 24


In [22]:
#Train new model with best hyper parameters
#KNNR_3 = KNeighborsRegressor(leaf_size = 9,p=1,n_neighbors = 24)
#tuned_model = KNNR_3.fit(X_train,y_train)
y_pred_tuned = best_model.predict(X_valid)

In [23]:
#Evaluate new model, compare its performance to the first
MAE_tuned = mean_absolute_error(y_valid,y_pred_tuned)

r2_tuned = r2_score(y_valid,y_pred_tuned)
print(f'MAE of tuned model: {MAE_tuned}\nr2 tuned model: {r2_tuned}')
print('\n')
print(f'MAE before hyper param tuning:{MAE}\nr2 before tuning: {r2}')

MAE of tuned model: 9958.36160136727
r2 tuned model: 0.2993190618316982


MAE before hyper param tuning:9913.842137516853
r2 before tuning: 0.27429773090495924


In [None]:
#Turns out hyper param tuning in this way didn't really improve our model... 
#Might want to look around for other approaches

In [None]:
#Finally, run the model on the test data to get test predictions, final model evaluation

# This is hyper param tuning with classic Grid Search CV
#This step takes 6 hours and might light your computer on fire

#Hyperparameter tuning, these are the params we wanna tune, understand what they all mean for the purpose of
#writing the report
leaf_size = list(range(1,50))
n_neighbors = list(range(1,30))
p=[1,2]

#Convert to dictionary
hyperparameters = dict(leaf_size=leaf_size, n_neighbors=n_neighbors, p=p)

#Create new KNN object
KNNR_2 = KNeighborsRegressor()


#Use GridSearch, understand how this works for writing report
clf = GridSearchCV(KNNR_2, hyperparameters, cv=3, n_jobs = -1, verbose = 10)

#Fit the model

print("Grid search...")
search_time_start = time.time()

best_model = clf.fit(X_train,y_train)
print(print("Grid search time:", time.time() - search_time_start))
print('\n')

#Print The value of best Hyperparameters
print('Best leaf_size:', best_model.best_estimator_.get_params()['leaf_size'])
print('Best p:', best_model.best_estimator_.get_params()['p'])
print('Best n_neighbors:', best_model.best_estimator_.get_params()['n_neighbors'])

