# KNN

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale, StandardScaler
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import neighbors
from sklearn.svm import SVR

In [3]:
# Silence every warning raised from here on (no category or module restriction is given).
from warnings import filterwarnings
filterwarnings(action="ignore")

In [6]:
# Load the Hitters data and keep only rows without missing values.
categorical_cols = ["League", "Division", "NewLeague"]
df = pd.read_csv("Hitters.csv").dropna()

# One-hot encode the categorical columns; a single indicator per column is kept below.
dms = pd.get_dummies(df[categorical_cols])

# Target is Salary; predictors are the remaining numeric columns cast to float64,
# followed by one dummy indicator for each categorical column.
y = df["Salary"]
X_ = df.drop(columns=["Salary", *categorical_cols]).astype("float64")
X = pd.concat([X_, dms[["League_N", "Division_W", "NewLeague_N"]]], axis=1)

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [16]:
# Summarize the training features: entry count, column names, non-null counts and dtypes.
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 197 entries, 183 to 133
Data columns (total 19 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   AtBat        197 non-null    float64
 1   Hits         197 non-null    float64
 2   HmRun        197 non-null    float64
 3   Runs         197 non-null    float64
 4   RBI          197 non-null    float64
 5   Walks        197 non-null    float64
 6   Years        197 non-null    float64
 7   CAtBat       197 non-null    float64
 8   CHits        197 non-null    float64
 9   CHmRun       197 non-null    float64
 10  CRuns        197 non-null    float64
 11  CRBI         197 non-null    float64
 12  CWalks       197 non-null    float64
 13  PutOuts      197 non-null    float64
 14  Assists      197 non-null    float64
 15  Errors       197 non-null    float64
 16  League_N     197 non-null    uint8  
 17  Division_W   197 non-null    uint8  
 18  NewLeague_N  197 non-null    uint8  
dtypes: flo

### MODEL & PREDICTION

In [10]:
# Baseline KNN regressor: no hyperparameters are overridden.
# NOTE(review): the features are passed in on their raw scales; KNN is distance-based,
# so large-valued columns will likely dominate the neighbor search -- consider
# standardizing (StandardScaler is imported above but not used in the cells shown).
baseline_knn = KNeighborsRegressor()
knn_model = baseline_knn.fit(X_train, y_train)

In [12]:
# Exploratory: list every attribute and method name available on the fitted model.
dir(knn_model)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_algorithm_metric',
 '_check_n_features',
 '_estimator_type',
 '_fit',
 '_fit_X',
 '_fit_method',
 '_get_param_names',
 '_get_tags',
 '_kneighbors_reduce_func',
 '_more_tags',
 '_pairwise',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_tree',
 '_validate_data',
 '_y',
 'algorithm',
 'effective_metric_',
 'effective_metric_params_',
 'fit',
 'get_params',
 'kneighbors',
 'kneighbors_graph',
 'leaf_size',
 'metric',
 'metric_params',
 'n_features_in_',
 'n_jobs',
 'n_neighbors',
 'n_samples_fit_',
 'p',
 'predict',
 'radius',
 'sc

In [17]:
# Predict Salary for the held-out test rows with the baseline model, then display the values.
y_pred = knn_model.predict(X_test)
y_pred

array([ 510.3334,  808.3334,  772.5   ,  125.5   , 1005.    ,  325.5   ,
        216.5   ,  101.5   ,  982.    ,  886.6666,  590.    ,  901.6666,
        831.6666,  157.5   ,  393.    , 1005.    ,  735.5   ,   97.    ,
        884.4   ,  302.    ,  450.    ,  817.6666,  832.6666,  392.3334,
        528.    ,   81.6   ,  735.    ,  470.    ,  722.5   ,  101.    ,
         90.5   ,   74.6   ,  748.3334,  217.    ,  280.5334, 1044.5   ,
        955.    ,  232.    ,   78.6   ,  529.    ,   77.6   ,  106.5   ,
        516.6666,  593.6666, 1005.    ,  649.1666,  715.    ,  101.5   ,
        134.5   ,  810.    ,  743.    ,  521.3334,  664.3334,  195.    ,
        102.4   ,  728.5   ,  488.    ,  962.5   ,  230.8334, 1040.    ,
        885.    ,  542.    ,  720.4   ,  571.    ,  735.    ,   81.6   ])

In [19]:
# Root-mean-squared error of the baseline predictions on the test split
# (same units as Salary).
mse = mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(mse)
RMSE

426.6570764525201

### MODEL TUNING

In [22]:
# Test-set RMSE for every k from 1 to 99, in order: RMSE_[k - 1] belongs to n_neighbors=k.
#
# Loop-local names are used on purpose. The previous version rebound the notebook-level
# `knn_model` and `y_pred` on every iteration, so after this cell they silently pointed
# at the k=99 model and its predictions instead of the baseline fitted earlier.
#
# NOTE: these errors are measured on X_test, so they only illustrate how k affects the
# fit. Picking k from this list would leak test data into model selection; use
# cross-validation on the training split for that.
RMSE_ = []

for k in range(1, 100):
    model_k = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)
    y_pred_k = model_k.predict(X_test)
    RMSE_.append(np.sqrt(mean_squared_error(y_test, y_pred_k)))
RMSE_

[455.03925390751965,
 415.99629571490965,
 420.6765370082348,
 428.8564674588792,
 426.6570764525201,
 423.5071669008732,
 414.9361222421057,
 413.7094731463598,
 417.84419990871265,
 421.6252180741266,
 425.32356114758335,
 427.55166827728755,
 433.38660621123563,
 431.1996979088877,
 433.82362418875744,
 437.17725576521354,
 435.7774833343415,
 438.1270242871465,
 443.5164172646945,
 439.99677941119586,
 432.8621874376963,
 429.6442608138743,
 430.51494520326924,
 430.8423998443682,
 430.3394502245506,
 427.6988019272701,
 425.2087796018491,
 426.2337111131765,
 425.52046696934565,
 425.91290021417433,
 427.014082049554,
 425.4498838814169,
 423.91773051864845,
 422.1717421199587,
 420.31235970496095,
 422.1068929363204,
 423.3005383323169,
 423.3928353110623,
 423.60979319978,
 421.8760026571793,
 419.87831673626675,
 419.5726949375753,
 419.68170881647836,
 419.5254707672875,
 419.32866211449607,
 418.2854007286153,
 418.8412186848561,
 418.0059326458897,
 418.4658703584224,
 417.1

### GRIDSEARCHCV

In [33]:
# Search grid: candidate neighbor counts 1 through 29 (upper bound 30 is exclusive).
knn_params = {"n_neighbors": np.arange(1, 30)}

In [34]:
# Unfitted estimator with no hyperparameters overridden; used as the base estimator for the grid search.
knn_model_ = KNeighborsRegressor()

In [35]:
# Cross-validated search (cv=10) over the candidate values in `knn_params`,
# fitted on the training split only.
# NOTE(review): no `scoring` is passed, so candidates are ranked by the estimator's
# default score (R^2 for regressors per the scikit-learn docs), not by the RMSE
# reported elsewhere in this notebook -- confirm this is intended.
grid_search = GridSearchCV(estimator=knn_model_, param_grid=knn_params, cv=10)
knn_cv_model = grid_search.fit(X_train, y_train)

In [36]:
# Parameter setting selected by the cross-validated search.
knn_cv_model.best_params_

{'n_neighbors': 8}

### FINAL MODEL

In [45]:
# Refit a KNN regressor on the training split using the neighbor count chosen by the search.
best_k = knn_cv_model.best_params_["n_neighbors"]
knn_tuned = KNeighborsRegressor(n_neighbors=best_k).fit(X_train, y_train)

In [46]:
# Predict Salary for the held-out test rows with the tuned model, then display the values.
y_pred_ = knn_tuned.predict(X_test)
y_pred_

array([ 624.583375,  812.083375,  846.25    ,  155.3125  ,  850.      ,
        310.9375  ,  215.3125  ,  125.5625  ,  751.875   ,  981.25    ,
        634.0625  ,  878.541625,  963.541625,  155.3125  ,  390.625   ,
        850.      ,  765.9375  ,   93.4375  ,  894.416625,  313.75    ,
        609.583375,  704.166625,  702.5     ,  481.145875,  469.0625  ,
         80.375   ,  915.696375,  418.75    ,  574.6875  ,   95.      ,
         92.1875  ,   73.8125  ,  638.333375,  229.375   ,  263.458375,
       1204.446375,  981.25    ,  230.625   ,   75.6875  ,  633.75    ,
         77.875   ,  115.5625  ,  660.416625,  565.729125,  861.875   ,
        685.9375  ,  850.      ,  107.8125  ,  155.3125  ,  704.375   ,
        706.25    ,  699.583375,  713.645875,  197.5     ,  111.5     ,
        738.4375  ,  691.666625,  843.229125,  188.270875,  987.5     ,
        981.25    ,  505.      ,  840.875   ,  566.875   ,  787.5     ,
         86.625   ])

In [47]:
# Root-mean-squared error of the tuned model's predictions on the test split.
mse_tuned = mean_squared_error(y_test, y_pred_)
np.sqrt(mse_tuned)

413.7094731463598