In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from scipy.stats import skew

import warnings
# Install a blanket "ignore" filter: every warning raised after this point is
# suppressed, including deprecation notices from the libraries imported above.
warnings.filterwarnings("ignore")

# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline
# None removes the cap on how many columns pandas shows when displaying a frame.
pd.set_option("display.max_columns",None)

In [2]:
# Read the advertising CSV; its first column is used as the DataFrame index.
df = pd.read_csv(
    "DATA/Advertising.csv",
    index_col=0,
)

In [3]:
# Preview the first five rows (five is also the default for head()).
df.head(n=5)

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [4]:
# Count missing values per column (isna is the canonical name for isnull).
df.isna().sum()

TV           0
radio        0
newspaper    0
sales        0
dtype: int64

In [5]:
# Display the column labels of the loaded frame.
df.columns

Index(['TV', 'radio', 'newspaper', 'sales'], dtype='object')

In [6]:
# Feature matrix: the TV, radio and newspaper columns.
X = df.loc[:, ['TV', 'radio', 'newspaper']]
# Target vector: the sales column.
y = df.loc[:, 'sales']

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

In [8]:
# Baseline k-nearest-neighbours regressor: k = 3, Euclidean distance.
# NOTE(review): the features are passed in unscaled even though StandardScaler
# is imported at the top of the notebook -- confirm this is intended for a
# distance-based model.

from sklearn.neighbors import KNeighborsRegressor

# fit() returns the estimator itself, so construction and training are chained.
model_KNN = KNeighborsRegressor(metric="euclidean", n_neighbors=3).fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred = model_KNN.predict(X_test)

In [9]:
# Display the predictions produced for the test split.
y_pred

array([16.76666667,  8.86666667, 18.7       , 23.8       , 20.1       ,
       14.63333333, 15.9       , 20.93333333, 19.43333333,  8.86666667,
       24.        ,  9.8       ,  8.86666667, 20.1       , 21.23333333,
       12.13333333, 19.26666667,  5.63333333, 19.33333333, 21.23333333,
       16.46666667,  5.63333333, 22.46666667, 15.96666667, 14.66666667,
        7.13333333,  9.46666667,  9.8       , 19.93333333,  6.83333333,
       13.23333333, 18.93333333,  7.16666667,  6.13333333, 16.2       ,
       13.6       ,  9.66666667, 18.        , 10.53333333, 12.        ,
       10.66666667, 12.53333333, 14.63333333,  9.53333333, 20.1       ,
        8.5       , 16.46666667, 13.56666667,  6.5       , 11.5       ,
       10.2       , 11.66666667, 23.8       , 19.2       , 14.63333333,
        8.23333333, 22.2       , 19.1       , 12.3       , 10.5       ])

In [10]:
import joblib

# Save the fitted regressor to disk.
joblib.dump(model_KNN, "trained_knn_model.pkl")

# Load the model back from disk.
# joblib files are pickle-based: only load files that come from a trusted source.
model = joblib.load("trained_knn_model.pkl")

# Sanity check on the round trip: the reloaded estimator must reproduce the
# in-memory estimator's predictions on the test split. Raises AssertionError
# if the persisted model does not match.
np.testing.assert_allclose(model.predict(X_test), model_KNN.predict(X_test))

In [11]:
from sklearn.metrics import r2_score, mean_squared_error

# Score the held-out predictions: coefficient of determination, then MSE.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# One value per line, R^2 first.
print(r2, mse, sep="\n")


0.925958613863326
2.778240740740741


In [12]:
# Smallest and largest target value in the test split, shown as a tuple.
min(y_test), max(y_test)

(1.6, 27.0)

In [13]:
# Sweep K from 1 to 30, fitting one KNN regressor per K and recording its
# R^2 on the test split in acc_dict (key: K, value: R^2).
# NOTE(review): K is being compared on the same test split that is used for
# the reported scores; consider a validation split or cross-validation.
# NOTE: the loop rebinds model_KNN, y_pred and r2, so after this cell they
# hold the K = 30 results rather than those of the earlier K = 3 model.
acc_dict = dict()
for K in range(1, 31):

    # Build the regressor for this K value
    model_KNN = KNeighborsRegressor(n_neighbors=K, metric="euclidean")

    # Fit on the training split
    model_KNN.fit(X_train, y_train)

    # Predict the test split and score it
    y_pred = model_KNN.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    # r2_score returns the coefficient of determination (a regression metric),
    # so the message is labelled as R2 rather than accuracy.
    print("R2 score is: {} for K-Value: {}".format(r2, K))
    acc_dict[K] = r2

Accuracy is: 0.9383841428002567 for K-Value: 1
Accuracy is: 0.9182264031854402 for K-Value: 2
Accuracy is: 0.925958613863326 for K-Value: 3
Accuracy is: 0.932788657210048 for K-Value: 4
Accuracy is: 0.9149539491202795 for K-Value: 5
Accuracy is: 0.8929665815850817 for K-Value: 6
Accuracy is: 0.8744848671701098 for K-Value: 7
Accuracy is: 0.8654478098543175 for K-Value: 8
Accuracy is: 0.8492939672848692 for K-Value: 9
Accuracy is: 0.8402713283054284 for K-Value: 10
Accuracy is: 0.8371675782423147 for K-Value: 11
Accuracy is: 0.8373829505130865 for K-Value: 12
Accuracy is: 0.8276548833014651 for K-Value: 13
Accuracy is: 0.8283131539137829 for K-Value: 14
Accuracy is: 0.8161422263836416 for K-Value: 15
Accuracy is: 0.8004112554208694 for K-Value: 16
Accuracy is: 0.7942012190658417 for K-Value: 17
Accuracy is: 0.7802634154775825 for K-Value: 18
Accuracy is: 0.7676471516517576 for K-Value: 19
Accuracy is: 0.7573316851506093 for K-Value: 20
Accuracy is: 0.7503499480255902 for K-Value: 21
Acc