In [1]:
import warnings

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import skew
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

# NOTE(review): this silences every warning for the rest of the session,
# which can hide deprecations or data problems — consider a narrower filter.
warnings.filterwarnings("ignore")

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Display all DataFrame columns instead of truncating wide frames.
pd.set_option("display.max_columns", None)

In [5]:
# Load the advertising dataset, using the first CSV column as the row index
# so it is not treated as a data column.
# A forward-slash relative path resolves on Windows, Linux and macOS alike;
# the previous raw string contained literal doubled backslashes, which only
# Windows accepts as a path separator.
df = pd.read_csv("Data/Advertising.csv", index_col=0)

In [6]:
# Preview the first five rows to check that the file parsed as expected.
df.head(n=5)

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [7]:
# Count missing values per column (summing the boolean mask down the rows).
df.isnull().sum(axis=0)

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64

In [22]:
# (rows, columns) of the loaded frame.
df.shape

(200, 4)

In [8]:
# Column labels available for feature/target selection.
df.columns

Index(['TV', 'Radio', 'Newspaper', 'Sales'], dtype='object')

In [12]:
# Feature matrix: the three advertising-channel columns.
X = df.loc[:, ['TV', 'Radio', 'Newspaper']]
# Target vector: the Sales column.
y = df.loc[:, 'Sales']

In [14]:
# Hold out 30% of the rows for evaluation. The fixed random_state makes the
# partition reproducible across runs.
# (train_test_split is imported in the notebook's top import cell.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=10
)

In [17]:
# Fit a k-nearest-neighbours *regressor* (not a classifier) with
# k=3 and Euclidean distance, then predict the held-out rows.
# (KNeighborsRegressor is imported in the notebook's top import cell.)
# NOTE(review): the features are passed in unscaled, so the column with the
# largest numeric range dominates the Euclidean distance. StandardScaler is
# already imported — consider standardising X before fitting.
model_KNN = KNeighborsRegressor(n_neighbors=3, metric="euclidean")
model_KNN.fit(X_train, y_train)
y_pred = model_KNN.predict(X_test)

In [24]:
# Persist the fitted model to disk so it can be shared (e.g. sent by email).
# (joblib is imported in the notebook's top import cell.)
joblib.dump(model_KNN, "trained_knn_model.pkl")

['trained_knn_model.pkl']

In [27]:
# Load the model back from disk.
# joblib files are pickle-based: only load files from a trusted source.
# NOTE(review): in the cells visible here the reloaded `model` is never used;
# the metrics below are computed from `model_KNN`'s predictions instead.
model = joblib.load("trained_knn_model.pkl")

In [28]:
# Score the current predictions on the held-out set: coefficient of
# determination (R²) and mean squared error.
# (r2_score and mean_squared_error are imported in the top import cell.)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(r2)
print(mse)

0.925958613863326
2.778240740740741


In [29]:
# Range of the held-out target values, useful for judging the MSE's scale.
min(y_test), max(y_test)

(1.6, 27.0)

In [30]:
# Sweep the neighbourhood size K from 1 to 30 and record the test-set score
# for each value, keyed by K.
# Note: the value printed as "Accuracy" is the R² score from r2_score, not a
# classification accuracy.
# NOTE(review): K is being compared on the test split itself, so the best
# score is an optimistic estimate — consider a validation split or CV.
acc_dict = {}
for K in range(1, 31):
    # fit() returns the estimator, so construction and training can be chained.
    model_KNN = KNeighborsRegressor(n_neighbors=K, metric="euclidean").fit(X_train, y_train)

    # Evaluate this K on the held-out rows.
    y_pred = model_KNN.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    acc_dict[K] = r2
    print("Accuracy is: {} for K-value: {}".format(r2, K))

Accuracy is: 0.9383841428002567 for K-value: 1
Accuracy is: 0.9182264031854402 for K-value: 2
Accuracy is: 0.925958613863326 for K-value: 3
Accuracy is: 0.932788657210048 for K-value: 4
Accuracy is: 0.9149539491202795 for K-value: 5
Accuracy is: 0.8929665815850817 for K-value: 6
Accuracy is: 0.8744848671701098 for K-value: 7
Accuracy is: 0.8654478098543175 for K-value: 8
Accuracy is: 0.8492939672848692 for K-value: 9
Accuracy is: 0.8402713283054284 for K-value: 10
Accuracy is: 0.8371675782423147 for K-value: 11
Accuracy is: 0.8373829505130865 for K-value: 12
Accuracy is: 0.8276548833014651 for K-value: 13
Accuracy is: 0.8283131539137829 for K-value: 14
Accuracy is: 0.8161422263836416 for K-value: 15
Accuracy is: 0.8004112554208694 for K-value: 16
Accuracy is: 0.7942012190658417 for K-value: 17
Accuracy is: 0.7802634154775825 for K-value: 18
Accuracy is: 0.7676471516517576 for K-value: 19
Accuracy is: 0.7573316851506093 for K-value: 20
Accuracy is: 0.7503499480255902 for K-value: 21
Acc