In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split,KFold,GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
import warnings
warnings.filterwarnings('ignore')

# Meical Cost Personal

In [2]:
insurance = pd.read_csv(r'D:\Datawork\Practical Machine Learning\Cases\Medical Cost Personal\insurance.csv')
dum_insu = pd.get_dummies(insurance,prefix='x',prefix_sep="_")
x = dum_insu.drop('charges',axis=1)
y = dum_insu['charges']

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=23)
knn_reg = KNeighborsRegressor(n_neighbors=3)
knn_reg.fit(x_train,y_train)
y_pred = knn_reg.predict(x_test)
print("R2 score with KNN : ",r2_score(y_test,y_pred))

#Using Standard Scalar
std_scalar = StandardScaler()
std_scalar.fit(x_train,y_train)
x_scl_train = std_scalar.transform(x_train)
x_scl_test = std_scalar.transform(x_test)
y_pred = knn_reg.predict(x_scl_test)
print("R2 score With Stardard Scaling KNN : ",r2_score(y_test,y_pred))

#Using MinMax Scalar
mm_scalar = MinMaxScaler()
mm_scalar.fit(x_train,y_train)
x_scl_train = mm_scalar.transform(x_train)
x_scl_test = mm_scalar.transform(x_test)
y_pred = knn_reg.predict(x_scl_test)
print("R2 score With Minmax Scaling KNN : ",r2_score(y_test,y_pred))

#Using Standard Scalar with Pipeline
pipe_std = Pipeline([('SCL',std_scalar),('KNN',knn_reg)])
pipe_std.fit(x_train,y_train)
y_pred = pipe_std.predict(x_scl_test)
print("R2 score With Stardard Scaling KNN with Pipeline : ",r2_score(y_test,y_pred))

#Using MinMax Scalar with Pipeline
pipe_mm = Pipeline([('MM',mm_scalar),('KNN',knn_reg)])
pipe_mm.fit(x_train,y_train)
y_pred = pipe_mm.predict(x_scl_test)
print("R2 score With Minmax Scaling KNN with Pipeline : ",r2_score(y_test,y_pred))

#GridSearchCV
kfold = KFold(n_splits=5, shuffle=True, random_state=23)
params = {'n_neighbors' : [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]}
gcv = GridSearchCV(knn_reg, param_grid=params,cv =kfold)
gcv.fit(x,y)
print("Best Parameters using KNN with pipeline : ", gcv.best_params_)
print("Best R2 score using KNN with pipeline : ", gcv.best_score_)

#Using Standard Scalar and GridSearchCV with Pipeline
pipe_std = Pipeline([('SCL',std_scalar),('KNN',knn_reg)])
params = {'KNN__n_neighbors' : [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]}
gcv = GridSearchCV(pipe_std, param_grid=params,cv =kfold)
gcv.fit(x,y)
print("Best Parameters using KNN and Standard Scaling with pipeline : ", gcv.best_params_)
print("Best R2 score using KNN and Standard Scaling  with pipeline : ", gcv.best_score_)

#Using MinMax Scalar and GridSearchCV with Pipeline
pipe_mm = Pipeline([('MM',mm_scalar),('KNN',knn_reg)])
params = {'KNN__n_neighbors' : [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]}
gcv = GridSearchCV(pipe_mm, param_grid=params,cv =kfold)
gcv.fit(x,y)
print("Best Parameters using KNN and Minmax Scaling with pipeline : ", gcv.best_params_)
print("Best R2 score using KNN and Minmax Scaling  with pipeline : ", gcv.best_score_)

R2 score with KNN :  0.2713707406755661
R2 score With Stardard Scaling KNN :  -0.8810810583322275
R2 score With Minmax Scaling KNN :  -0.8810810583322275
R2 score With Stardard Scaling KNN with Pipeline :  0.11141999187642992
R2 score With Minmax Scaling KNN with Pipeline :  0.31391756444821206
Best Parameters using KNN with pipeline :  {'n_neighbors': 7}
Best R2 score using KNN with pipeline :  0.2722257242532621
Best Parameters using KNN and Standard Scaling with pipeline :  {'KNN__n_neighbors': 5}
Best R2 score using KNN and Standard Scaling  with pipeline :  0.7924808766581807
Best Parameters using KNN and Minmax Scaling with pipeline :  {'KNN__n_neighbors': 5}
Best R2 score using KNN and Minmax Scaling  with pipeline :  0.7616282165883503


# Concrete Strength

In [3]:
conc_str = pd.read_csv(r'D:\Datawork\Practical Machine Learning\Cases\Concrete Strength\Concrete_Data.csv')

x = conc_str.drop('Strength',axis=1)
y = conc_str['Strength']

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=23)
knn_reg = KNeighborsRegressor(n_neighbors=3)
knn_reg.fit(x_train,y_train)
y_pred = knn_reg.predict(x_test)
print("R2 score with KNN : ",r2_score(y_test,y_pred))

#Using Standard Scalar
std_scalar = StandardScaler()
std_scalar.fit(x_train,y_train)
x_scl_train = std_scalar.transform(x_train)
x_scl_test = std_scalar.transform(x_test)
y_pred = knn_reg.predict(x_scl_test)
print("R2 score With Stardard Scaling KNN : ",r2_score(y_test,y_pred))

#Using MinMax Scalar
mm_scalar = MinMaxScaler()
mm_scalar.fit(x_train,y_train)
x_scl_train = mm_scalar.transform(x_train)
x_scl_test = mm_scalar.transform(x_test)
y_pred = knn_reg.predict(x_scl_test)
print("R2 score With Minmax Scaling KNN : ",r2_score(y_test,y_pred))

#Using Standard Scalar with Pipeline
pipe_std = Pipeline([('SCL',std_scalar),('KNN',knn_reg)])
pipe_std.fit(x_train,y_train)
y_pred = pipe_std.predict(x_scl_test)
print("R2 score With Stardard Scaling KNN with Pipeline : ",r2_score(y_test,y_pred))

#Using MinMax Scalar with Pipeline
pipe_mm = Pipeline([('MM',mm_scalar),('KNN',knn_reg)])
pipe_mm.fit(x_train,y_train)
y_pred = pipe_mm.predict(x_scl_test)
print("R2 score With Minmax Scaling KNN with Pipeline : ",r2_score(y_test,y_pred))

#GridSearchCV
kfold = KFold(n_splits=5, shuffle=True, random_state=23)
params = {'n_neighbors' : [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]}
gcv = GridSearchCV(knn_reg, param_grid=params,cv =kfold)
gcv.fit(x,y)
print("Best Parameters using KNN with pipeline : ", gcv.best_params_)
print("Best R2 score using KNN with pipeline : ", gcv.best_score_)

#Using Standard Scalar and GridSearchCV with Pipeline
pipe_std = Pipeline([('SCL',std_scalar),('KNN',knn_reg)])
params = {'KNN__n_neighbors' : [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]}
gcv = GridSearchCV(pipe_std, param_grid=params,cv =kfold)
gcv.fit(x,y)
print("Best Parameters using KNN and Standard Scaling with pipeline : ", gcv.best_params_)
print("Best R2 score using KNN and Standard Scaling  with pipeline : ", gcv.best_score_)

#Using MinMax Scalar and GridSearchCV with Pipeline
pipe_mm = Pipeline([('MM',mm_scalar),('KNN',knn_reg)])
params = {'KNN__n_neighbors' : [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]}
gcv = GridSearchCV(pipe_mm, param_grid=params,cv =kfold)
gcv.fit(x,y)
print("Best Parameters using KNN and Minmax Scaling with pipeline : ", gcv.best_params_)
print("Best R2 score using KNN and Minmax Scaling  with pipeline : ", gcv.best_score_)

R2 score with KNN :  0.6972705849865954
R2 score With Stardard Scaling KNN :  -0.2563706254545275
R2 score With Minmax Scaling KNN :  -0.2563706254545275
R2 score With Stardard Scaling KNN with Pipeline :  -1.0362368712405696
R2 score With Minmax Scaling KNN with Pipeline :  -6.2307902854996655e-06
Best Parameters using KNN with pipeline :  {'n_neighbors': 1}
Best R2 score using KNN with pipeline :  0.7149885402998049
Best Parameters using KNN and Standard Scaling with pipeline :  {'KNN__n_neighbors': 5}
Best R2 score using KNN and Standard Scaling  with pipeline :  0.7130846921126409
Best Parameters using KNN and Minmax Scaling with pipeline :  {'KNN__n_neighbors': 3}
Best R2 score using KNN and Minmax Scaling  with pipeline :  0.6912152648447859
