In [1]:
###-----------------
### Import Libraries
###-----------------

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold,cross_val_score
from sklearn.metrics import log_loss,accuracy_score
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Read the riding-mower data set from the working directory.
mowers = pd.read_csv(filepath_or_buffer="RidingMowers.csv")

# Preview the first five rows (rich display as the cell's last expression).
mowers.head(n=5)

Unnamed: 0,Income,Lot_Size,Response
0,34,26,Not Bought
1,34,40,Not Bought
2,34,46,Not Bought
3,34,48,Not Bought
4,34,53,Not Bought


In [3]:
# Replace the string labels in 'Response' with integer codes.
# `lbl` keeps the fitted label mapping (see `lbl.classes_`).
lbl = LabelEncoder()
mowers = mowers.assign(Response=lbl.fit_transform(mowers['Response']))

In [4]:
# Target: the encoded 'Response' column.
y = mowers['Response']

# Features: every remaining column of the frame.
# NOTE(review): the CSV is read without index_col, so confirm that no
# index-like column ends up among the features.
X = mowers.drop(columns=['Response'])


In [5]:
# Hold out 30% of the rows for testing; stratify on y so both parts keep
# the class proportions, and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=23,
)

In [6]:
# Baseline: 1-nearest-neighbour classifier fitted on the training split.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Accuracy of the hard class predictions on the held-out rows.
y_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_pred))

# Log loss computed from column 1 of predict_proba
# (the probability of the second encoded class).
y_pred_prob = knn.predict_proba(X_test)[:, 1]
print(log_loss(y_test, y_pred_prob))

0.8518518518518519
5.33980050209143


# Tuning k with GridSearchCV

In [7]:
# 5-fold stratified cross-validation, shuffled with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Template estimator and the candidate neighbour counts k = 1..10.
knn = KNeighborsClassifier()
params = {'n_neighbors': np.arange(1, 11)}

# log_loss: scored as 'neg_log_loss', so a larger (closer to 0) score is better.
# The search runs on the full X, y using the folds defined above.
gcv = GridSearchCV(
    estimator=knn,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'n_neighbors': 8}
-1.1403137133213659


# Applying KNN to the Breast Cancer Problem

In [8]:
# Read the breast-cancer data; the first CSV column becomes the row index.
cancer = pd.read_csv('BreastCancer.csv', index_col=0)

# Replace the string labels in 'Class' with integer codes.
lbl = LabelEncoder()
cancer = cancer.assign(Class=lbl.fit_transform(cancer['Class']))

In [9]:
# Target: the encoded 'Class' column; features: every other column.
y = cancer['Class']
X = cancer.drop(columns=['Class'])

In [10]:
# Stratified 70/30 train/test split of the breast-cancer data, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=23,
)

In [11]:
# Baseline: 1-nearest-neighbour classifier fitted on the training split.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Accuracy of the hard class predictions on the held-out rows.
y_pred = knn.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.9523809523809523


In [12]:
# Log loss of the fitted classifier on the test split, computed from
# column 1 of predict_proba (probability of the second encoded class).
class_probs = knn.predict_proba(X_test)
y_pred_prob = class_probs[:, 1]
print(log_loss(y_test, y_pred_prob))

1.716364447100817


In [13]:
# Tune k for KNN on the breast-cancer data with 5-fold stratified CV.
# The splitter and a fresh template estimator are defined here so this cell
# does not depend on `kfold` left over from the riding-mower section or on
# the already-fitted 1-NN model; the settings match the other grid searches.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)
knn = KNeighborsClassifier()
params = {'n_neighbors': np.arange(1, 11)}  # candidate k values 1..10

# log_loss: scored as 'neg_log_loss', so a larger (closer to 0) score is better.
gcv = GridSearchCV(knn, param_grid=params, cv=kfold, scoring='neg_log_loss')
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'n_neighbors': 10}
-0.23229511352501434


# Image Segmentation Data

In [14]:
# Read the image-segmentation data set from the working directory.
img = pd.read_csv(filepath_or_buffer='Image_Segmention.csv')

# Preview the first five rows (rich display as the cell's last expression).
img.head(n=5)

Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.0,0.0,0.333333,0.266667,0.5,0.077778,6.666666,8.333334,7.777778,3.888889,5.0,3.333333,-8.333333,8.444445,0.53858,-0.924817
1,BRICKFACE,105,139,9,0.0,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.0,0.0,0.5,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.0,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.0,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.0,0.0,0.5,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.0,3.333333,-7.333334,7.111111,0.561508,-0.985811


In [15]:
# Replace the string labels in 'Class' with integer codes.
# `lbl` keeps the fitted label mapping (see `lbl.classes_`).
lbl = LabelEncoder()
img = img.assign(Class=lbl.fit_transform(img['Class']))

In [16]:
# Target: the encoded 'Class' column.
y = img['Class']

# Features: every remaining column of the frame.
# NOTE(review): the CSV is read without index_col, so confirm that no
# index-like column ends up among the features.
X = img.drop(columns=['Class'])

In [17]:
# 5-fold stratified cross-validation, shuffled with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=23)

# Template estimator; its n_neighbors value is replaced by each grid candidate.
knn = KNeighborsClassifier(n_neighbors=1)
params = {'n_neighbors': np.arange(1, 11)}  # candidate k values 1..10

# Select k by 'neg_log_loss' (larger, i.e. closer to 0, is better)
# using the full X, y and the folds defined above.
gcv = GridSearchCV(
    estimator=knn,
    param_grid=params,
    scoring='neg_log_loss',
    cv=kfold,
)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'n_neighbors': 10}
-0.8815302233391723
