In [1]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures # for polynomial features
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.neighbors import NearestNeighbors, KNeighborsClassifier
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the Kyphosis data set from a path relative to the notebook.
# Forward slashes are used on purpose: they resolve on Windows as well as on
# POSIX systems, whereas a backslash-separated path only works on Windows.
kyp = pd.read_csv('../Cases/Kyphosis/Kyphosis.csv')
kyp.head()

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15


In [3]:
from sklearn.preprocessing import LabelEncoder


# Encode the string target as integers; the printed `classes_` array gives the
# label that corresponds to each integer code (position 0, 1, ...).
le = LabelEncoder()
y = le.fit_transform(kyp['Kyphosis'])

# Feature matrix: every column except the target and 'Unnamed: 0'.
# 'Unnamed: 0' is the row counter that came along with the CSV (0, 1, 2, ... in
# the preview); it carries no information about the patient, and leaving it in
# would let it influence the neighbour distances.  errors='ignore' keeps the
# cell working if the frame was loaded without that column.
X = kyp.drop(columns='Kyphosis').drop(columns='Unnamed: 0', errors='ignore')
print(le.classes_)

['absent' 'present']


In [4]:
# Hold out 30% of the rows for testing; stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=24,
)

In [5]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to both the training and the test features.
std_sca = StandardScaler()
std_sca.fit(X_train)
X_scl_trn = std_sca.transform(X_train)
X_scl_tst = std_sca.transform(X_test)

In [6]:
# 5-nearest-neighbours classifier trained on the standardised training features.
knn = KNeighborsClassifier(n_neighbors=5)

knn.fit(X=X_scl_trn, y=y_train)

In [7]:
# Predicted class labels for the standardised test features.
y_pred = knn.predict(X=X_scl_tst)

In [8]:
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score

# Per-class probabilities for the test rows; column 1 is the probability of
# the class encoded as 1.
y_pred_prob = knn.predict_proba(X_scl_tst)

# One value per line: accuracy, ROC AUC (from the class-1 probability), log loss.
print(
    accuracy_score(y_test, y_pred),
    roc_auc_score(y_test, y_pred_prob[:, 1]),
    log_loss(y_test, y_pred_prob),
    sep='\n',
)

0.76
0.9199999999999999
0.3232560199190553


In [11]:
# Standardise-then-classify expressed as a single Pipeline, fitted on the raw
# (unscaled) training features and evaluated on the raw test features.
std_scaler = StandardScaler()

# Pipeline does not clone its steps: handing it the already-fitted `knn` object
# would refit that object in place.  Build a fresh classifier with the same
# hyper-parameters instead, so `knn` keeps the state it was fitted with.
pipe_std = Pipeline([('SCL', std_scaler),
                     ('KNN', KNeighborsClassifier(**knn.get_params()))])
pipe_std.fit(X_train, y_train)
y_pred = pipe_std.predict(X_test)

y_pred_prob = pipe_std.predict_proba(X_test)

# Accuracy, ROC AUC (from the class-1 probability) and log loss on the test rows.
print(accuracy_score(y_test, y_pred))
print(roc_auc_score(y_test,y_pred_prob[:,1]))
print(log_loss(y_test, y_pred_prob))

0.76
0.9199999999999999
0.3232560199190553


In [12]:
# Same evaluation as the StandardScaler pipeline, but with MinMaxScaler as the
# scaling step.
mm_scaler = MinMaxScaler()

# NOTE: the variable is still called `pipe_std` although it now wraps a
# MinMaxScaler; the name is kept so code that refers to it keeps working.
# Pipeline does not clone its steps, so a fresh classifier (same
# hyper-parameters as `knn`) is used here: fitting this pipeline must not
# refit the shared `knn` object on min-max-scaled data.
pipe_std = Pipeline([('SCL', mm_scaler),
                     ('KNN', KNeighborsClassifier(**knn.get_params()))])
pipe_std.fit(X_train, y_train)
y_pred = pipe_std.predict(X_test)

y_pred_prob = pipe_std.predict_proba(X_test)

# Accuracy, ROC AUC (from the class-1 probability) and log loss on the test rows.
print(accuracy_score(y_test, y_pred))
print(roc_auc_score(y_test,y_pred_prob[:,1]))
print(log_loss(y_test, y_pred_prob))

0.76
0.89
0.35098190714145305


In [18]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified, shuffled cross-validation with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# StandardScaler followed by KNN; the scaler is refitted inside every CV fold.
std_scaler = StandardScaler()
knn = KNeighborsClassifier()
pipe = Pipeline(steps=[('SCL', std_scaler), ('KNN', knn)])

# Show the tunable parameter names (step name + '__' + parameter).
print(pipe.get_params())

# Search k = 1..10, scoring by negative log loss (closer to 0 is better).
params = {'KNN__n_neighbors': list(range(1, 11))}
gcv = GridSearchCV(estimator=pipe, param_grid=params,
                   scoring='neg_log_loss', cv=kfold)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', StandardScaler()), ('KNN', KNeighborsClassifier())], 'verbose': False, 'SCL': StandardScaler(), 'KNN': KNeighborsClassifier(), 'SCL__copy': True, 'SCL__with_mean': True, 'SCL__with_std': True, 'KNN__algorithm': 'auto', 'KNN__leaf_size': 30, 'KNN__metric': 'minkowski', 'KNN__metric_params': None, 'KNN__n_jobs': None, 'KNN__n_neighbors': 5, 'KNN__p': 2, 'KNN__weights': 'uniform'}
{'KNN__n_neighbors': 10}
-0.3545562027841026


In [19]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified, shuffled cross-validation with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# MinMaxScaler followed by KNN; the scaler is refitted inside every CV fold.
mm_scaler = MinMaxScaler()
knn = KNeighborsClassifier()
pipe = Pipeline(steps=[('SCL', mm_scaler), ('KNN', knn)])

# Show the tunable parameter names (step name + '__' + parameter).
print(pipe.get_params())

# Search k = 1..10, scoring by negative log loss (closer to 0 is better).
params = {'KNN__n_neighbors': list(range(1, 11))}
gcv = GridSearchCV(estimator=pipe, param_grid=params,
                   scoring='neg_log_loss', cv=kfold)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', MinMaxScaler()), ('KNN', KNeighborsClassifier())], 'verbose': False, 'SCL': MinMaxScaler(), 'KNN': KNeighborsClassifier(), 'SCL__clip': False, 'SCL__copy': True, 'SCL__feature_range': (0, 1), 'KNN__algorithm': 'auto', 'KNN__leaf_size': 30, 'KNN__metric': 'minkowski', 'KNN__metric_params': None, 'KNN__n_jobs': None, 'KNN__n_neighbors': 5, 'KNN__p': 2, 'KNN__weights': 'uniform'}
{'KNN__n_neighbors': 9}
-0.3541342613432673


In [20]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified, shuffled cross-validation with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=24)

# Candidate scaling steps; the pipeline starts with no scaler and the grid
# search swaps each candidate (or none) into the 'SCL' slot.
std_scaler = StandardScaler()
mm_scaler = MinMaxScaler()
knn = KNeighborsClassifier()
pipe = Pipeline(steps=[('SCL', None), ('KNN', knn)])

# Show the tunable parameter names (step name + '__' + parameter).
print(pipe.get_params())

# Joint search over k = 1..10 and the scaling step, scored by negative log
# loss (closer to 0 is better).
params = {
    'KNN__n_neighbors': list(range(1, 11)),
    'SCL': [std_scaler, mm_scaler, None],
}
gcv = GridSearchCV(estimator=pipe, param_grid=params,
                   scoring='neg_log_loss', cv=kfold)
gcv.fit(X, y)

print(gcv.best_params_)
print(gcv.best_score_)

{'memory': None, 'steps': [('SCL', None), ('KNN', KNeighborsClassifier())], 'verbose': False, 'SCL': None, 'KNN': KNeighborsClassifier(), 'KNN__algorithm': 'auto', 'KNN__leaf_size': 30, 'KNN__metric': 'minkowski', 'KNN__metric_params': None, 'KNN__n_jobs': None, 'KNN__n_neighbors': 5, 'KNN__p': 2, 'KNN__weights': 'uniform'}
{'KNN__n_neighbors': 9, 'SCL': MinMaxScaler()}
-0.3541342613432673
