In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV,StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
import warnings
warnings.filterwarnings(action='ignore')

In [8]:
#Loading the HR Data
hr = pd.read_csv("/content/drive/MyDrive/KP_Module/Practical Machine Learning/Practise/Cases/human-resources-analytics/HR_comma_sep.csv")
#since data is categorical we have to convert it into numeric values
#so using get_dummies we can convert feilds into numbers
dum_hr = pd.get_dummies(hr,drop_first=True)
X = dum_hr.drop('left',axis=1)
y = dum_hr['left']

#Performing K-fold Cross-Validation
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=23)

#Initiating the All models
log_reg = LogisticRegression()
knn = KNeighborsClassifier()
gau = GaussianNB()

#for Scaling the data we can use both the scaling methods
scalar = StandardScaler()
mn = MinMaxScaler()


################# KNN-Classifier ##############

#since our KNN model is sequential we can use pipeline
pipe = Pipeline([('SCL',scalar),("KNN",knn)])
#setting the required parameters for KNN Classifier
params = {'KNN__n_neighbors':np.arange(1,10),
          'SCL':[scalar,mn]}

#to find Best Results we can use GridSearchCV
gcv = GridSearchCV(pipe,param_grid=params,scoring='neg_log_loss',cv=kfold)

#fiting the model
gcv.fit(X,y)

#Retriving the Best Result
print(gcv.best_params_)
print(gcv.best_score_)


############## Logistic Regression ############

#setting the required parameters for Logistic Regression
params = {'penalty':['l1', 'l2', 'elasticnet'],
        'solver':['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
        'l1_ratio':np.linspace(0,1,10), 'multi_class':['auto', 'ovr', 'multinomial']}

gcv = GridSearchCV(log_reg,param_grid=params,cv=kfold,scoring='neg_log_loss')
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)


############## Gaussian Naive Bayes Classifier ##############
#setting the required parameters for Gaussian Classifier
params = {'var_smoothing':np.arange(0,20)}

#to find Best Results we can use GridSearchCV
gcv = GridSearchCV(gau,param_grid=params,cv=kfold,scoring='neg_log_loss')
#Fitting the Model
gcv.fit(X,y)
#Retriving the Best Results

print(gcv.best_params_)
print(gcv.best_score_)

{'KNN__n_neighbors': 9, 'SCL': StandardScaler()}
-0.4961435410376313
{'l1_ratio': 0.0, 'multi_class': 'multinomial', 'penalty': 'l2', 'solver': 'newton-cg'}
-0.4293021560377547
{'var_smoothing': 1}
-0.5390792065469133


In [10]:
from sklearn.preprocessing import LabelEncoder

#Loading the HR Data
data = pd.read_csv("/content/drive/MyDrive/KP_Module/Practical Machine Learning/Practise/Cases/Image Segmentation/Image_Segmention.csv")
#since data contains Multi classes we have to use LabelEncoder
#initiating the labelencoder
lbl = LabelEncoder()

X = data.drop('Class',axis=1)
y = lbl.fit_transform(data['Class'])

#Performing K-fold Cross-Validation
kfold = StratifiedKFold(n_splits=5,shuffle=True,random_state=23)

#Initiating the All models
log_reg = LogisticRegression()
knn = KNeighborsClassifier()
gau = GaussianNB()

#for Scaling the data we can use both the scaling methods
scalar = StandardScaler()
mn = MinMaxScaler()


################# KNN-Classifier ##############

#since our KNN model is sequential we can use pipeline
pipe = Pipeline([('SCL',scalar),("KNN",knn)])
#setting the required parameters for KNN Classifier
params = {'KNN__n_neighbors':np.arange(1,10),
          'SCL':[scalar,mn]}

#to find Best Results we can use GridSearchCV
gcv = GridSearchCV(pipe,param_grid=params,scoring='neg_log_loss',cv=kfold)

#fiting the model
gcv.fit(X,y)

#Retriving the Best Result
print(gcv.best_params_)
print(gcv.best_score_)


# ############## Logistic Regression ############

# #setting the required parameters for Logistic Regression
# params = {'penalty':['l1', 'l2', 'elasticnet'],
#         'solver':['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
#         'l1_ratio':np.linspace(0,1,10), 'multi_class':['auto', 'ovr', 'multinomial']}

# gcv = GridSearchCV(log_reg,param_grid=params,cv=kfold,scoring='neg_log_loss')
# gcv.fit(X,y)
# print(gcv.best_params_)
# print(gcv.best_score_)


############## Gaussian Naive Bayes Classifier ##############
#setting the required parameters for Gaussian Classifier
params = {'var_smoothing':np.arange(0,20)}

#to find Best Results we can use GridSearchCV
gcv = GridSearchCV(gau,param_grid=params,cv=kfold,scoring='neg_log_loss')
#Fitting the Model
gcv.fit(X,y)
#Retriving the Best Results

print(gcv.best_params_)
print(gcv.best_score_)

{'KNN__n_neighbors': 9, 'SCL': StandardScaler()}
-1.024611870341183
{'var_smoothing': 1}
-1.3548847857044024
