# Solving Sudoku and Comparing 

In [3]:
%%python
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report,mean_absolute_error
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import warnings 
import time
warnings.filterwarnings('ignore')

# KNN Classifier

In [4]:
%%python
st = time.time()
X,y = make_classification(n_samples=1000,n_features=3,n_redundant=1,n_classes=2,random_state=42)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)
classifier = KNeighborsClassifier(n_neighbors=5,algorithm='auto')
classifier.fit(X_train,y_train)

y_pred = classifier.predict(X_test)
et = time.time()

print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('Execution time:',et-st)

[[110  12]
 [ 12 116]]
0.904
              precision    recall  f1-score   support

           0       0.90      0.90      0.90       122
           1       0.91      0.91      0.91       128

    accuracy                           0.90       250
   macro avg       0.90      0.90      0.90       250
weighted avg       0.90      0.90      0.90       250

Execution time: 0.00623011589050293


# Support Vector Classifier using Custom dataset

In [5]:
%%python
from sklearn.svm import SVC
st = time.time()
X,y = make_classification(n_samples=1000,n_features=2,n_classes=2,n_clusters_per_class=2,n_redundant=0)

# Splitting for training and testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)
svc = SVC(kernel='linear')
svc.fit(X_train,y_train)
y_pred = svc.predict(X_test)
et = time.time()
print('Accuracy Score: \n',accuracy_score(y_test,y_pred))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred))
print('Classification Report: \n',classification_report(y_test,y_pred))
print('Execution time:',et-st)

Accuracy Score: 
 0.968
Confusion Matrix: 
 [[129   2]
 [  6 113]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.96      0.98      0.97       131
           1       0.98      0.95      0.97       119

    accuracy                           0.97       250
   macro avg       0.97      0.97      0.97       250
weighted avg       0.97      0.97      0.97       250

Execution time: 0.004292964935302734


### Hyperparameter tuning in SVC

In [6]:
%%python
from sklearn.model_selection import GridSearchCV
st = time.time()
params = {
    'C': [0.1,1,10,100,1000],
    'kernel': ['linear'],
    'gamma': [1,0.1,0.01,0.001,0.0001]
}
gridcv = GridSearchCV(svc,param_grid=params,cv=5,verbose=3,refit=True)
gridcv.fit(X_train,y_train)
et = time.time()
print('Execution time:',et-st)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.960 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.940 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.933 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.987 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.967 total time=   0.0s
[CV 1/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.960 total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.940 total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.933 total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.987 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.967 total time=   0.0s
[CV 1/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.960 total time=   0.0s
[CV 2/5] END ..C=0.1, gamma=0.01, kernel=linear

In [7]:
%%python
st = time.time()
y_pred_gridcv = gridcv.predict(X_test)
et = time.time()
print('Accuracy Score: \n',accuracy_score(y_test,y_pred_gridcv))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_gridcv))
print('Classification Report: \n',classification_report(y_test,y_pred_gridcv))
print('Execution time:',et-st)

Accuracy Score: 
 0.968
Confusion Matrix: 
 [[129   2]
 [  6 113]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.96      0.98      0.97       131
           1       0.98      0.95      0.97       119

    accuracy                           0.97       250
   macro avg       0.97      0.97      0.97       250
weighted avg       0.97      0.97      0.97       250

Execution time: 0.0007827281951904297


# Support Vector Regressor

In [8]:
%%python
from sklearn.datasets import make_regression
from sklearn.svm import SVR
from sklearn.metrics import r2_score
st = time.time()
X,y = make_regression(n_samples=1000,n_features=2,n_targets=1,noise=3.0)

# Splitting for training and testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.30,random_state=42)

svr = SVR(kernel='linear')
svr.fit(X_train,y_train)

y_pred = svr.predict(X_test)
et = time.time()
print(r2_score(y_test,y_pred))
print('Execution time:',et-st)

0.9985521185704661
Execution time: 0.018668413162231445


### Hyperparameter tuning in SVR¶

In [17]:
%%python
st = time.time()
params = {
    'kernel':['linear'],
    'C':[0.1,1,10,100,1000],
    'gamma':[1,0.1,0.01,0.001,0.0001],
    'epsilon':[0.1,0.2,0.3]
}
gridcv = GridSearchCV(SVR(),param_grid=params,cv=5,scoring='neg_mean_squared_error',verbose=3,refit=True)
gridcv.fit(X_train,y_train)
et = time.time()
print('Execution time:',et-st)

Fitting 5 folds for each of 75 candidates, totalling 375 fits
[CV 1/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-0.009 total time=   0.0s
[CV 2/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-0.006 total time=   0.0s
[CV 3/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-0.008 total time=   0.0s
[CV 4/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-0.018 total time=   0.0s
[CV 5/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-0.008 total time=   0.0s
[CV 1/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-0.009 total time=   0.0s
[CV 2/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-0.006 total time=   0.0s
[CV 3/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-0.008 total time=   0.0s
[CV 4/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-0.018 total time=   0.0s
[CV 5/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-0.008 total time=   0.0s
[CV 1/5] END C=0.1, epsilon=

# Decision Tree Classifier

In [10]:
%%python
st = time.time()
dataset = load_iris()
df = pd.DataFrame(dataset.data,columns=dataset.feature_names)
X = df.iloc[:,:-1]
y = df.iloc[:,-1]
# Splitting for training and testing
X_train, X_test,y_train,y_test = train_test_split(X,y,test_size=0.33,random_state=42)
# dst = DecisionTreeClassifier(criterion='entropy').fit(X_train,y_train)
# y_pred = dst.predict(X_test)
# mae = mean_absolute_error(y_test,y_pred)
# cfmtx = confusion_matrix(y_test,y_pred)
# accscr = accuracy_score(y_test,y_pred)
# print('Mean absolute error: \n',mae)
# print('Confusion matrix: \n',cfmtx)
# print('Accuracy Score: \n',accscr)
et = time.time()
print('Execution time:',et-st)

Execution time: 0.002386331558227539


## Decision Tree Classifier using post prunning

In [15]:
%%python
st = time.time()
postpdst = DecisionTreeClassifier(criterion='entropy',max_depth=2)
postpdst.fit(X_train,y_train)
y_pred_prunned = postpdst.predict(X_test)
mae_prunned = mean_absolute_error(y_test,y_pred_prunned)
cfmtx_prunned = confusion_matrix(y_test,y_pred_prunned)
accscr_prunned = accuracy_score(y_test,y_pred_prunned)
et = time.time()
print('Mean absolute error: \n',mae_prunned)
print('Confusion matrix: \n',cfmtx_prunned)
print('Accuracy Score: \n',accscr_prunned)
print('Execution time:',et-st)

Mean absolute error: 
 0.0
Confusion matrix: 
 [[12  0]
 [ 0  8]]
Accuracy Score: 
 1.0
Execution time: 0.002525806427001953


### preprunning

In [16]:
%%python
st = time.time()
parameters = {
    'criterion':['gini','entropy','log_loss'],
    'splitter' :['best','random'],
    'max_depth':[1,2,3,4,5],
    'max_features':['auto','sqrt','log2']
}
classifier = DecisionTreeClassifier()
clf = GridSearchCV(classifier,param_grid=parameters,cv=5,scoring='accuracy')
clf.fit(X_train,y_train)

y_pred_clf = clf.predict(X_test)
mae_clf = mean_absolute_error(y_test,y_pred_clf)
accscr_clf = accuracy_score(y_test,y_pred_clf)
et = time.time()
print('Mean absolute error: \n',mae_clf)
print('Accuracy Score: \n',accscr_clf)
print(classification_report(y_test,y_pred_clf))
print('Execution time:',et-st)

Mean absolute error: 
 0.0
Accuracy Score: 
 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

Execution time: 1.2190561294555664


# Logistic Regression

In [13]:
%%python
st = time.time()
dataset = load_iris()
df = pd.DataFrame(dataset.data,columns=dataset.feature_names)
df['target'] = dataset.target
dfb = df[df['target']!=2]

# Independent and dependent features
X = dfb.iloc[:,:-1]
y = dfb.iloc[:,-1]


# Splitting for training and testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=42)

classification = LogisticRegression(max_iter=200)
classification.fit(X_train,y_train)
y_pred = classification.predict(X_test)

et = time.time()
print('Confusion Matrix \n',confusion_matrix(y_test,y_pred))
print('Accuracy_score',accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('Execution time:',et-st)

Confusion Matrix 
 [[12  0]
 [ 0  8]]
Accuracy_score 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

Execution time: 0.008228540420532227


### Applying KFOLD Cross validation

In [14]:
%%python
st = time.time()
cv = KFold(n_splits=5)
scores = cross_val_score(classification,X_train,y_train,scoring='accuracy',cv=cv)
final_score = np.mean(scores)
et = time.time()
print('Final Score after 5 different cross validation splits:',final_score)
print('Execution time:',et-st)

Final Score after 5 different cross validation splits: 1.0
Execution time: 0.02135443687438965
