# Solving Sudoku and Comparing 

In [12]:
%%python
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.datasets import make_classification
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report,mean_absolute_error
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import warnings 
warnings.filterwarnings('ignore')

# KNN Classifier

In [13]:
%%python
X,y = make_classification(n_samples=1000,n_features=3,n_redundant=1,n_classes=2,random_state=42)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)
classifier = KNeighborsClassifier(n_neighbors=5,algorithm='auto')
classifier.fit(X_train,y_train)

y_pred = classifier.predict(X_test)

print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[110  12]
 [ 12 116]]
0.904
              precision    recall  f1-score   support

           0       0.90      0.90      0.90       122
           1       0.91      0.91      0.91       128

    accuracy                           0.90       250
   macro avg       0.90      0.90      0.90       250
weighted avg       0.90      0.90      0.90       250



# Support Vector Classifier using Custom dataset

In [14]:
%%python
from sklearn.svm import SVC
X,y = make_classification(n_samples=1000,n_features=2,n_classes=2,n_clusters_per_class=2,n_redundant=0)

# Splitting for training and testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=42)
svc = SVC(kernel='linear')
svc.fit(X_train,y_train)
y_pred = svc.predict(X_test)
print('Accuracy Score: \n',accuracy_score(y_test,y_pred))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred))
print('Classification Report: \n',classification_report(y_test,y_pred))

Accuracy Score: 
 0.88
Confusion Matrix: 
 [[ 98  22]
 [  8 122]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.92      0.82      0.87       120
           1       0.85      0.94      0.89       130

    accuracy                           0.88       250
   macro avg       0.89      0.88      0.88       250
weighted avg       0.88      0.88      0.88       250



### Hyperparameter tuning in SVC

In [15]:
%%python
from sklearn.model_selection import GridSearchCV
params = {
    'C': [0.1,1,10,100,1000],
    'kernel': ['linear'],
    'gamma': [1,0.1,0.01,0.001,0.0001]
}
gridcv = GridSearchCV(svc,param_grid=params,cv=5,verbose=3,refit=True)
gridcv.fit(X_train,y_train)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.867 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.920 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.873 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.880 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.887 total time=   0.0s
[CV 1/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.867 total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.920 total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.873 total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.880 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.887 total time=   0.0s
[CV 1/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.867 total time=   0.0s
[CV 2/5] END ..C=0.1, gamma=0.01, kernel=linear

In [16]:
%%python
y_pred_gridcv = gridcv.predict(X_test)

print('Accuracy Score: \n',accuracy_score(y_test,y_pred_gridcv))
print('Confusion Matrix: \n',confusion_matrix(y_test,y_pred_gridcv))
print('Classification Report: \n',classification_report(y_test,y_pred_gridcv))

Accuracy Score: 
 0.884
Confusion Matrix: 
 [[ 99  21]
 [  8 122]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.93      0.82      0.87       120
           1       0.85      0.94      0.89       130

    accuracy                           0.88       250
   macro avg       0.89      0.88      0.88       250
weighted avg       0.89      0.88      0.88       250



# Support Vector Regressor

In [17]:
%%python
from sklearn.datasets import make_regression
from sklearn.svm import SVR
from sklearn.metrics import r2_score

X,y = make_regression(n_samples=1000,n_features=2,n_targets=1,noise=3.0)

# Splitting for training and testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.30,random_state=42)

svr = SVR(kernel='linear')
svr.fit(X_train,y_train)

y_pred = svr.predict(X_test)
print(r2_score(y_test,y_pred))

0.9989277050499329


### Hyperparameter tuning in SVR¶

In [18]:
%%python
params = {
    'kernel':['linear'],
    'C':[0.1,1,10,100,1000],
    'gamma':[1,0.1,0.01,0.001,0.0001],
    'epsilon':[0.1,0.2,0.3]
}
gridcv = GridSearchCV(SVR(),param_grid=params,cv=5,scoring='neg_mean_squared_error',verbose=3,refit=True)
gridcv.fit(X_train,y_train)

Fitting 5 folds for each of 75 candidates, totalling 375 fits
[CV 1/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-2230.059 total time=   0.0s
[CV 2/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-2201.772 total time=   0.0s
[CV 3/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-1951.115 total time=   0.0s
[CV 4/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-2460.005 total time=   0.0s
[CV 5/5] END C=0.1, epsilon=0.1, gamma=1, kernel=linear;, score=-1972.703 total time=   0.0s
[CV 1/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-2230.059 total time=   0.0s
[CV 2/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-2201.772 total time=   0.0s
[CV 3/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-1951.115 total time=   0.0s
[CV 4/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-2460.005 total time=   0.0s
[CV 5/5] END C=0.1, epsilon=0.1, gamma=0.1, kernel=linear;, score=-1972.703 total time=   0.0

# Decision Tree Classifier

In [38]:
%%python
dataset = load_iris()
df = pd.DataFrame(dataset.data,columns=dataset.feature_names)
X = df.iloc[:,:-1]
y = df.iloc[:,-1]
# Splitting for training and testing
X_train, X_test,y_train,y_test = train_test_split(X,y,test_size=0.33,random_state=42)
# dst = DecisionTreeClassifier(criterion='entropy').fit(X_train,y_train)
# y_pred = dst.predict(X_test)
# mae = mean_absolute_error(y_test,y_pred)
# cfmtx = confusion_matrix(y_test,y_pred)
# accscr = accuracy_score(y_test,y_pred)
# print('Mean absolute error: \n',mae)
# print('Confusion matrix: \n',cfmtx)
# print('Accuracy Score: \n',accscr)

## Decision Tree Classifier using post prunning

In [34]:
%%python
postpdst = DecisionTreeClassifier(criterion='entropy',max_depth=2)
postpdst.fit(X_train,y_train)
y_pred_prunned = postpdst.predict(X_test)
mae_prunned = mean_absolute_error(y_test,y_pred_prunned)
cfmtx_prunned = confusion_matrix(y_test,y_pred_prunned)
accscr_prunned = accuracy_score(y_test,y_pred_prunned)
print('Mean absolute error: \n',mae_prunned)
print('Confusion matrix: \n',cfmtx_prunned)
print('Accuracy Score: \n',accscr_prunned)

Mean absolute error: 
 0.0
Confusion matrix: 
 [[12  0]
 [ 0  8]]
Accuracy Score: 
 1.0


### preprunning

In [33]:
%%python
parameters = {
    'criterion':['gini','entropy','log_loss'],
    'splitter' :['best','random'],
    'max_depth':[1,2,3,4,5],
    'max_features':['auto','sqrt','log2']
}
classifier = DecisionTreeClassifier()
clf = GridSearchCV(classifier,param_grid=parameters,cv=5,scoring='accuracy')
clf.fit(X_train,y_train)

y_pred_clf = clf.predict(X_test)
mae_clf = mean_absolute_error(y_test,y_pred_clf)
accscr_clf = accuracy_score(y_test,y_pred_clf)
print('Mean absolute error: \n',mae_clf)
print('Accuracy Score: \n',accscr_clf)
print(classification_report(y_test,y_pred_clf))

Mean absolute error: 
 0.0
Accuracy Score: 
 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



# Logistic Regression

In [31]:
%%python
dataset = load_iris()
df = pd.DataFrame(dataset.data,columns=dataset.feature_names)
df['target'] = dataset.target
dfb = df[df['target']!=2]

# Independent and dependent features
X = dfb.iloc[:,:-1]
y = dfb.iloc[:,-1]


# Splitting for training and testing
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.20,random_state=42)

classification = LogisticRegression(max_iter=200)
classification.fit(X_train,y_train)
y_pred = classification.predict(X_test)


print('Confusion Matrix \n',confusion_matrix(y_test,y_pred))
print('Accuracy_score',accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

Confusion Matrix 
 [[12  0]
 [ 0  8]]
Accuracy_score 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



### Applying KFOLD Cross validation

In [32]:
%%python
cv = KFold(n_splits=5)
scores = cross_val_score(classification,X_train,y_train,scoring='accuracy',cv=cv)
final_score = np.mean(scores)
print('Final Score after 5 different cross validation splits:',final_score)

Final Score after 5 different cross validation splits: 1.0
