<a href="https://colab.research.google.com/github/RifatMuhtasim/Machine_Learning/blob/main/Miscellaneous_Topics/Cross_Validation_With_Digits_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Cross Validation:** <br/>
Cross-validation allows us to compare different Machine Learning methods and get a sense of how well they work in practice.

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Import Digits Dataset
from sklearn.datasets import load_digits
digits = load_digits()
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

## Compare the Different Model with Manual Process.

In [3]:
# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(digits['data'], digits['target'], test_size=0.25)

In [4]:
# Logistic Regression
lr = LogisticRegression(max_iter=5000)
lr.fit(X_train, y_train)
lr.score(X_test, y_test)

0.9688888888888889

In [5]:
# SVM
svm = SVC()
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

0.9911111111111112

In [6]:
# Random Forest
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9844444444444445

### Using Function

In [7]:
def Get_score(model, X_train, X_test, y_train, y_test):
  model.fit(X_train, y_train)
  return model.score(X_test, y_test)

In [8]:
print( "Logistic Regression:", Get_score(LogisticRegression(max_iter=5000), X_train, X_test, y_train, y_test) )

print( "Support Vector Machine:", Get_score(SVC(), X_train, X_test, y_train, y_test) )

print( "Random Forest:", Get_score(RandomForestClassifier(n_estimators=100), X_train, X_test, y_train, y_test) )

Logistic Regression: 0.9688888888888889
Support Vector Machine: 0.9911111111111112
Random Forest: 0.9777777777777777


## Using the K Fold Process

### model.selection: KFold

In [9]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=4)
kf

KFold(n_splits=4, random_state=None, shuffle=False)

In [10]:
dummy_df = [0,1,2,3,4,5,6,7,8,9]

for train_index, test_index in kf.split(dummy_df):
  print(train_index, test_index)


[3 4 5 6 7 8 9] [0 1 2]
[0 1 2 6 7 8 9] [3 4 5]
[0 1 2 3 4 5 8 9] [6 7]
[0 1 2 3 4 5 6 7] [8 9]


In [11]:
from sklearn.model_selection import StratifiedKFold

folds = StratifiedKFold(n_splits=4)
folds

StratifiedKFold(n_splits=4, random_state=None, shuffle=False)

### Model Selection: cross_val_score

In [12]:
from sklearn.model_selection import cross_val_score

In [13]:
# Logistic Regression

cross_val_score(LogisticRegression(max_iter=5000), digits['data'], digits['target'])

array([0.925     , 0.87777778, 0.93871866, 0.93314763, 0.89693593])

In [14]:
# SVM

cross_val_score(SVC(), digits['data'], digits['target'])

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [15]:
# Random Forest

cross_val_score(RandomForestClassifier(n_estimators=100), digits['data'], digits['target'])

array([0.93333333, 0.90555556, 0.94986072, 0.97214485, 0.92200557])