-
Notifications
You must be signed in to change notification settings - Fork 0
/
handwritternmnist.py
162 lines (108 loc) · 4.49 KB
/
handwritternmnist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# -*- coding: utf-8 -*-
"""handwritternmnist.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1z1qdwERbWAmpALegAHr-qUyNUt8tzNFa
"""
# BUG FIX: the original began with IPython shell magics (`!pip install
# scikit-learn`, `!pip install pandas`), which are a SyntaxError in a plain
# .py file.  Install dependencies outside the script instead:
#     pip install scikit-learn pandas
import warnings

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml  # deduped: was imported three times
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# Fetch MNIST from OpenML: 70 000 grayscale digits, each flattened to 784 pixels.
# NOTE(review): this dataset is never used again below (the Keras copy of MNIST
# is loaded later and the name `mnist` is rebound) — kept for parity with the
# original notebook flow.
mnist = fetch_openml("mnist_784")
mnist
# X is the (70000, 784) pixel matrix, y the digit labels as strings.
X, y = mnist['data'], mnist['target']
X.shape
y.shape
# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from keras.datasets import mnist  # NOTE: rebinds `mnist`, shadowing the OpenML Bunch above

# BUG FIX: the original had a bare `mnist.load_data` expression (attribute
# access without the call) — a silent no-op — immediately before the real
# call; removed.
# Canonical Keras MNIST split: 60k train / 10k test uint8 images of 28x28.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train.shape, X_test.shape, y_train.shape, y_test.shape
def plot_input_img(i):
    """Show training image *i* in grayscale, titled with its label."""
    image, label = X_train[i], y_train[i]
    plt.imshow(image, cmap='binary')
    plt.title(label)
    plt.show()


# Preview the first five training digits.
for sample_idx in range(5):
    plot_input_img(sample_idx)
"""Preprocess the dataset"""
#normalizing the image to [0, 1] range
X_train=X_train.astype(np.float32)/255
X_test=X_test.astype(np.float32)/255
X_train=np.expand_dims(X_train, -1) #expand
X_test=np.expand_dims(X_test, -1) #(60000, 28, 28, 1)
X_train.shape
"""OneHotVector"""
y_train =keras.utils.to_categorical(y_train)
y_train #only whereever the value is present it will show the 1 there
y_test =keras.utils.to_categorical(y_test)
"""###To buils the model we have to import the libraries"""
model=Sequential()
model.add(Conv2D(32, (3,3), input_shape=(28, 28,1), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(10, activation="softmax"))
model.summary()
model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'] )
from keras.callbacks import EarlyStopping, ModelCheckpoint
es = EarlyStopping(monitor='val_acc', min_delta=0.01, patience=4, verbose=1)
mc = ModelCheckpoint(("./bestmodel.h5"), monitor="val_acc", verbose=1, save_best_only=True)
model.fit(X_train, y_train, epochs=50, validation_split=0.3)
model_s = model.save("./bestmodel.h5")
model_s=keras.models.load_model("./bestmodel.h5")
score=model_s.evaluate(X_test, y_test)
print(f"the model accuracy is {score[1]}")
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Pool the original train/test splits so every sample can rotate through folds.
X_all = np.concatenate((X_train, X_test), axis=0)
y_all = np.concatenate((y_train, y_test), axis=0)

# Number of cross-validation folds (adjust to taste).
n_splits = 5

# Stratify on the integer class (argmax of the one-hot rows) so each fold
# preserves the digit distribution; fixed seed for reproducibility.
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

accuracies = []  # validation accuracy of each fold

for fold, (train_index, val_index) in enumerate(
        skf.split(X_all, np.argmax(y_all, axis=1)), start=1):
    X_train_fold, X_val_fold = X_all[train_index], X_all[val_index]
    y_train_fold, y_val_fold = y_all[train_index], y_all[val_index]

    # Fresh model per fold so no weights leak between folds.
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Flatten())
    model.add(Dropout(0.25))
    model.add(Dense(10, activation="softmax"))
    model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

    # BUG FIX: the original reused the earlier `es`/`mc` callbacks, which
    # (a) monitored the nonexistent 'val_acc' key and so never fired, and
    # (b) shared a single checkpoint file across folds — load_weights could
    # then restore weights from a *previous* fold or the earlier training
    # run.  Build fresh callbacks per fold, monitoring 'val_accuracy' (the
    # key Keras logs for metrics=['accuracy']), with a per-fold file.
    ckpt_path = f"./bestmodel_fold{fold}.h5"
    fold_es = EarlyStopping(monitor='val_accuracy', min_delta=0.01, patience=4, verbose=1)
    fold_mc = ModelCheckpoint(ckpt_path, monitor='val_accuracy', verbose=1, save_best_only=True)

    model.fit(X_train_fold, y_train_fold, epochs=50,
              validation_data=(X_val_fold, y_val_fold),
              callbacks=[fold_es, fold_mc])

    # Restore the best weights checkpointed during this fold's training.
    model.load_weights(ckpt_path)

    val_loss, val_acc = model.evaluate(X_val_fold, y_val_fold)
    accuracies.append(val_acc)

# Per-fold and average accuracy report.
for i, acc in enumerate(accuracies):
    print(f"Fold {i+1} Accuracy: {acc}")

print(f"Average Accuracy: {np.mean(accuracies)}")