-
Notifications
You must be signed in to change notification settings - Fork 0
/
handwritternmnist.py
162 lines (108 loc) · 4.49 KB
/
handwritternmnist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# -*- coding: utf-8 -*-
"""handwritternmnist.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1z1qdwERbWAmpALegAHr-qUyNUt8tzNFa
"""
# BUG FIX: the original began with IPython shell magics (`!pip install
# scikit-learn`, `!pip install pandas`), which are a SyntaxError in a plain
# .py file.  Install dependencies outside the script instead:
#     pip install scikit-learn pandas
import warnings

import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml  # deduped: was imported three times
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# Fetch MNIST from OpenML: 70 000 grayscale digits, each flattened to 784 pixels.
# NOTE(review): this dataset is never used again below (the Keras copy of MNIST
# is loaded later and the name `mnist` is rebound) — kept for parity with the
# original notebook flow.
mnist = fetch_openml("mnist_784")
mnist
# X is the (70000, 784) pixel matrix, y the digit labels as strings.
X, y = mnist['data'], mnist['target']
X.shape
y.shape
# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from keras.datasets import mnist  # NOTE: rebinds `mnist`, shadowing the OpenML Bunch above

# BUG FIX: the original had a bare `mnist.load_data` expression (attribute
# access without the call) — a silent no-op — immediately before the real
# call; removed.
# Canonical Keras MNIST split: 60k train / 10k test uint8 images of 28x28.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train.shape, X_test.shape, y_train.shape, y_test.shape
def plot_input_img(i):
    """Show training image *i* in grayscale, titled with its label."""
    image, label = X_train[i], y_train[i]
    plt.imshow(image, cmap='binary')
    plt.title(label)
    plt.show()


# Preview the first five training digits.
for sample_idx in range(5):
    plot_input_img(sample_idx)
"""Preprocess the dataset"""
#normalizing the image to [0, 1] range
X_train=X_train.astype(np.float32)/255
X_test=X_test.astype(np.float32)/255
X_train=np.expand_dims(X_train, -1) #expand
X_test=np.expand_dims(X_test, -1) #(60000, 28, 28, 1)
X_train.shape
"""OneHotVector"""
y_train =keras.utils.to_categorical(y_train)
y_train #only whereever the value is present it will show the 1 there
y_test =keras.utils.to_categorical(y_test)
"""###To buils the model we have to import the libraries"""
model=Sequential()
model.add(Conv2D(32, (3,3), input_shape=(28, 28,1), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Conv2D(64, (3,3), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(10, activation="softmax"))
model.summary()
model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'] )
from keras.callbacks import EarlyStopping, ModelCheckpoint
es = EarlyStopping(monitor='val_acc', min_delta=0.01, patience=4, verbose=1)
mc = ModelCheckpoint(("./bestmodel.h5"), monitor="val_acc", verbose=1, save_best_only=True)
model.fit(X_train, y_train, epochs=50, validation_split=0.3)
model_s = model.save("./bestmodel.h5")
model_s=keras.models.load_model("./bestmodel.h5")
score=model_s.evaluate(X_test, y_test)
print(f"the model accuracy is {score[1]}")
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Pool the original train/test splits so every sample can rotate through folds.
X_all = np.concatenate((X_train, X_test), axis=0)
y_all = np.concatenate((y_train, y_test), axis=0)

# Number of cross-validation folds (adjust to taste).
n_splits = 5

# Stratify on the integer class (argmax of the one-hot rows) so each fold
# preserves the digit distribution; fixed seed for reproducibility.
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

accuracies = []  # validation accuracy of each fold

for fold, (train_index, val_index) in enumerate(
        skf.split(X_all, np.argmax(y_all, axis=1)), start=1):
    X_train_fold, X_val_fold = X_all[train_index], X_all[val_index]
    y_train_fold, y_val_fold = y_all[train_index], y_all[val_index]

    # Fresh model per fold so no weights leak between folds.
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Flatten())
    model.add(Dropout(0.25))
    model.add(Dense(10, activation="softmax"))
    model.compile(optimizer='adam', loss=keras.losses.categorical_crossentropy, metrics=['accuracy'])

    # BUG FIX: the original reused the earlier `es`/`mc` callbacks, which
    # (a) monitored the nonexistent 'val_acc' key and so never fired, and
    # (b) shared a single checkpoint file across folds — load_weights could
    # then restore weights from a *previous* fold or the earlier training
    # run.  Build fresh callbacks per fold, monitoring 'val_accuracy' (the
    # key Keras logs for metrics=['accuracy']), with a per-fold file.
    ckpt_path = f"./bestmodel_fold{fold}.h5"
    fold_es = EarlyStopping(monitor='val_accuracy', min_delta=0.01, patience=4, verbose=1)
    fold_mc = ModelCheckpoint(ckpt_path, monitor='val_accuracy', verbose=1, save_best_only=True)

    model.fit(X_train_fold, y_train_fold, epochs=50,
              validation_data=(X_val_fold, y_val_fold),
              callbacks=[fold_es, fold_mc])

    # Restore the best weights checkpointed during this fold's training.
    model.load_weights(ckpt_path)

    val_loss, val_acc = model.evaluate(X_val_fold, y_val_fold)
    accuracies.append(val_acc)

# Per-fold and average accuracy report.
for i, acc in enumerate(accuracies):
    print(f"Fold {i+1} Accuracy: {acc}")

print(f"Average Accuracy: {np.mean(accuracies)}")