In [19]:
import time
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2  # OpenCV for image resizing
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras.layers import BatchNormalization

In [20]:
# Build the combined SVHN + MNIST training/test sets and append one augmented
# copy of every training image.
#
# Produces: X_train_combined / Y_train_combined (originals followed by their
# augmented copies, pixels scaled to [0, 1]) and X_test_combined /
# Y_test_combined (pixels scaled to [0, 1], no augmentation).

# Location of the SVHN .mat files. Set the SVHN_DATA_DIR environment variable
# to override; the default is the original local path.
svhn_dir = os.environ.get('SVHN_DATA_DIR', r'C:\Users\Kiran\Downloads\handwritten digits\SVHN')
train_data_path = os.path.join(svhn_dir, 'train_32x32.mat')
test_data_path = os.path.join(svhn_dir, 'test_32x32.mat')

AUG_BATCH_SIZE = 32  # batch size used while generating augmented images
AUG_SEED = 42        # fixed seed so the augmented set is reproducible

# Load SVHN dataset
train_data = loadmat(train_data_path)
test_data = loadmat(test_data_path)

X_train_svhn = np.array(train_data['X'])
Y_train_svhn = np.array(train_data['y'])
X_test_svhn = np.array(test_data['X'])
Y_test_svhn = np.array(test_data['y'])

# Transpose the SVHN data to match the required shape (num_samples, height, width, channels)
X_train_svhn = np.transpose(X_train_svhn, (3, 0, 1, 2))
X_test_svhn = np.transpose(X_test_svhn, (3, 0, 1, 2))

# Replace label 10 with 0 to match standard digit classification (0-9)
Y_train_svhn[Y_train_svhn == 10] = 0
Y_test_svhn[Y_test_svhn == 10] = 0

# Load MNIST dataset
(X_train_mnist, Y_train_mnist), (X_test_mnist, Y_test_mnist) = mnist.load_data()

# Resize MNIST images to 32x32
X_train_mnist_resized = np.array([cv2.resize(img, (32, 32)) for img in X_train_mnist])
X_test_mnist_resized = np.array([cv2.resize(img, (32, 32)) for img in X_test_mnist])

# Replicate the single grayscale channel three times so MNIST matches SVHN's RGB layout
X_train_mnist_resized = np.stack((X_train_mnist_resized,) * 3, axis=-1)
X_test_mnist_resized = np.stack((X_test_mnist_resized,) * 3, axis=-1)

# Ensure MNIST labels are column vectors like the SVHN labels
Y_train_mnist = Y_train_mnist.reshape(-1, 1)
Y_test_mnist = Y_test_mnist.reshape(-1, 1)

# Combine SVHN and MNIST datasets. The training pixels are deliberately kept
# on the raw 0-255 scale here: the augmentation below works on that scale.
X_train_raw = np.vstack((X_train_svhn, X_train_mnist_resized))
Y_train_raw = np.vstack((Y_train_svhn, Y_train_mnist))

X_test_combined = np.vstack((X_test_svhn, X_test_mnist_resized))
Y_test_combined = np.vstack((Y_test_svhn, Y_test_mnist))

# Normalize the test set to [0, 1]
X_test_combined = X_test_combined.astype('float32') / 255.0

print(X_train_raw.shape, Y_train_raw.shape)
print(X_test_combined.shape, Y_test_combined.shape)

print(X_train_raw.ndim)
print(Y_train_raw.ndim)

num_samples = X_train_raw.shape[0]

# Allocate the final training arrays once (originals + augmented copies) and
# fill them in place; this avoids the extra full-size copy np.vstack would make.
X_train_combined = np.empty((2 * num_samples,) + X_train_raw.shape[1:], dtype='float32')
Y_train_combined = np.empty((2 * num_samples,) + Y_train_raw.shape[1:], dtype=Y_train_raw.dtype)

# First half: the original images, scaled to [0, 1].
X_train_combined[:num_samples] = X_train_raw.astype('float32') / 255.0
Y_train_combined[:num_samples] = Y_train_raw

# Second half: augmented images. aug_X / aug_Y are views into the final
# arrays, so writing to them fills X_train_combined / Y_train_combined.
aug_X = X_train_combined[num_samples:]
aug_Y = Y_train_combined[num_samples:]

datagen = ImageDataGenerator(
    channel_shift_range=50.0,     # Randomly shift color channels within [-50, 50] (0-255 scale)
    brightness_range=[0.5, 1.0],  # Adjust brightness to be between 50% and 100%
    # No horizontal_flip: a mirrored digit is not a valid sample of its class.
)

# shuffle=False keeps augmented sample k aligned with original sample k, and
# one pass over the iterator visits every sample exactly once.
data_gen = datagen.flow(
    X_train_raw,
    Y_train_raw,
    batch_size=AUG_BATCH_SIZE,
    shuffle=False,
    seed=AUG_SEED,
)

filled = 0
while filled < num_samples:
    X_batch, Y_batch = next(data_gen)
    stop = filled + len(X_batch)  # the last batch may be smaller than AUG_BATCH_SIZE
    aug_X[filled:stop] = X_batch / 255.0  # bring augmented pixels onto the same [0, 1] scale
    aug_Y[filled:stop] = Y_batch
    filled = stop

# Every row of the augmented half must have been written (no uninitialised data).
assert filled == num_samples

(133257, 32, 32, 3) (133257, 1)
(36032, 32, 32, 3) (36032, 1)
4
2


In [22]:
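# NOTE: this second augmentation pass is disabled. Stacking a third copy of the
# training set raised the MemoryError shown in this cell's output
# (399771 = 3 x 133257 images).
# def contrast_adjustment(img):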
#     alpha = 0.5  # Contrast control
#     img = img * 255.0  # Convert to 0-255 range
#     mean = np.mean(img, axis=(0, 1, 2), keepdims=True)
#     img = alpha * (img - mean) + mean
#     img = np.clip(img, 0, 255)
#     img /= 255.0  # Normalize back to 0-1 range
#     return img
# 
# # Apply the custom function using ImageDataGenerator
# datagen1 = ImageDataGenerator(
#     preprocessing_function=contrast_adjustment,
#     brightness_range=[0.5, 1.0],  # Adjust brightness to be between 50% and 100%
#     horizontal_flip=True
# )
# 
# data_gen1 = datagen1.flow(X_train_combined,Y_train_combined,batch_size=32)
# for i in range (num_samples // 32):
#     X_batch,Y_batch = next(data_gen1)
#     aug_X[i*32:(i+1)*32] = X_batch
#     aug_Y[i*32:(i+1)*32] = Y_batch
# X_train_combined = np.vstack((X_train_combined,aug_X))
# Y_train_combined = np.vstack((Y_train_combined,aug_Y))

MemoryError: Unable to allocate 4.58 GiB for an array with shape (399771, 32, 32, 3) and data type float32

In [None]:
# CNN digit classifier: four convolutional stages (3x3 conv -> batch norm ->
# 2x2 max pool) with 32/64/128/256 filters, followed by a dense head that ends
# in a 10-way softmax.
model = Sequential()

for stage, n_filters in enumerate((32, 64, 128, 256)):
    # Only the very first layer declares the 32x32 RGB input shape.
    first_layer_kwargs = {'input_shape': (32, 32, 3)} if stage == 0 else {}
    model.add(
        Conv2D(
            n_filters,
            kernel_size=(3, 3),
            padding='same',
            activation="relu",
            **first_layer_kwargs,
        )
    )
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classification head.
model.add(Flatten())
for n_units in (128, 64):
    model.add(Dense(n_units, activation='relu'))
model.add(Dense(10, activation='softmax'))

In [None]:
# Labels are integer class ids, hence the sparse categorical cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for two epochs; the combined test set is passed as validation data so
# its loss/accuracy are reported after each epoch.
validation_set = (X_test_combined, Y_test_combined)
model.fit(
    x=X_train_combined,
    y=Y_train_combined,
    epochs=2,
    validation_data=validation_set,
)

In [None]:
# Report loss/accuracy on the combined test set.
loss, accuracy = model.evaluate(X_test_combined, Y_test_combined)
print(f"The accuracy is: {accuracy}")
print(f"The loss is: {loss}")

# Prediction on new images.
# Set the HANDWRITTEN_DIR environment variable to point at another folder;
# the default is the original local path.
path_dir = os.environ.get(
    "HANDWRITTEN_DIR",
    r"C:\Users\Kiran\Downloads\handwritten digits\handwritten",
)
for i in sorted(os.listdir(path_dir)):
    img_path = os.path.join(path_dir, i)
    if not os.path.isfile(img_path):
        # Skip sub-directories; load_img can only open files.
        continue

    # target_size is (height, width); the channel count comes from color_mode.
    img = image.load_img(img_path, color_mode='rgb', target_size=(32, 32))
    plt.imshow(img)

    # Scale pixels to [0, 1] exactly like the training data; feeding raw 0-255
    # values to a model trained on [0, 1] inputs gives meaningless predictions.
    X = image.img_to_array(img) / 255.0
    X = np.expand_dims(X, axis=0)  # Expand dims to create a batch of size 1

    # The last layer is a softmax, so despite the name these are class
    # probabilities, not raw logits.
    logits = model.predict(X)
    print(logits)
    predicted_class = np.argmax(logits)
    print(f"Predicted class for {i}: {predicted_class}")
    plt.title(str(predicted_class))  # title expects text, not a NumPy integer
    plt.show()
    time.sleep(2)
    plt.close()