In [None]:
# Import Libraries
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report,confusion_matrix
import tensorflow as tf
import cv2
import os
import numpy as np

In [4]:
import os
import cv2
import numpy as np

# Define class labels and image size
label = ['driving_license', 'social_security', 'others']
img_size = 224

def get_data(data_dir):
    """
    Reads images from a directory structure with subfolders for each class and resizes them.

    Args:
        data_dir (str): The base directory containing subfolders for each class.

    Returns:
        np.ndarray: An array of image data and corresponding class labels.
    """
    data = []  # Initialize an empty list to store image data and labels

    for each_label in label:
        path = os.path.join(data_dir, each_label)  # Path to the subfolder for each class
        class_num = label.index(each_label)  # Get the class label

        for img in os.listdir(path):
            try:
                # Read the image in BGR format and convert it to RGB format
                img_arr = cv2.imread(os.path.join(path, img))[...,::-1]
                
                # Resize the image to the preferred size
                resized_arr = cv2.resize(img_arr, (img_size, img_size))
                
                # Append the resized image and its class label to the data list
                data.append([resized_arr, class_num])
            except Exception as e:
                print(e)
    
    return np.array(data)  # Convert the list of data to a numpy array


In [5]:
# Defined training data path
train = get_data('./Training data/')

In [None]:
import seaborn as sns

# Initialize an empty list to store class names
l = []

# Iterate through the 'train' dataset
for i in train:
    if i[1] == 0:
        l.append("driving license")
    elif i[1] == 1:
        l.append("social security")
    else:
        l.append("Others")

# Set the style for the Seaborn plot
sns.set_style('darkgrid')

# Create a countplot to visualize the class distribution
sns.countplot(l)


In [8]:
## make numpy array of xtrain and y train,normalize it,and reshape to 224*224

x_train = []
y_train = []
x_val = []  
y_val = []  

for feature, label in train:
    x_train.append(feature)
    y_train.append(label)

# Normalize the data
x_train = np.array(x_train) / 255

# Reshape the data to the desired shape (assuming grayscale images)
x_train = x_train.reshape(-1, img_size, img_size, 1)

# Convert y_train to a NumPy array
y_train = np.array(y_train)

# Similarly, for validation data (assuming you have a validation dataset 'val')
for feature, label in val:  # 'val' should be defined similarly to 'train'
    x_val.append(feature)
    y_val.append(label)

# Normalize the validation data
x_val = np.array(x_val) / 255

# Reshape the validation data to the desired shape
x_val = x_val.reshape(-1, img_size, img_size, 1)

# Convert y_val to a NumPy array
y_val = np.array(y_val)


In [9]:
# Create an ImageDataGenerator with specific parameters for data augmentation

datagen = ImageDataGenerator(
    featurewise_center=False,  # Set input mean to 0 over the dataset
    samplewise_center=False,  # Set each sample mean to 0
    featurewise_std_normalization=False,  # Divide inputs by the standard deviation of the dataset
    samplewise_std_normalization=False,  # Divide each input by its standard deviation
    zca_whitening=False,  # Apply ZCA whitening
    rotation_range=30,  # Randomly rotate images in the range (degrees, 0 to 180)
    zoom_range=0.2,  # Randomly zoom images
    width_shift_range=0.1,  # Randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,  # Randomly shift images vertically (fraction of total height)
    horizontal_flip=True,  # Randomly flip images horizontally
    vertical_flip=False  # Randomly flip images vertically (set to False in this case)
)

# Compute statistics related to data augmentation based on the provided training data
datagen.fit(x_train)


In [10]:
### Adding sequential model , add a bunch of conv, maxpool, dropout , flatten it and then eventually add a dense of 3 for all 3 classes.

# Create a Sequential model
model = Sequential()

# Add a Convolutional layer with 32 filters, 3x3 kernel, "same" padding, and ReLU activation
model.add(Conv2D(32, 3, padding="same", activation="relu", input_shape=(224, 224, 3))

# Add a MaxPooling layer to downsample
model.add(MaxPool2D())

# Add another Convolutional layer with the same configuration
model.add(Conv2D(32, 3, padding="same", activation="relu"))

# Add another MaxPooling layer
model.add(MaxPool2D())

# Add another Convolutional layer with the same configuration
model.add(Conv2D(32, 3, padding="same", activation="relu"))

# Add another MaxPooling layer
model.add(MaxPool2D())

# Add a Convolutional layer with 64 filters and ReLU activation
model.add(Conv2D(64, 3, padding="same", activation="relu"))

# Add a MaxPooling layer
model.add(MaxPool2D())

# Add a Dropout layer with a dropout rate of 0.4 (40% dropout)
model.add(Dropout(0.4))

# Flatten the output
model.add(Flatten())

# Add a Dense layer with 128 units and ReLU activation
model.add(Dense(128, activation="relu"))

# Add the final Dense layer with 3 units for the 3 output classes and a softmax activation
model.add(Dense(3, activation="softmax"))

# Display a summary of the model architecture
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 112, 112, 32)      9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 56, 56, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 28, 28, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 28, 28, 64)        1

In [11]:
# Define the optimizer with a learning rate of 0.000001
opt = Adam(learning_rate=0.000001)

# Compile the model with the specified optimizer, loss function, and metrics
model.compile(optimizer=opt, 
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
              metrics=['accuracy'])


In [12]:
# Fit the model to the training data for 200 epochs
history = model.fit(x_train, y_train, epochs=200)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [13]:
# Perform inference on the training data (x_train) to make predictions
predictions = model.predict_classes(x_train)
predictions = predictions.reshape(1, -1)[0]

# Print a classification report to evaluate accuracy metrics
# Specify the target names for the classes
print(classification_report(y_train, predictions, target_names=['driving license (Class 0)', 'pan card (Class 1)', 'Others (Class 2)']))


W0818 21:57:47.380365 140298920219008 deprecation.py:323] From <ipython-input-13-1e233bf338d4>:1: Sequential.predict_classes (from tensorflow.python.keras.engine.sequential) is deprecated and will be removed after 2021-01-01.
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


                           precision    recall  f1-score   support

driving license (Class 0)       0.82      0.86      0.84       200
       pan card (Class 1)       0.89      0.84      0.86       200
         Others (Class 2)       0.79      0.79      0.79       200

                micro avg       0.83      0.83      0.83       600
                macro avg       0.83      0.83      0.83       600
             weighted avg       0.83      0.83      0.83       600



In [14]:
# Save the entire model, including its architecture, weights, and optimizer state.
model.save('model.h5')

---