In [None]:
# Download the dataset archive from Google Drive by file id using the gdown CLI.
# NOTE(review): presumably this produces doc_classification.zip, which the next
# cell extracts — confirm the downloaded file name.
!gdown https://drive.google.com/uc?id=1FehewnjdHb5Tu-ZeP_FcQqqAGdwgPDN8

In [None]:
!unzip doc_classification.zip

# Importing required libraries

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import VGG19
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import cv2
import numpy as np
from tensorflow.keras.models import load_model

In [None]:
# Input height, width and channel count the VGG19 backbone is built for
input_shape = (224, 224, 3)

# Locations of the training and validation image folders
train_dir = "/content/data/train"
validation_dir = "/content/data/validation"

# VGG19 convolutional backbone with ImageNet weights; include_top=False leaves
# out the original fully connected classifier so a new head can be attached
base_model = VGG19(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)

# Mark every backbone layer as non-trainable so its pre-trained weights stay
# fixed during training
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Classification head stacked on top of the VGG19 backbone
model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    # Output layer: one softmax unit per class (3 here) — change this count
    # if the dataset has a different number of classes
    layers.Dense(3, activation='softmax'),
])

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Number of images per batch, shared by both generators
batch_size = 32

# Training images: pixel values are multiplied by 1/255; no augmentation
# transforms are configured here
train_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Validation images get the same rescaling and nothing else
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Directory-backed batch iterators for training and validation.
# Images are resized to the model's input height and width;
# class_mode='categorical' is the setting for multi-class classification.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=input_shape[:2],
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    class_mode='categorical',
    batch_size=batch_size,
    target_size=input_shape[:2],
)

In [None]:
# Checkpoint callback: keep only the best model seen so far, judged by the
# highest validation accuracy.
# NOTE(review): Keras 3 is documented to require a '.keras' or '.h5' suffix on
# the checkpoint path and 'ckpt' has none — confirm the installed version. The
# inference cell loads "/content/ckpt", so rename both together if needed.
checkpoint_filepath = 'ckpt'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
)

In [None]:
# Train the model
epochs = 5  # Adjust the number of epochs based on your dataset and computational resources

# len(generator) is the number of batches needed to cover every image once,
# including a final partial batch. The previous `samples // batch_size` dropped
# that partial batch and evaluated to 0 steps whenever a set held fewer images
# than one batch.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    # Saves the best model (by val_accuracy) while training runs
    callbacks=[model_checkpoint_callback],
)

In [None]:
# Accuracy per epoch for the training and validation sets
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='Training Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
# Save the trained model (its state after the last epoch) to a .keras file.
# Note: this is separate from the 'ckpt' checkpoint written during training,
# which keeps only the best model by validation accuracy and is the one the
# inference cell loads.
model.save("vgg19_model_multiclass.keras")

# Inference

In [None]:
# Class labels, indexed by the position of the model's softmax outputs.
# NOTE(review): assumes the training sub-folders carry exactly these names so
# that the generator's class indexing matches this order — confirm against
# train_generator.class_indices.
class_names = ['citizenship', 'license', 'passport']


def predict(img, model=None):
    """Classify one document image and print the predicted class.

    The image is preprocessed the same way as the training data (RGB channel
    order, 224x224, pixel values scaled by 1/255) before being passed to the
    VGG19-based classifier.

    Args:
        img: Path of the image file to classify.
        model: Optional, already loaded Keras model. When omitted, the model is
            loaded from "/content/ckpt"; pass a loaded model to avoid reading
            it from disk on every call.

    Raises:
        ValueError: If the image cannot be read from ``img``.
    """
    image = cv2.imread(img)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None;
        # fail here with a clear message instead of inside cv2.resize.
        raise ValueError(
            f"Could not read image {img!r} (missing file or unsupported format)")

    # OpenCV decodes images as BGR, while the training generators fed the
    # network RGB images, so swap the channel order first.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Resize to the spatial size the network was trained on
    resized_image = cv2.resize(rgb_image, (224, 224))

    # Apply the same 1/255 rescaling the training/validation generators used
    scaled_image = resized_image.astype(np.float32) / 255.0

    # Add a leading batch dimension to match the model's expected input shape
    input_image = np.expand_dims(scaled_image, axis=0)

    if model is None:
        # Best checkpoint written by the ModelCheckpoint callback in training
        model = tf.keras.models.load_model("/content/ckpt")
    predictions = model.predict(input_image)[0]

    # Index of the most probable class, then its human-readable name
    predicted_class_index = np.argmax(predictions)
    predicted_class_name = class_names[predicted_class_index]

    print("Predicted Class:", predicted_class_name)
    print("Predicted Probabilities:", predictions)

In [None]:
# Import the Image submodule explicitly: a bare `import PIL` does not load
# PIL.Image, so `PIL.Image.open` only worked when some other library had
# already imported it.
import PIL.Image

image_path = "/content/test2.jpeg"
predict(image_path)
# Last expression of the cell, so the notebook renders the image itself
PIL.Image.open(image_path)