In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

In [2]:
import tensorflow
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [12]:
import os
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam

In [3]:
def resize_images(input_folder, output_folder, target_size=(600, 800)):
    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterate through each image in the input folder
    for filename in os.listdir(input_folder):
        img_path = os.path.join(input_folder, filename)

        # Open the image using Pillow (PIL)
        img = Image.open(img_path)

        # Resize the image to the target size
        img_resized = img.resize(target_size, Image.ANTIALIAS)

        # Save the resized image to the output folder
        output_path = os.path.join(output_folder, filename)
        img_resized.save(output_path)


In [4]:
def augment_images(input_folder, output_folder, augment_size=10):
    # Set up data augmentation
    datagen = ImageDataGenerator(
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        # horizontal_flip=True,
        fill_mode='nearest'
    )

    # Create output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Iterate through each image in the input folder
    for filename in os.listdir(input_folder):
        img_path = os.path.join(input_folder, filename)

        # Open the image using Pillow (PIL)
        img = Image.open(img_path)

        # Convert the image to a NumPy array and resize
        img_array = np.array(img.resize((600, 800)).convert("RGB"))

        # Expand dimensions to meet the input shape requirements of the data generator
        img_array = img_array.reshape((1,) + img_array.shape)

        # Apply data augmentation and save augmented images
        i = 0
        for batch in datagen.flow(img_array, batch_size=1, save_to_dir=output_folder, save_prefix='augmented', save_format='png'):
            i += 1
            if i >= augment_size:
                break  # Break after generating the specified number of augmented images

In [5]:
# Resizing non-resume images
resize_images('/content/drive/MyDrive/Bureau_assignment/Dataset/Non-resume', '/content/drive/MyDrive/Bureau_assignment/Dataset/non-res_resized')

  img_resized = img.resize(target_size, Image.ANTIALIAS)


In [16]:
# Augmenting non resume resized images
augment_images('/content/drive/MyDrive/Bureau_assignment/Dataset/non-res_resized', '/content/drive/MyDrive/Bureau_assignment/Dataset/augmented_non-res', augment_size=10)

In [17]:
# Resizing resume images
resize_images('/content/drive/MyDrive/Bureau_assignment/Dataset/Resume', '/content/drive/MyDrive/Bureau_assignment/Dataset/res_resized')

  img_resized = img.resize(target_size, Image.ANTIALIAS)


In [20]:
# Augmenting non resume resized images
augment_images('/content/drive/MyDrive/Bureau_assignment/Dataset/res_resized', '/content/drive/MyDrive/Bureau_assignment/Dataset/augmented_res', augment_size=10)

In [27]:
# Define paths to your augmented dataset
train_data_dir = '/content/drive/MyDrive/Bureau_assignment/Dataset/Augmented_dataset'
# validation_data_dir = 'path/to/augmented_dataset/validation'

# Define image size and batch size
img_size = (600, 800)
batch_size = 32

# Set up data generators for training and validation
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',  # Assuming it's a binary classification task
    shuffle=True  # Set to True if you want to shuffle the order of the images
)

# validation_datagen = ImageDataGenerator(rescale=1./255)
# validation_generator = validation_datagen.flow_from_directory(
#     validation_data_dir,
#     target_size=img_size,
#     batch_size=batch_size,
#     class_mode='binary',  # Assuming it's a binary classification task
#     shuffle=False  # Validation data usually doesn't need to be shuffled
# )

# Load ResNet50 model with pre-trained weights (excluding top layers)
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(600, 800, 3))

# Freeze the layers of the pre-trained model
for layer in base_model.layers:
    layer.trainable = False

# Add custom classification layers
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation='relu')(x)
predictions = layers.Dense(1, activation='sigmoid')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=10
    # validation_data=validation_generator,
    # validation_steps=validation_generator.samples // batch_size
)


Found 2647 images belonging to 2 classes.




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [21]:
# Save the trained model
# model.save('resnet50_resume_classifier.h5')

  saving_api.save_model(


In [28]:
model.layers

[<keras.src.engine.input_layer.InputLayer at 0x7a7730bab070>,
 <keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D at 0x7a77501b9240>,
 <keras.src.layers.convolutional.conv2d.Conv2D at 0x7a7730ba9f60>,
 <keras.src.layers.normalization.batch_normalization.BatchNormalization at 0x7a770520cbb0>,
 <keras.src.layers.core.activation.Activation at 0x7a7730a7f6d0>,
 <keras.src.layers.reshaping.zero_padding2d.ZeroPadding2D at 0x7a7704f88280>,
 <keras.src.layers.pooling.max_pooling2d.MaxPooling2D at 0x7a7704f88ca0>,
 <keras.src.layers.convolutional.conv2d.Conv2D at 0x7a7704f8a110>,
 <keras.src.layers.normalization.batch_normalization.BatchNormalization at 0x7a7704f8bc10>,
 <keras.src.layers.core.activation.Activation at 0x7a7704f8bd30>,
 <keras.src.layers.convolutional.conv2d.Conv2D at 0x7a7704f88820>,
 <keras.src.layers.normalization.batch_normalization.BatchNormalization at 0x7a7704f8ba30>,
 <keras.src.layers.core.activation.Activation at 0x7a7704f8ba60>,
 <keras.src.layers.convolutional.

In [29]:
# Function to preprocess an image for prediction
def preprocess_image(img_path, target_size=(600, 800)):
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    return img_array / 255.0  # Normalize the pixel values to the range [0, 1]

# Make predictions on a single image
test_image_path = '/content/drive/MyDrive/Bureau_assignment/Dataset/Test_dataset/non-resume/Screenshot 2023-12-30 234940.png'  # Replace with the path to your test image
preprocessed_image = preprocess_image(test_image_path)

# Get the prediction
prediction = model.predict(preprocessed_image)

# Display the result
if prediction > 0.5:
    print("Prediction: Resume")
else:
    print("Prediction: Non-Resume")

Prediction: Non-Resume


In [31]:
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    '/content/drive/MyDrive/Bureau_assignment/Dataset/Test_dataset',
    target_size=(600, 800),
    batch_size=batch_size,
    class_mode='binary',  # Adjust based on your classification task
    shuffle=False  # Important to keep the order of predictions consistent
)

evaluation = model.evaluate(test_generator)
print(f'Test Loss: {evaluation[0]}, Test Accuracy: {evaluation[1]}')

Found 42 images belonging to 2 classes.
Test Loss: 0.6208658814430237, Test Accuracy: 0.738095223903656
