In [44]:
import os
import cv2
import numpy as np
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D, Dense
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import MobileNetV2
from keras.preprocessing import image
from keras.optimizers import Adam
from keras import Input

In [45]:
# Mount Google Drive at /content/drive so that the dataset folders referenced
# later in this notebook (paths under /content/drive/MyDrive) can be read.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime — confirm before running this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [46]:
# Dataset locations on the mounted Drive: one folder per class.
_drive_root = '/content/drive/MyDrive'
resume_folder = f'{_drive_root}/resumes'
non_resume_folder = f'{_drive_root}/not_resumes'

In [47]:
# Load and Explore the Data
def load_and_preprocess_data(data_dir, label, augmentation, target_size=(128, 128)):
    """Load every decodable image in ``data_dir`` as a normalised array.

    Each regular file that OpenCV can decode is resized to ``target_size``,
    scaled to the ``[0, 1]`` range and tagged with ``label``. Entries that are
    not regular files, or that OpenCV cannot decode, are skipped.

    Args:
        data_dir: Directory whose direct children are the image files.
        label: Class label attached to every image loaded from ``data_dir``.
        augmentation: When truthy, return one randomly transformed copy of
            each image (rotation, shift, zoom, shear, flips) instead of the
            original pixels.
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(data, labels)``: ``data`` is a float32 array of shape
        ``(n, height, width, 3)`` and ``labels`` has one entry per image.
        When nothing could be loaded, ``n`` is 0 and both arrays are empty.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``data_dir`` cannot be read.
    """
    width, height = target_size
    data = []
    labels = []

    # sorted() makes the sample order independent of the filesystem listing order.
    for filename in sorted(os.listdir(data_dir)):
        img_path = os.path.join(data_dir, filename)

        # Skip sub-directories and other non-file entries.
        if not os.path.isfile(img_path):
            continue

        # cv2.imread returns None (rather than raising) for undecodable files.
        # NOTE(review): cv2.imread yields channels in BGR order, while Keras
        # ImageNet weights are documented for RGB input — confirm whether a
        # BGR->RGB conversion is wanted here and at inference time.
        img = cv2.imread(img_path)
        if img is None:
            continue

        img = cv2.resize(img, (width, height))
        # float32 halves memory compared with float64 and matches what the
        # augmentation iterator returns, so both code paths share one dtype.
        data.append(img.astype(np.float32) / 255.0)
        labels.append(label)

    if not data:
        # Keep the image rank so the result can still be concatenated with
        # non-empty batches, and never ask the generator for a batch of size 0.
        empty_images = np.empty((0, height, width, 3), dtype=np.float32)
        empty_labels = np.array([], dtype=np.asarray(label).dtype)
        return empty_images, empty_labels

    data = np.array(data)
    labels = np.array(labels)

    if augmentation:
        datagen = ImageDataGenerator(
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            zoom_range=0.2,
            shear_range=0.2,
            horizontal_flip=True,
            vertical_flip=True
        )
        # datagen.fit() is intentionally not called: it only computes the
        # statistics used by featurewise normalisation / ZCA whitening, and
        # none of those options are enabled above.
        batches = datagen.flow(data, labels, batch_size=len(data), shuffle=False)
        # The builtin next() is used because the iterator's .next() method is
        # not available in every Keras release. With shuffle=False the images
        # come back in input order, so the existing labels stay aligned.
        data, _ = next(batches)

    return data, labels

In [48]:
# Read both class folders twice: first as randomly augmented copies, then as
# the untouched originals. Label 1 marks resumes, label 0 everything else.
resume_data_aug, resume_labels_aug = load_and_preprocess_data(
    data_dir=resume_folder, label=1, augmentation=True
)
non_resume_data_aug, non_resume_labels_aug = load_and_preprocess_data(
    data_dir=non_resume_folder, label=0, augmentation=True
)

resume_data, resume_labels = load_and_preprocess_data(
    data_dir=resume_folder, label=1, augmentation=False
)
non_resume_data, non_resume_labels = load_and_preprocess_data(
    data_dir=non_resume_folder, label=0, augmentation=False
)

In [49]:
# Stack the two classes along the sample axis (resumes first, then
# non-resumes), once for the augmented copies and once for the originals.
X_train_aug = np.concatenate([resume_data_aug, non_resume_data_aug])
y_train_aug = np.concatenate([resume_labels_aug, non_resume_labels_aug])

X_train = np.concatenate([resume_data, non_resume_data])
y_train = np.concatenate([resume_labels, non_resume_labels])

In [50]:
# Shuffle the data
# A single index permutation is applied to images and labels alike, so every
# image keeps its own label. Indexing the arrays directly avoids building a
# Python list of (image, label) tuples and also works when the set is empty.
# The fixed seed makes the resulting order repeatable for the same inputs.
shuffle_order = np.random.default_rng(42).permutation(len(X_train_aug))
X_train_aug = np.asarray(X_train_aug)[shuffle_order]
y_train_aug = np.asarray(y_train_aug)[shuffle_order]

In [51]:
# Turn both label vectors into column arrays of shape (n, 1).
# (No one-hot encoding happens here: the labels are only reshaped.)
y_train_aug = np.reshape(np.array(y_train_aug), (-1, 1))
y_train = np.reshape(np.array(y_train), (-1, 1))

In [52]:
# Hold out 30% of the augmented samples for evaluation. The fixed random_state
# makes the partition repeatable for a given input order.
split_arrays = train_test_split(X_train_aug, y_train_aug, random_state=42, test_size=0.3)
X_train_split, X_test_split, y_train_split, y_test_split = split_arrays

In [53]:
# ImageNet-pretrained MobileNetV2 backbone without its classification top,
# fed by a 128x128 three-channel input.
# NOTE(review): Keras documents MobileNetV2 as expecting inputs scaled to
# [-1, 1] (mobilenet_v2.preprocess_input), whereas the loader in this notebook
# divides pixels by 255 — confirm whether the [0, 1] range is intended.
image_input = Input(shape=(128, 128, 3))
base_model = MobileNetV2(include_top=False, weights='imagenet', input_tensor=image_input)

# Keep the pretrained weights fixed; only the new head below is trained.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Classification head: pooled features -> 256-unit ReLU layer -> one sigmoid unit.
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden_features = Dense(256, activation='relu')(pooled_features)
x = Dense(1, activation='sigmoid')(hidden_features)



In [54]:
# Create the final model: backbone input through to the sigmoid head output.
model = Model(inputs=base_model.input, outputs=x)

# Compile the model for binary classification.
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and is rejected outright by newer Keras releases.
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Display the model summary
# model.summary()



In [55]:
# Training hyperparameters
epochs = 10
batch_size = 32

# Fit on the training split. The held-out split is passed as validation data,
# so its loss and accuracy are reported after every epoch.
history = model.fit(
    x=X_train_split,
    y=y_train_split,
    validation_data=(X_test_split, y_test_split),
    epochs=epochs,
    batch_size=batch_size,
)

# Final score on the held-out split (the same split used for validation above).
test_loss, test_accuracy = model.evaluate(
    x=X_test_split,
    y=y_test_split,
    batch_size=batch_size,
)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.2158
Test Accuracy: 90.91%
