In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from imblearn.over_sampling import RandomOverSampler


In [16]:
# Load the data from the Excel file
data = pd.read_excel('Profile_View.xlsx')
image_paths = data['Image_Path'].values
labels = data['Profile_View'].values

# Perform label encoding for binary classification
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels)

# Apply oversampling to balance the classes
sampler = RandomOverSampler(random_state=42)
image_paths_resampled, labels_resampled = sampler.fit_resample(image_paths.reshape(-1, 1), labels)

# Flatten the resampled paths and labels
image_paths_resampled = np.array(image_paths_resampled).flatten()
labels_resampled = np.array(labels_resampled).flatten()

# Split the resampled dataset into train, validation, and test sets
train_paths, test_paths, train_labels, test_labels = train_test_split(image_paths_resampled, labels_resampled, test_size=0.2, random_state=42)
train_paths, val_paths, train_labels, val_labels = train_test_split(train_paths, train_labels, test_size=0.2, random_state=42)

# Set the dimensions of your input images
img_width, img_height = 150, 150

# Set the number of epochs and batch size
epochs = 10
batch_size = 8

# Data preprocessing and augmentation
train_datagen = ImageDataGenerator(rescale=1.0/255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
val_datagen = ImageDataGenerator(rescale=1.0/255)
test_datagen = ImageDataGenerator(rescale=1.0/255)

train_generator = train_datagen.flow_from_dataframe(
    pd.DataFrame({'Image_Path': train_paths, 'Profile_View': train_labels}),
    x_col='Image_Path',
    y_col='Profile_View',
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='raw')

val_generator = val_datagen.flow_from_dataframe(
    pd.DataFrame({'Image_Path': val_paths, 'Profile_View': val_labels}),
    x_col='Image_Path',
    y_col='Profile_View',
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='raw')

test_generator = test_datagen.flow_from_dataframe(
    pd.DataFrame({'Image_Path': test_paths, 'Profile_View': test_labels}),
    x_col='Image_Path',
    y_col='Profile_View',
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='raw')

# Build the CNN model
model = tf.keras.models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # Dropout layer to help prevent overfitting
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Define early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Train the model
model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=val_generator.samples // batch_size,
    callbacks=[early_stopping])

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_generator)
print("Test accuracy:", test_acc)


Found 179 validated image filenames.
Found 45 validated image filenames.
Found 56 validated image filenames.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.98214287
