In [None]:
# System Imports
import os
import subprocess
import shutil

# Pre Processing Imports 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split 
import numpy as np 
import pandas as pd

# Deep Learning Imports 
import tensorflow as tf
from tensorflow import keras

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.optimizers import Adam

from keras.applications import ResNet50
from keras.layers import GlobalAveragePooling2D, Dense
from keras.models import Model

In [None]:
# Load the identity file into a DataFrame: one row per image with its file name and label.
# NOTE(review): this path is absolute ('/data/...') while the image directories used
# later in the notebook are relative ('data/...') — confirm this is intentional.
# sep=r'\s+' is the non-deprecated equivalent of delim_whitespace=True.
df = pd.read_csv('/data/identity_CelebA.txt', sep=r'\s+', header=None, names=['filename', 'label'])

# Fixed seed: the split decides which files get physically moved, so it must be
# reproducible across kernel restarts.
SPLIT_SEED = 42

# A stratified split raises ValueError if any label has fewer than two rows.
# Labels with a single image cannot be split, so they are kept for training only.
images_per_label = df['label'].map(df['label'].value_counts())
splittable_df = df[images_per_label >= 2]
single_image_df = df[images_per_label < 2]

# create train test splits based on the txt file (containing file names and labels)
train_df, test_df = train_test_split(
    splittable_df,
    test_size=0.2,
    stratify=splittable_df['label'],
    random_state=SPLIT_SEED,
)
train_df = pd.concat([train_df, single_image_df])


In [None]:
# Move the actual images into per-label sub-folders based on the train-test split of the labels
def move_images(df, source_dir, target_dir):
    """Move the image files listed in ``df`` from ``source_dir`` into ``target_dir``.

    When ``df`` has a 'label' column, each file is placed in
    ``target_dir/<label>/`` so the result is a one-folder-per-class layout,
    which is what directory-based loaders that infer classes from sub-folder
    names require. Without a 'label' column the files are moved directly into
    ``target_dir``.

    Files that are already at their destination and no longer present in
    ``source_dir`` are skipped, so the function can be re-run safely.

    Args:
        df: Table with a 'filename' column and, optionally, a 'label' column.
        source_dir: Directory currently holding the image files.
        target_dir: Root directory to move the files into (created if missing).

    Raises:
        FileNotFoundError: If a file is in neither the source nor the destination.
    """
    os.makedirs(target_dir, exist_ok=True)  # Ensure target directory exists

    filenames = df['filename']
    if 'label' in df:
        entries = zip(filenames, df['label'])
    else:
        entries = ((filename, None) for filename in filenames)

    prepared_dirs = {target_dir}  # avoid calling makedirs once per file
    for filename, label in entries:
        dest_dir = target_dir if label is None else os.path.join(target_dir, str(label))
        if dest_dir not in prepared_dirs:
            os.makedirs(dest_dir, exist_ok=True)
            prepared_dirs.add(dest_dir)

        src_path = os.path.join(source_dir, filename)
        dest_path = os.path.join(dest_dir, filename)

        # Already moved by a previous run: nothing left to do for this file.
        if os.path.exists(dest_path) and not os.path.exists(src_path):
            continue
        shutil.move(src_path, dest_path)

# Where the images currently live, and where each split should end up
source_directory = 'data/img_align_celeba'
train_directory, test_directory = 'data/train', 'data/test'

# Relocate the training images first, then the test images
for split_df, split_directory in ((train_df, train_directory), (test_df, test_directory)):
    move_images(split_df, source_directory, split_directory)

In [None]:
# Define image size and batch size
image_size = (224, 224)  # Size expected by ResNet
batch_size = 32

# Data generators for train and test sets; both apply the ResNet50 preprocess_input
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_generator = train_datagen.flow_from_directory(
    train_directory,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='sparse'
)

# Class names ordered by the integer index the training generator assigned to them.
train_class_names = sorted(train_generator.class_indices, key=train_generator.class_indices.get)

# Pass the training class list explicitly. Otherwise the test generator infers
# its own list from the test sub-folders, and if a class is missing there the
# sparse label indices no longer line up with the ones the model was trained on.
test_generator = test_datagen.flow_from_directory(
    test_directory,
    target_size=image_size,
    batch_size=batch_size,
    class_mode='sparse',
    classes=train_class_names
)

# ResNet50 backbone with ImageNet weights, built without its top layer
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Mark every backbone layer as non-trainable
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# One output unit per class reported by the training generator
num_classes = len(train_generator.class_indices)

# Assemble the network step by step: backbone -> flatten -> dense head
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))  # Output layer for classification

# Sparse cross-entropy matches the integer labels produced with class_mode='sparse'
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Fit for 10 epochs, validating against the test generator
model.fit(train_generator, epochs=10, validation_data=test_generator)