# In [1]:  (notebook cell marker kept from the Jupyter export)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import shutil
from sklearn.model_selection import train_test_split, KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

# Directory that holds the original dataset images.
# NOTE: machine-specific absolute path; adjust it before running elsewhere.
original_images_dir = r'C:\Users\alfbg\Documents\HEI\Semestre 8\Intelligence Articficielle\Projet\archive\img_align_celeba\img_align_celeba'

# CSV file containing the attribute annotations (one row per image).
attributes_file = r'C:\Users\alfbg\Documents\HEI\Semestre 8\Intelligence Articficielle\Projet\archive\list_attr_celeba.csv'

# Load the attributes file (raises FileNotFoundError if the path does not exist).
attributes = pd.read_csv(attributes_file)

# Attribute used as the classification target, e.g. 'Smiling'.
target_attribute = 'Smiling'

# Keep only the image file name and the target column. The explicit .copy()
# makes `data` an independent DataFrame, so the column assignment below does
# not write into a slice of `attributes` (which raises SettingWithCopyWarning
# and may silently have no effect).
data = attributes[['image_id', target_attribute]].copy()

# Keras' flow_from_dataframe requires string labels when class_mode='binary'.
data[target_attribute] = data[target_attribute].astype(str)

# Hold out 20% of the rows as the final test set (fixed seed for repeatability).
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

# Per-fold metric accumulators, filled in by the cross-validation loop.
acc_scores, loss_scores = [], []

# 5-fold splitter over the training rows, shuffled with a fixed seed.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Shape of the network input; its first two entries are also passed to the
# image generators as target_size.
input_shape = (195, 269, 3)

# Image generator that only multiplies pixel values by 1/255
# (no augmentation transforms are configured).
datagen = ImageDataGenerator(rescale=1.0 / 255)

def _build_model(shape):
    """Build and compile a fresh, untrained CNN for binary classification.

    Args:
        shape: Input image shape passed to the first Conv2D layer,
            as (height, width, channels).

    Returns:
        A compiled Sequential model ending in a single sigmoid unit.
    """
    net = Sequential([
        Conv2D(7, (5, 5), activation='relu', input_shape=shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(7, (5, 5), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(30, activation='relu'),
        Dropout(0.1),
        # One sigmoid unit: class_mode='binary' yields a single 0/1 label per
        # image, so the output must be one probability (not 30) for
        # binary_crossentropy and the later 0.5 thresholding to be meaningful.
        Dense(1, activation='sigmoid')
    ])
    net.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return net


# K-fold Cross Validation model evaluation
for train_index, val_index in kf.split(train_data):
    # kf.split yields positional indices, hence .iloc.
    training_data = train_data.iloc[train_index]
    validation_data = train_data.iloc[val_index]

    # Create generators for training and validation
    train_generator = datagen.flow_from_dataframe(
        dataframe=training_data,
        directory=original_images_dir,
        x_col="image_id",
        y_col=target_attribute,
        target_size=(input_shape[0], input_shape[1]),
        batch_size=20,
        class_mode='binary'
    )

    val_generator = datagen.flow_from_dataframe(
        dataframe=validation_data,
        directory=original_images_dir,
        x_col="image_id",
        y_col=target_attribute,
        target_size=(input_shape[0], input_shape[1]),
        batch_size=20,
        class_mode='binary'
    )

    # Rebuild the model for every fold so no weights leak between folds.
    model = _build_model(input_shape)

    # Train the model
    history = model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=5,
        validation_data=val_generator,
        validation_steps=len(val_generator)
    )

    # Evaluate the model on this fold's validation split and record the scores.
    loss, acc = model.evaluate(val_generator, steps=len(val_generator))
    acc_scores.append(acc)
    loss_scores.append(loss)

# Average the per-fold validation metrics collected during cross-validation.
avg_acc_score, avg_loss_score = (
    np.mean(fold_scores) for fold_scores in (acc_scores, loss_scores)
)

# Report both averages with four decimal places.
print(f"Average Validation Accuracy: {avg_acc_score:.4f}")
print(f"Average Validation Loss: {avg_loss_score:.4f}")

# Evaluate on test data.
# shuffle=False is required here: flow_from_dataframe shuffles by default, and
# a shuffled generator yields batches in a different order than
# test_generator.classes, which would misalign predictions and true labels in
# the classification report and confusion matrix below.
test_generator = datagen.flow_from_dataframe(
    dataframe=test_data,
    directory=original_images_dir,
    x_col="image_id",
    y_col=target_attribute,
    target_size=(input_shape[0], input_shape[1]),
    batch_size=20,
    class_mode='binary',
    shuffle=False
)

# NOTE: `model` is whatever the cross-validation loop left behind, i.e. the
# network trained during the last fold (not one retrained on all training data).
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc:.4f}, Test Loss: {test_loss:.4f}")

# Predictions and Confusion Matrix
y_pred = model.predict(test_generator)
# Threshold the sigmoid outputs at 0.5 and flatten to a 1-D label vector so it
# has the same shape as y_true.
y_pred = (y_pred > 0.5).astype('int32').ravel()
y_true = test_generator.classes
print(classification_report(y_true, y_pred))

# Generating the confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

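# Captured cell output from the exported run (the attributes CSV was not found at the configured path):
# FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\alfbg\\Documents\\HEI\\Semestre 8\\Intelligence Articficielle\\Projet\\archive\\list_attr_celeba.csv'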