In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions




In [2]:
# Root folders of the training and testing image sets.
train_data_dir = 'images/Augmentation/train'
test_data_dir = 'images/Augmentation/test/'

# Image width and height in pixels; matches the (128, 128, 3) input shape
# of the models built further down in this notebook.
img_width = img_height = 128

# Function to load images and labels
def load_data(data_dir):
    """Load every readable image under ``data_dir`` with an integer class label.

    ``data_dir`` is expected to contain one sub-directory per class; the
    sub-directory name is the class name.  Class names are encoded to
    integers with a ``LabelEncoder`` fitted on the sorted directory names.

    Args:
        data_dir: Path of the dataset root folder.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays.  ``images`` holds the
        pixel data as float32 scaled to [0, 1], each image resized to
        ``img_width`` x ``img_height`` (channel order is whatever
        ``cv2.imread`` returns, i.e. BGR).  ``labels`` holds the encoded
        class index of each image, in the same order.
    """
    images = []
    labels = []
    label_encoder = LabelEncoder()

    # Only sub-directories are classes; stray files in the root (e.g.
    # .DS_Store) would otherwise crash the inner os.listdir call.
    # Sorting makes the traversal order reproducible across machines.
    emotions = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    label_encoder.fit(emotions)

    for emotion in emotions:
        label = label_encoder.transform([emotion])[0]  # encoded integer label
        emotion_dir = os.path.join(data_dir, emotion)
        for img_file in sorted(os.listdir(emotion_dir)):
            img_path = os.path.join(emotion_dir, img_file)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable / non-image file with None
                # instead of raising, so skip it explicitly.
                print(f"Skipping unreadable image: {img_path}")
                continue
            if img.shape[:2] != (img_height, img_width):
                # Enforce a uniform size so np.array() yields a regular
                # 4-D array; cv2.resize takes (width, height).
                img = cv2.resize(img, (img_width, img_height))
            images.append(img.astype('float32') / 255.0)  # normalize to [0, 1]
            labels.append(label)

    return np.array(images), np.array(labels)


# Read both splits from disk with the same loader (training first, then
# testing): images go to X_*, integer class labels to y_*.
(X_train, y_train), (X_test, y_test) = (
    load_data(split_dir) for split_dir in (train_data_dir, test_data_dir)
)



In [3]:
# One-hot encode the integer class labels: row i of the identity matrix is
# the one-hot vector of class i, so indexing with the label array encodes it.
num_classes = len(np.unique(y_train))
y_train = np.eye(num_classes)[y_train]
y_test = np.eye(num_classes)[y_test]

# Integer labels recovered from the one-hot matrices.
y_train_single = np.argmax(y_train, axis=1)
y_test_single = np.argmax(y_test, axis=1)

# Hold out 20% of the training data as a validation set.  Stratifying on the
# integer labels keeps the class proportions the same in both parts.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train_single,
)

# The split shrank and reshuffled the training set, so recompute the integer
# labels to keep y_train_single aligned row-for-row with X_train / y_train.
y_train_single = np.argmax(y_train, axis=1)



In [5]:
import os
import cv2
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import Flatten, Dense

# Number of cross-validation folds (K); change as needed.
n_splits = 5

# Upper bound on how many validation samples per fold get their class
# probabilities printed; printing every sample floods the notebook output.
max_printed_samples = 5


def build_model(input_shape, num_outputs):
    """Build and compile the dense baseline classifier used in this cell.

    The network flattens the image, applies one 128-unit ReLU hidden layer
    and ends in a softmax layer with one unit per class.

    Args:
        input_shape: Shape of a single input sample, e.g. ``(128, 128, 3)``.
        num_outputs: Number of classes (width of the softmax output layer).

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    net = Sequential()
    net.add(Flatten(input_shape=input_shape))
    net.add(Dense(128, activation='relu'))
    net.add(Dense(num_outputs, activation='softmax'))
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net


# Derive the model dimensions from the data instead of hard-coding them, so
# the cell keeps working if the image size or the number of classes changes.
input_shape = X_train.shape[1:]
num_outputs = y_train.shape[1]

# StratifiedKFold needs integer class labels, not one-hot rows.
y_train_single = np.argmax(y_train, axis=1)

# A fixed random_state makes the shuffled folds reproducible between runs.
kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Per-fold validation metrics.
acc_per_fold = []
loss_per_fold = []

fold_no = 1
for train_index, test_index in kfold.split(X_train, y_train_single):
    print(f'Fold {fold_no}...')

    # test_index selects this fold's held-out (validation) samples.
    X_train_fold, X_val_fold = X_train[train_index], X_train[test_index]
    y_train_fold, y_val_fold = y_train[train_index], y_train[test_index]

    # Integer labels for this fold; not used in this cell, retained in case
    # later cells read them.
    y_train_fold_single = np.argmax(y_train_fold, axis=1)
    y_val_fold_single = np.argmax(y_val_fold, axis=1)

    # A fresh model per fold so no weights leak between folds.
    model = build_model(input_shape, num_outputs)

    history = model.fit(
        X_train_fold,
        y_train_fold,
        epochs=10,
        batch_size=32,
        validation_data=(X_val_fold, y_val_fold),
        verbose=1,
    )

    # scores[0] is the loss, scores[1] the accuracy (see model.metrics_names).
    scores = model.evaluate(X_val_fold, y_val_fold, verbose=0)
    print(f'Score for Fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1] * 100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

    # Class probabilities for the whole validation fold; only the first few
    # samples are printed.
    probabilities = model.predict(X_val_fold)
    for i, probs in enumerate(probabilities[:max_printed_samples]):
        print(f"Sample {i + 1} - Probabilities for each class:")
        for class_idx, class_prob in enumerate(probs):
            print(f"Class {class_idx}: Probability - {class_prob:.4f}")

    fold_no += 1

# Mean and standard deviation of the validation metrics over all folds.
print('Average scores across all folds:')
print(f'Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
print(f'Loss: {np.mean(loss_per_fold)} (+- {np.std(loss_per_fold)})')

# Retrain the same architecture on the full training set and measure it once
# on the held-out test data.
best_model = build_model(input_shape, num_outputs)
best_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Fold 1...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for Fold 1: loss of 0.06361096352338791; accuracy of 100.0%
Sample 1 - Probabilities for each class:
Class 0: Probability - 0.0001
Class 1: Probability - 0.0154
Class 2: Probability - 0.0001
Class 3: Probability - 0.0001
Class 4: Probability - 0.0001
Class 5: Probability - 0.0168
Class 6: Probability - 0.9674
Sample 2 - Probabilities for each class:
Class 0: Probability - 0.0146
Class 1: Probability - 0.9406
Class 2: Probability - 0.0193
Class 3: Probability - 0.0100
Class 4: Probability - 0.0022
Class 5: Probability - 0.0129
Class 6: Probability - 0.0004
Sample 3 - Probabilities for each class:
Class 0: Probability - 0.0231
Class 1: Probability - 0.0289
Class 2: Probability - 0.0027
Class 3: Probability - 0.0001
Class 4: Probability - 0.9343
Class 5: Probability - 0.0023
Class 6: Probability - 0.0086
Sample 4 - Probabilities for each class:
Class 0: Probabilit