In [None]:
import os
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import shutil
from IPython.display import clear_output
import tensorflow as tf
import sklearn
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

# Seed the random number generators the notebook relies on so that
# "Restart Kernel -> Run All" reproduces the same results.
# NOTE: the previous `sklearn.utils.check_random_state(42)` call only returned a
# new RandomState object (which was discarded); it did not seed anything.
np.random.seed(42)      # global NumPy RNG: used by scikit-learn when random_state=None and by ImageDataGenerator
tf.random.set_seed(42)  # TensorFlow global seed (weight initialisation, TF random ops)

In [None]:
# Enable on-demand GPU memory allocation instead of letting TensorFlow grab
# all device memory up front.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every visible GPU,
    # so apply it to all of them rather than only to the first one.
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Raised when the GPUs were already initialised (e.g. the cell is re-run
    # in a live kernel); the setting can only be changed before first use.
    print(err)
print(physical_devices)

In [None]:
# Load the dataset index; later cells read its 'file_name' and 'class' columns.
train_data = pd.read_csv(os.path.join('documents', 'emovo.csv'))

# Hold out 15% of the rows as a test set. A fixed random_state keeps the
# held-out rows identical between runs, so test metrics stay comparable.
train, test = train_test_split(train_data, test_size=0.15, shuffle=True, random_state=42)


In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Directory that contains the images
data_dir = 'images' + os.sep

# Input geometry: images are resized to height x width with `channels` colour planes
height, width, channels = 400, 400, 3
target_size = (height, width)
target_shape = target_size + (channels,)

# One-hot encoded targets ('categorical' = multi-class classification)
class_mode = 'categorical'

# Cross-validation / training hyper-parameters
k = 5            # number of folds
epochs = 10      # epochs trained per fold
batch_size = 32  # images per batch

# Image generator: rescales pixels to [0, 1] and applies random zoom / horizontal flip
datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# File names of all images listed in the dataset index
image_files = train_data['file_name'].values

# Distinct class labels as a numpy array, and how many there are
labels = np.array(train_data['class'].unique())
num_classes = len(labels)
print('Number of classes:', num_classes)

In [None]:
# Define the CNN: four Conv2D + MaxPooling2D stages with growing filter counts,
# followed by a dense head that outputs one softmax probability per class.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (5, 5), activation='relu', input_shape=target_shape),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(256, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

# The generators use class_mode='categorical' (one-hot targets) and the output
# layer is a softmax, so the matching loss is categorical cross-entropy.
# 'binary_crossentropy' would score every output unit as an independent
# yes/no problem, which does not match a single-label multi-class task.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# K-fold cross-validation: train a freshly initialised copy of the network on
# each fold, validate on the fold's held-out part and score it on the test set.

# Define the k-fold cross-validator (seeded so fold membership is reproducible)
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Define a dictionary to store, per fold: (training history, test loss, test accuracy)
fold2history = {}

# Fix the class -> index mapping for every generator. Without it each generator
# infers the mapping from its own subset, which differs (and changes the
# one-hot width) whenever a subset happens to miss a class.
class_names = sorted(train_data['class'].unique().tolist())

# Validation and test images must not be randomly zoomed/flipped: only rescale them.
eval_datagen = ImageDataGenerator(rescale=1./255)

# Loss the model was compiled with; reused when compiling each per-fold copy.
compiled_loss = model.loss


def _make_flow(generator, frame, shuffle):
    """Return a dataframe iterator over `frame` using the notebook's shared settings."""
    return generator.flow_from_dataframe(
        frame,
        directory=None,  # 'file_name' entries are used as paths as-is
        x_col='file_name',
        y_col='class',
        classes=class_names,
        target_size=target_size,
        batch_size=batch_size,
        class_mode=class_mode,
        shuffle=shuffle)


# The test set is the same for every fold, so build its generator only once.
test_datagen = _make_flow(eval_datagen, test, shuffle=False)

# Loop over the folds
for fold, (train_index, val_index) in enumerate(kf.split(train)):

    # Define the training and validation data for this fold
    train_df = train.iloc[train_index]
    val_df = train.iloc[val_index]

    # Augmented, shuffled generator for training; plain generator for validation
    train_datagen = _make_flow(datagen, train_df, shuffle=True)
    val_datagen = _make_flow(eval_datagen, val_df, shuffle=False)

    # Start every fold from newly initialised weights and a fresh optimizer.
    # Re-using the already trained model would let it be validated on samples
    # it was trained on in an earlier fold. The compile settings mirror the
    # ones used where the model is defined.
    model = tf.keras.models.clone_model(model)
    model.compile(optimizer='adam', loss=compiled_loss, metrics=['accuracy'])

    clear_output(wait=True)
    print(f'Fold {fold+1}/{k}')
    # Train the model on the training data
    history = model.fit(train_datagen, epochs=epochs, validation_data=val_datagen, verbose=1)
    history = history.history

    # Evaluate the model on the held-out test data
    loss, accuracy = model.evaluate(test_datagen)
    print(f'Test loss: {loss:.4f}, Test accuracy: {accuracy:.4f}')

    # Save the history of the model for each fold
    fold2history[fold] = (history, loss, accuracy)

# Save the model (the one trained in the last fold); create the folder if needed
os.makedirs('models', exist_ok=True)
model.save(os.path.join('models', 'model.h5'))

In [None]:
# Flatten the per-fold results, in fold order, into three flat lists.
# Each fold2history entry is a (history dict, test loss, test accuracy) tuple.
fold_results = [fold2history[fold_id] for fold_id in fold2history]

# Validation accuracy of every epoch, with the folds concatenated one after another
history = [
    epoch_acc
    for fold_history, _, _ in fold_results
    for epoch_acc in fold_history['val_accuracy']
]

# One test loss / test accuracy value per fold
test_loss = [fold_loss for _, fold_loss, _ in fold_results]
test_accuracy = [fold_acc for _, _, fold_acc in fold_results]

In [None]:
# Validation accuracy over all epochs, with the folds concatenated left to right
fig, ax = plt.subplots(figsize=(9, 5))
ax.set_title('Validation accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')

# set axis limits to show the plot clearly
ax.set_xlim(1, len(history) + 1)
ax.set_ylim(0, 1)

# Draw a dotted line at each fold boundary. Epochs are numbered from 1, so the
# boundary after fold i lies between epoch i*epochs and epoch i*epochs + 1.
for i in range(1, k):
    ax.axvline(i * epochs + 0.5, color='k', linestyle=':', linewidth=1)

ax.plot(np.arange(1, len(history) + 1), history, label='val_accuracy')
ax.legend(loc='lower right')  # without a legend the label above is never displayed
plt.show()

In [None]:
# test accuracy plot

fig, ax = plt.subplots(figsize=(9, 5))
ax.set_title('Test accuracy for each fold')
ax.set_xlabel('Folds')
ax.set_ylabel('Accuracy')

fold_numbers = np.arange(1, len(test_accuracy) + 1)

# Accuracy lies in [0, 1]; show that whole range (plus head-room for the value
# labels) so that no fold can fall outside the visible area.
ax.set_xlim(0, len(test_accuracy) + 1)
ax.set_ylim(0, 1.05)

# Write each fold's accuracy just above its point
for fold_number, value in zip(fold_numbers, test_accuracy):
    ax.text(fold_number, value + 0.025, "%.3f" % value, ha="center")

ax.set_xticks(fold_numbers)
ax.plot(fold_numbers, test_accuracy, marker='o', label='test_accuracy')
ax.legend(loc='lower right')
plt.show()

In [None]:
# test loss plot

fig, ax = plt.subplots(figsize=(9, 5))
ax.set_title('Test loss for each fold')
ax.set_xlabel('Folds')
ax.set_ylabel('Loss')

fold_numbers = np.arange(1, len(test_loss) + 1)

# The loss has no fixed upper bound, so derive the y-range from the data
# instead of hard-coding it; fall back to 1.0 when there is nothing to plot.
highest_loss = max(test_loss, default=0.0)
y_top = highest_loss * 1.15 if highest_loss > 0 else 1.0
ax.set_xlim(0, len(test_loss) + 1)
ax.set_ylim(0, y_top)

# Write each fold's loss just above its point (offset scaled to the y-range)
for fold_number, value in zip(fold_numbers, test_loss):
    ax.text(fold_number, value + 0.02 * y_top, "%.3f" % value, ha="center")

ax.set_xticks(fold_numbers)
ax.plot(fold_numbers, test_loss, marker='o', label='test_loss')
ax.legend(loc='upper right')
plt.show()