In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import random
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
import os

In [2]:
# Function that traverses the given directory and, for every sub-directory, returns
# lists of the images and their corresponding emotion labels

def load_data(data_path):
    """Load every readable image from the class sub-folders of ``data_path``.

    Each immediate sub-directory of ``data_path`` is treated as one class and
    its name is used as the label of every image inside it. Images are
    converted to grayscale and resized to 48x48.

    Parameters
    ----------
    data_path : str
        Directory whose sub-directories each hold the images of one class.

    Returns
    -------
    img_data : numpy.ndarray
        The resized grayscale images, stacked along the first axis.
    img_labels : numpy.ndarray
        Folder name of each image, in the same order as ``img_data``.
    """
    img_data_list = []
    img_label_list = []

    # Sorted so the sample order does not depend on the file system.
    for dataset in sorted(os.listdir(data_path)):
        class_dir = os.path.join(data_path, dataset)
        if not os.path.isdir(class_dir):
            continue

        for img in sorted(os.listdir(class_dir)):
            input_img = cv2.imread(os.path.join(class_dir, img))

            # cv2.imread signals an undecodable file (hidden files, thumbnails,
            # nested folders, ...) by returning None instead of raising; skip
            # those entries rather than crashing in cvtColor.
            if input_img is None:
                continue

            # Convert image to grayscale, then bring it to the common size
            input_img_gray = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
            input_img_resize = cv2.resize(input_img_gray, (48, 48))

            img_data_list.append(input_img_resize)
            img_label_list.append(dataset)

    img_data = np.array(img_data_list)
    img_labels = np.array(img_label_list)

    return img_data, img_labels

In [3]:
# Normalize the pixel values to the range [0, 1]

def normalize_data(data):
    """Return ``data`` cast to ``float32`` with every value divided by 255."""
    as_float = data.astype('float32')
    return as_float / 255.0

In [4]:
# Faces are roughly, but not perfectly, symmetrical, so one possible way to obtain more data is
# to add mirrored images of the faces. This doubles the number of instances of each class. Selected classes
# can be excluded from this processing
    
def add_mirror_images(images, labels, exclude = []):
    """Append a horizontally mirrored copy after every image.

    Images whose label appears in ``exclude`` are kept but not mirrored.
    Returns the augmented images and their labels as two numpy arrays; each
    mirrored copy directly follows its source image and shares its label.
    """
    augmented_images = []
    augmented_labels = []

    for position, image in enumerate(images):
        label = labels[position]

        augmented_images.append(image)
        augmented_labels.append(label)

        # Excluded classes keep only their original image
        if label in exclude:
            continue

        # Flip code 1 mirrors around the vertical axis (left <-> right)
        augmented_images.append(cv2.flip(image, 1))
        augmented_labels.append(label)

    return np.array(augmented_images), np.array(augmented_labels)

In [5]:
# This function generates new instances of given classes up to given number of instances

def add_mirrored_images_upto(images, labels, upto = 0, include = []):
    """Mirror images of selected classes until each holds ``upto`` samples.

    Walks the data once; an image gets a horizontally mirrored copy appended
    right after it when its label is listed in ``include`` and that label's
    running count is still below ``upto``. Returns the resulting images and
    labels as two numpy arrays.
    """
    # Number of samples currently available per emotion
    per_emotion = {}
    for emotion in labels:
        per_emotion[emotion] = per_emotion.get(emotion, 0) + 1

    grown_images = []
    grown_labels = []

    for idx in range(len(images)):
        emotion = labels[idx]

        grown_images.append(images[idx])
        grown_labels.append(emotion)

        wants_more = emotion in include and per_emotion[emotion] < upto
        if not wants_more:
            continue

        # Flip code 1 mirrors around the vertical axis (left <-> right)
        grown_images.append(cv2.flip(images[idx], 1))
        grown_labels.append(emotion)
        per_emotion[emotion] += 1

    return np.array(grown_images), np.array(grown_labels)

In [6]:
# Assuming 'image' is your 48x48 image with values between 0 and 1

def plot_image(image, label):
    """Show ``image`` with a gray colormap and hidden axes, then print ``label``."""
    plt.imshow(image, cmap='gray') 
    plt.axis('off')  
    plt.show()
    print(label)

In [7]:
# Function for making Bar plot which shows count of every emotion in alphabetical order

def plot_count(images, labels):
    """Draw a bar chart with the number of samples of every emotion.

    Bars are ordered by sorted label so repeated calls produce the same
    layout, and every bar is annotated with its count.

    Parameters
    ----------
    images : any
        Not used by the plot; kept so existing calls keep working.
    labels : iterable
        One hashable, sortable label per sample.
    """
    # Count in a single pass instead of rescanning all labels once per class
    label_counts = {}
    for label in labels:
        label_counts[label] = label_counts.get(label, 0) + 1

    # Sort the labels in order to make the same bar plot every time
    sorted_labels = sorted(label_counts)
    sorted_counts = [label_counts[label] for label in sorted_labels]

    # Create a bar plot
    plt.figure(figsize=(10, 6))
    plt.bar(sorted_labels, sorted_counts, color='skyblue')
    plt.xlabel('Emotion')
    plt.ylabel('Count')
    plt.title('Emotion Count')
    plt.xticks(rotation=45)

    # Display the counts above each bar (bar i sits at x position i)
    for i, count in enumerate(sorted_counts):
        plt.text(i, count, str(count), ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

In [8]:
# Functions that randomly delete instances of a target class until at most `upto` of them remain

def _decrease_bigger_indexes(target_images_indexes, random_index):
    for i in range(len(target_images_indexes)):
        if target_images_indexes[i] > random_index:
            target_images_indexes[i] -= 1
            
    return target_images_indexes    
    
def random_delete_upto(images, labels, target_label, upto, seed = None):
    """Randomly drop samples of ``target_label`` until ``upto`` of them remain.

    Samples of other classes are untouched and the relative order of all
    surviving samples is preserved.

    Parameters
    ----------
    images, labels : array-like
        Samples and their labels, aligned along the first axis.
    target_label : label
        Class whose surplus samples are removed.
    upto : int
        Number of ``target_label`` samples to keep.
    seed : int, optional
        When given, seeds both ``random`` and ``numpy.random`` (global state)
        so the selection is reproducible.

    Returns
    -------
    (images, labels)
        The inputs themselves when nothing has to be removed, otherwise new
        numpy arrays without the removed samples.
    """
    # Set seed
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    # Positions of all target samples still in the running
    remaining = [i for i in range(len(images)) if target_label == labels[i]]

    # Draw the victims one by one. Drawing from the list of original positions
    # selects exactly the same samples as deleting after every draw would, but
    # lets the arrays be copied once instead of once per removed sample.
    doomed = []
    while upto < len(remaining):
        victim = random.choice(remaining)
        remaining.remove(victim)
        doomed.append(victim)

    if not doomed:
        return images, labels

    data_images = np.delete(images, doomed, axis=0)
    data_labels = np.delete(labels, doomed, axis=0)

    return data_images, data_labels

In [9]:
# Sorting that puts my target label at the end

def reverse_sort_key(item, target_label):
    """Sort key for ``(image, label)`` pairs that pushes ``target_label`` last.

    Pairs carrying ``target_label`` map to a long run of 'z' characters; every
    other pair maps to its own label.
    """
    _image, current_label = item
    return 'zzzzzzzzzzzzzzzzz' if current_label == target_label else current_label

def random_delete_upto_2(images, labels, target_label, upto, seed=None):
    """Randomly keep only ``upto`` samples of ``target_label``.

    The data is shuffled, then sorted so that non-target samples come first
    (ordered by label) and target samples last; the surplus target samples
    are cut off the end.

    Parameters
    ----------
    images, labels : sequence
        Samples and their labels, aligned by position.
    target_label : label
        Class whose surplus samples are removed.
    upto : int
        Number of ``target_label`` samples to keep.
    seed : int, optional
        When given, seeds both ``random`` and ``numpy.random`` (global state)
        so the selection is reproducible.

    Returns
    -------
    (tuple, tuple)
        Images and labels that were kept. When the target class has fewer
        than ``upto`` samples the data is returned in its original order.
    """
    target_label_count = sum(1 for label in labels if target_label == label)

    # Nothing to delete: hand the data back instead of returning None, so
    # callers can always unpack two values.
    if target_label_count < upto:
        return tuple(images), tuple(labels)

    # Set seed
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    # Combine images and labels into a list of pairs and shuffle them
    data = list(zip(images, labels))
    random.shuffle(data)

    # Non-target samples first (by label), target samples last. The boolean
    # in front of the key does the job without a sentinel string, so labels
    # that sort after any sentinel are still placed correctly.
    data.sort(key=lambda item: (bool(item[1] == target_label), item[1]))

    rest_labels_count = len(labels) - target_label_count

    # Delete from the end the excess of instances of the target class
    kept = data[:rest_labels_count + upto]

    # zip(*[]) cannot be unpacked into two names
    if not kept:
        return (), ()

    kept_images, kept_labels = zip(*kept)

    return kept_images, kept_labels

In [19]:
# This function builds n_ensembles subsets, each holding n_counts instances per class. The reference is the class
# from which the same n_counts instances are taken for every subset

def make_ensembles(emotions, labels, n_ensembles, n_counts, reference = ""):
    """Split the data into ``n_ensembles`` class-balanced subsets.

    Every subset starts with the same ``n_counts`` samples of the
    ``reference`` class, followed by ``n_counts`` samples of each remaining
    class (classes in sorted label order). Subset ``i`` receives the ``i``-th
    consecutive chunk of ``n_counts`` samples of every non-reference class, so
    the non-reference samples of different subsets do not overlap.

    Samples are grouped by label before slicing, so a chunk never crosses a
    class boundary even when a class holds more or fewer than
    ``n_counts * n_ensembles`` samples; a class that runs out simply
    contributes fewer samples to the later subsets.

    Parameters
    ----------
    emotions : sequence
        The samples.
    labels : sequence
        One hashable, sortable label per sample.
    n_ensembles : int
        Number of subsets to build.
    n_counts : int
        Samples taken per class and subset; must be at least 1.
    reference : label, optional
        Class whose last ``n_counts`` samples are shared by all subsets. When
        it does not occur in ``labels`` the class that sorts last is used.

    Returns
    -------
    list
        One ``[samples, labels]`` pair of numpy arrays per subset.

    Raises
    ------
    ValueError
        If ``n_counts`` is smaller than 1.
    """
    if n_counts < 1:
        raise ValueError("n_counts must be at least 1")

    # Group the (sample, label) pairs per class, keeping the input order
    pairs_by_label = {}
    for sample, label in zip(emotions, labels):
        pairs_by_label.setdefault(label, []).append((sample, label))

    if not pairs_by_label:
        return [[np.array([]), np.array([])] for _ in range(n_ensembles)]

    ordered_labels = sorted(pairs_by_label)

    # Without a matching reference the class that sorts last plays that role
    ref_label = reference if reference in pairs_by_label else ordered_labels[-1]
    ref_pairs = pairs_by_label[ref_label][-n_counts:]
    other_labels = [label for label in ordered_labels if label != ref_label]

    ensembles = []

    for i in range(n_ensembles):
        # The shared reference samples come first in every subset
        picked = list(ref_pairs)

        # Then the i-th chunk of every other class
        for label in other_labels:
            picked.extend(pairs_by_label[label][i * n_counts:(i + 1) * n_counts])

        balanced_subset_data = [sample for sample, _ in picked]
        balanced_subset_labels = [label for _, label in picked]

        ensembles.append([np.array(balanced_subset_data), np.array(balanced_subset_labels)])

    return ensembles

In [18]:
def standardize(train_data):
    """Standardize every feature to zero mean and unit variance.

    Parameters
    ----------
    train_data : array-like
        One row per sample and one column per feature; all rows must have the
        same length.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape in which every column has had its mean
        subtracted and been divided by its (population) standard deviation.
        Constant columns come out as zeros.
    """
    train_array = np.asarray(train_data, dtype=float)

    # Statistics per feature, i.e. along the sample axis
    means = np.mean(train_array, axis=0)
    std_devs = np.std(train_array, axis=0)

    # A constant feature has zero spread; dividing by 1 maps it to 0 instead
    # of producing NaN/inf.
    std_devs = np.where(std_devs == 0, 1.0, std_devs)

    # Broadcasting applies each feature's statistics to its own column
    return (train_array - means) / std_devs

In [12]:
def generate_report(y_test, y_pred):
    """Print confusion matrix, weighted F1, accuracy and the per-class report.

    ``y_test`` holds the true labels and ``y_pred`` the predicted ones; both
    are passed unchanged to the scikit-learn metric functions.
    """
    sections = (
        ("Confusion matrix: \n", confusion_matrix(y_test, y_pred)),
        ("\nF1 score: \n", f1_score(y_test, y_pred, average='weighted')),
        ("\nAccuracy: \n", accuracy_score(y_test, y_pred)),
        ("\nClassification report: \n", classification_report(y_test, y_pred)),
    )

    # Flatten to heading, value, heading, value, ... for a single print call
    pieces = []
    for heading, value in sections:
        pieces.append(heading)
        pieces.append(value)

    print(*pieces)

In [13]:
# Libraries for showing lbp_example

from skimage.transform import rotate
from skimage.feature import local_binary_pattern
from skimage import data
from skimage.color import label2rgb

In [21]:
# def compute_lbp(image, radius=1):

#     points = 8 * radius

#     # Compute LBP features
#     lbp_image = feature.local_binary_pattern(image, P=points, R=radius, method="nri_uniform")
#     n_bins = int(lbp_image.max() + 1)
#     lbp_histogram, _ = np.histogram(lbp_image.ravel(), bins=n_bins, range=(0, n_bins))

#     # Normalize LBP histogram
#     lbp_histogram = lbp_histogram.astype("float")
#     lbp_histogram /= (lbp_histogram.sum() + 1e-4)
    
#     return lbp_histogram

def compute_lbp(image, radius=1, desired_length=None):
    """Compute the normalized LBP histogram of ``image``.

    Parameters
    ----------
    image : array-like
        Image handed to ``local_binary_pattern``.
    radius : int, optional
        LBP radius; ``8 * radius`` sampling points are used.
    desired_length : int, optional
        When given, the histogram is zero-padded at the end or truncated to
        exactly this many bins. When omitted, the length is the largest LBP
        code found in the image plus one, so it can differ between images.

    Returns
    -------
    numpy.ndarray
        Float histogram of the LBP codes, scaled so its entries sum to
        (almost exactly) 1.
    """
    points = 8 * radius

    # Compute LBP features. The function is called by its own name: the file
    # imports `local_binary_pattern` directly and never binds a `feature` name.
    lbp_image = local_binary_pattern(image, P=points, R=radius, method="nri_uniform")
    n_bins = int(lbp_image.max() + 1)
    lbp_histogram, _ = np.histogram(lbp_image.ravel(), bins=n_bins, range=(0, n_bins))

    # Normalize LBP histogram; the small constant guards against division by zero
    lbp_histogram = lbp_histogram.astype("float")
    lbp_histogram /= (lbp_histogram.sum() + 1e-4)

    # Ensure the histogram has the desired length
    if desired_length is not None:
        if len(lbp_histogram) < desired_length:
            # Pad the histogram with zeros
            lbp_histogram = np.pad(lbp_histogram, (0, desired_length - len(lbp_histogram)))

        elif len(lbp_histogram) > desired_length:
            # Truncate the histogram
            lbp_histogram = lbp_histogram[:desired_length]

    return lbp_histogram

In [15]:
def plot_lbp_hist(lbp_histogram):
    """Draw ``lbp_histogram`` as a bar chart with one unit-wide bar per bin."""
    # One x position per histogram bin
    bin_positions = np.arange(len(lbp_histogram))

    plt.bar(bin_positions, lbp_histogram, width=1, align='center')

    plt.title('LBP Histogram')
    plt.xlabel('LBP Value')
    plt.ylabel('Frequency')

    plt.show()

In [16]:
def _overlay_labels(image, lbp, labels):
    """Overlay on ``image`` the pixels whose LBP code is one of ``labels``."""
    # One boolean mask per requested code, merged with a logical OR
    per_code_masks = [lbp == code for code in labels]
    selected = np.logical_or.reduce(per_code_masks)
    return label2rgb(selected, image=image, bg_label=0, alpha=0.5)


def _highlight_bars(bars, indexes):
    for i in indexes:
        bars[i].set_facecolor('r')



def _hist(ax, lbp):
    n_bins = int(lbp.max() + 1)
    return ax.hist(lbp.ravel(), density=True, bins=n_bins, range=(0, n_bins),
                   facecolor='0.5')


# plot histograms of LBP of textures
def plot_lbp_example(image, radius=1):
    """Visualise groups of uniform LBP codes of ``image``.

    Builds a 2x3 figure. The top row shows the image with the pixels of one
    code group overlaid; the bottom row shows the LBP histogram with that
    group's bars highlighted. The three groups are titled 'edge', 'flat' and
    'corner'.
    """
    n_points = 8 * radius

    lbp = local_binary_pattern(image, n_points, radius, method="uniform")

    _, axes = plt.subplots(nrows=2, ncols=3, figsize=(9, 6))
    image_axes, hist_axes = axes
    plt.gray()

    # Code ranges are centred on fractions of the number of sampling points
    # and widened by `spread` on both sides.
    spread = radius - 1
    half = n_points // 2
    quarter = n_points // 4            # 1/4th of the histogram
    three_quarters = 3 * quarter       # 3/4th of the histogram

    edge_codes = range(half - spread, half + spread + 1)
    flat_codes = [
        *range(0, spread + 1),
        *range(n_points - spread, n_points + 2),
    ]
    corner_codes = [
        *range(quarter - spread, quarter + spread + 1),
        *range(three_quarters - spread, three_quarters + spread + 1),
    ]

    groups = (
        ('edge', edge_codes),
        ('flat', flat_codes),
        ('corner', corner_codes),
    )

    # Top row: image with the pixels of each group overlaid
    for axis, (_, codes) in zip(image_axes, groups):
        axis.imshow(_overlay_labels(image, lbp, codes))

    # Bottom row: histogram with the bars of each group highlighted
    for axis, (title, codes) in zip(hist_axes, groups):
        counts, _, bars = _hist(axis, lbp)
        _highlight_bars(bars, codes)
        axis.set_ylim(top=np.max(counts[:-1]))
        axis.set_xlim(right=n_points + 2)
        axis.set_title(title)

    hist_axes[0].set_ylabel('Percentage')
    for axis in image_axes:
        axis.axis('off')

In [17]:
def plot_history(history, epochs):
    """Plot training and validation loss and accuracy side by side.

    Parameters
    ----------
    history : object
        Must expose a ``history`` mapping with the keys 'loss', 'val_loss',
        'accuracy' and 'val_accuracy', each holding one value per epoch
        (presumably the object returned by a Keras ``fit`` call - confirm
        against the caller).
    epochs : int
        Number of epochs that was requested. Kept for existing callers; the
        x axis is derived from the number of recorded values, so a run that
        stopped before ``epochs`` can still be plotted.
    """
    panels = (
        ('Loss', 'loss', 'val_loss'),
        ('Accuracy', 'accuracy', 'val_accuracy'),
    )

    plt.figure(figsize=(10, 4))

    for position, (title, train_key, validation_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(title)

        for key, legend in ((train_key, 'train'), (validation_key, 'validation')):
            values = history.history[key]
            # One x position per recorded epoch; equals np.arange(0, epochs)
            # whenever training ran for the full number of epochs.
            plt.plot(np.arange(len(values)), values, label=legend)

        plt.legend(loc='best')

    plt.show()
