# Importing Necessary Libraries

In [None]:
# Libraries from PyTorch
import torch
from torch import nn
from torchvision import datasets,transforms,models
from torch.utils.data import Dataset,DataLoader

# Libraries for data
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Libraries that makes life easier
from timeit import default_timer as timer
from pathlib import Path
from tqdm.auto import tqdm
import opendatasets as od
import cv2 as cv
from collections import OrderedDict
import os
from torchinfo import summary

# Libraries from Scikit-learn
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from sklearn.decomposition import TruncatedSVD

# Getting the Dataset

In [None]:
# opendatasets unpacks a Kaggle dataset into a *directory* named after the
# dataset id (here "fer2013"), which is also the folder the sketch-conversion
# cell reads from. The guard therefore has to look for that directory; the
# previous check for a regular file called ".\FER-2013" could never succeed,
# so the download was attempted on every run.
if Path("./fer2013").is_dir():
    print("The dataset already exists. Skipping download...")
else:
    print("Downloading the datset...")
    od.download("https://www.kaggle.com/datasets/msambare/fer2013/data")

# Preprocessing and Formatting the Dataset

##### We will merge the `disgust` and `surprise` emotion directories into a single emotion directory, `shock`, because of the difference in the number of files in those directories

##### We will also reduce the number of files in the `train dataset` so that the model doesn't get biased towards predicting a single emotion
##### However, the number of files in the `test dataset` is not reduced; only the merging of the directories takes place there

###### Execute the next line of code if you are accessing this jupyter file for the first time

In [None]:
# IPython magic: executes dataset_formatting.py from inside the notebook.
# NOTE(review): per the notes above, the script is expected to merge and
# reduce the emotion directories; its source is not visible here - confirm.
%run dataset_formatting.py

## Curating a dataset from the FER2013 dataset

#### `Step-1` :- we convert the images into sketches so that the features become more obvious to the CNN during feature extraction

In [None]:
# Placeholder first row for the training matrix: the values 0..2303 followed
# by a dummy final element 7. Image rows are stacked underneath it later and
# the placeholder itself is deleted once the matrix has been filled.
train_arr = np.concatenate((np.arange(2304), [7]))
train_arr.shape

In [None]:
# Placeholder first row for the test matrix: the values 0..2303 followed by a
# dummy final element 7. Image rows are stacked underneath it later.
test_arr = np.concatenate((np.arange(2304), [7]))
test_arr.shape

In [None]:
def convert_to_sketch(state, arr, root="./fer2013", n_emotions=6):
    """Turn every image of one dataset split into a flattened sketch row.

    For each emotion index ``0 .. n_emotions - 1`` the files found in
    ``{root}/{state}/{emotion}`` are read with OpenCV, converted to grayscale
    and turned into a sketch: the grayscale image is divided (``scale=256``)
    by ``255 - blurred``, where ``blurred`` is the 21x21 Gaussian blur of the
    inverted grayscale image. The sketch is flattened and the emotion index
    is appended as the last element of the row.

    Parameters
    ----------
    state : str
        Name of the split sub-directory. Rows are only collected when it is
        ``'train'`` or ``'test'``.
    arr : numpy.ndarray
        Existing row(s) that the new rows are stacked underneath.
    root : str, optional
        Directory that contains the split folders.
    n_emotions : int, optional
        Number of consecutively numbered emotion folders to visit.

    Returns
    -------
    numpy.ndarray
        ``arr`` with one extra row per readable image, or ``arr`` itself
        when no row was produced.
    """
    import warnings

    rows = []
    for emotion in range(n_emotions):
        emotion_dir = f"{root}/{state}/{emotion}"
        for file in os.listdir(emotion_dir):
            src = f"{emotion_dir}/{file}"
            img = cv.imread(src)
            if img is None:
                # cv.imread signals an unreadable / non-image file by
                # returning None; skip it instead of crashing in cvtColor.
                warnings.warn(f"Skipping unreadable image: {src}")
                continue
            gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
            inverted = 255 - gray
            blurred = cv.GaussianBlur(inverted, (21, 21), 0)
            drawing = cv.divide(gray, 255 - blurred, scale=256)
            if state in ('train', 'test'):
                rows.append(np.append(drawing.flatten(), [emotion]))

    if not rows:
        return arr
    # Stack once at the end: calling np.vstack for every image re-copies the
    # whole accumulated array each time (quadratic in the number of images).
    return np.vstack([arr, *rows])

In [None]:
# Run both splits through the sketch converter, growing both arrays per pass.
# NOTE(review): each pass appends the current split to *both* arrays, so
# train_arr and test_arr each end up holding the train AND the test images
# (test data leaks into the training array). The intended calls are probably
# convert_to_sketch('train', train_arr) and convert_to_sketch('test', test_arr);
# downstream cells that assume the current row count must be re-checked
# before changing this.
states = ['train','test']
for state in states:
    train_arr, test_arr = (
        convert_to_sketch(state, train_arr),
        convert_to_sketch(state, test_arr),
    )

In [None]:
# Display the dimensions of the filled training matrix.
np.shape(train_arr)

In [None]:
# Display the dimensions of the filled test matrix.
np.shape(test_arr)

In [None]:
# Copy of train_arr without its first row (the placeholder row that the
# image rows were stacked under).
new_arr_train = train_arr[1:].copy()
new_arr_train

In [None]:
# Copy of test_arr without its first row (the placeholder row that the
# image rows were stacked under).
new_arr_test = test_arr[1:].copy()
new_arr_test

### Visualizing the dataset using t-SNE

In [None]:
# Split the matrix column-wise: every column but the last is a feature,
# the last column is the emotion label.
x, y = new_arr_train[:, :-1], new_arr_train[:, -1]

In [None]:
# Display the number of feature rows next to the number of labels.
x.shape[0], y.shape[0]

In [None]:
# Reduce the features to 50 components with truncated SVD, then embed them
# in 2-D with t-SNE. TruncatedSVD's default solver is randomized, so it is
# seeded like TSNE: later cells pick clusters by hand from this embedding,
# which only makes sense if a re-run reproduces the same layout.
tsvd = TruncatedSVD(n_components=50, random_state=42).fit_transform(x)
tsne_res = TSNE(n_components=2, n_jobs = -1, random_state = 42).fit_transform(tsvd)

In [None]:
# Scatter plot of the 2-D t-SNE embedding, coloured by emotion label.
plt.figure(figsize=(7, 7))
sns.scatterplot(x=tsne_res[:, 0], y=tsne_res[:, 1], hue=y, palette="bright", legend="full")
plt.title("Visualization of t-SNE results on FER dataset ", fontsize=24, weight='bold')
for set_axis_label, axis_text in ((plt.xlabel, "Component 1"), (plt.ylabel, "Component 2")):
    set_axis_label(axis_text, fontsize=16)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=14)
plt.legend(fontsize=16);

### Fitting the emotions into clusters

In [None]:
# Axis labels for the six clusters and the emotion names, listed in the
# order of their numeric labels (0 = angry ... 5 = shock).
cluster = [f"c{number}" for number in range(1, 7)]
emotions = [
    'angry',
    'fear',
    'happy',
    'neutral',
    'sad',
    'shock',
]

In [None]:
def making_clusters(x, y, emotion_no, embedding=None):
    """Bucket the embedded points of one emotion into six angular sectors.

    The 2-D plane is cut by the lines ``y = 0``, ``y = 1.7320 * x`` and
    ``y = -1.7320 * x`` (1.7320 ~ tan 60 deg) into six 60-degree sectors,
    c1..c6, numbered counter-clockwise starting at the positive x axis.
    A point lying exactly on a boundary goes to the lowest-numbered sector
    that contains it.

    Parameters
    ----------
    x : any
        Unused; kept so existing calls keep working.
    y : array-like
        Label of every row; only rows equal to ``emotion_no`` are bucketed.
    emotion_no : int
        Label of the emotion to bucket.
    embedding : array-like of shape (len(y), 2), optional
        2-D coordinates of every row. Defaults to the notebook-level
        ``tsne_res`` array, which is what the function always used.

    Returns
    -------
    emotion_c : list of int
        Number of matching rows in each of the six sectors.
    emotion_C : list of list of int
        Row indices (ascending) of the matching rows in each sector.

    Raises
    ------
    ValueError
        If ``y`` and ``embedding`` do not have the same number of rows.
    """
    if embedding is None:
        embedding = tsne_res
    points = np.asarray(embedding)
    labels = np.asarray(y)
    # Use the real row count instead of a hard-coded dataset size.
    if len(labels) != len(points):
        raise ValueError(
            f"y has {len(labels)} rows but the embedding has {len(points)}"
        )

    px, py = points[:, 0], points[:, 1]
    rising = py - (1.7320 * px)    # >= 0 on or above the line y =  1.7320 x
    falling = py + (1.7320 * px)   # >= 0 on or above the line y = -1.7320 x

    # Same tests, in the same order, as the original if/elif chain;
    # np.select keeps the first condition that holds for each point.
    sector_tests = [
        (rising <= 0) & (py >= 0),
        (rising >= 0) & (falling >= 0),
        (falling <= 0) & (py >= 0),
        (rising >= 0) & (py <= 0),
        (rising <= 0) & (falling <= 0),
        (falling >= 0) & (py <= 0),
    ]
    sector = np.select(sector_tests, [0, 1, 2, 3, 4, 5], default=-1)

    wanted = labels == emotion_no
    emotion_C = [
        np.flatnonzero(wanted & (sector == k)).tolist() for k in range(6)
    ]
    emotion_c = [len(members) for members in emotion_C]

    return emotion_c,emotion_C

In [None]:
# Bucket every emotion (labels 0..5, in that order) into the six sectors.
# For each emotion, *_c holds the per-sector counts and *_C the per-sector
# row indices.
(
    (angry_c, angry_C),
    (fear_c, fear_C),
    (happy_c, happy_C),
    (neutral_c, neutral_C),
    (sad_c, sad_C),
    (shock_c, shock_C),
) = (making_clusters(x, y, emotion_no) for emotion_no in range(6))

### Visualizing the clusters obtained

In [None]:
# One bar chart per emotion on a 3x2 grid, showing how many of its points
# fall into each of the six clusters.
plt.figure(figsize=(14,15))
panels = (
    ("angry", angry_c),
    ("fear", fear_c),
    ("happy", happy_c),
    ("neutral", neutral_c),
    ("sad", sad_c),
    ("shock", shock_c),
)
for position, (emotion_name, counts) in enumerate(panels, start=1):
    plt.subplot(3, 2, position)
    plt.grid()
    sns.barplot(x=cluster, y=counts)
    plt.title(label=f"best cluster area for {emotion_name}")
plt.show()

### Creating an array of the most suitable clusters

In [None]:
# Row indices to drop: for every emotion, five of its six sectors are listed
# here, so only one sector per emotion survives (angry: c6, fear: c1,
# happy: c5, neutral: c4, sad: c3, shock: c2).
dropped_sectors = (
    (angry_C, (0, 1, 2, 3, 4)),
    (fear_C, (3, 1, 2, 4, 5)),
    (happy_C, (0, 1, 3, 2, 5)),
    (neutral_C, (1, 2, 0, 4, 5)),
    (sad_C, (0, 1, 3, 4, 5)),
    (shock_C, (0, 4, 2, 3, 5)),
)
listd = [
    row_index
    for sector_members, sector_ids in dropped_sectors
    for sector_id in sector_ids
    for row_index in sector_members[sector_id]
]
len(listd)

In [None]:
# Keep only the rows whose index is not listed in listd.
keep_row = np.ones(new_arr_train.shape[0], dtype=bool)
keep_row[listd] = False
new_arr1 = new_arr_train[keep_row]
new_arr1 , new_arr1.shape

### Visualizing the newly formatted dataset using t-SNE

In [None]:
# Split the curated matrix column-wise: every column but the last is a
# feature, the last column is the emotion label.
X1, Y1 = new_arr1[:, :-1], new_arr1[:, -1]

In [None]:
# Same SVD -> t-SNE pipeline on the curated rows. TruncatedSVD's default
# solver is randomized, so it is seeded like TSNE to make the plot
# reproducible between runs.
tsvd1 = TruncatedSVD(n_components=50, random_state=42).fit_transform(X1)
tsne_res1 = TSNE(n_components=2, n_jobs = -1, random_state = 42).fit_transform(tsvd1)

In [None]:
# Scatter plot of the curated data's 2-D t-SNE embedding, coloured by label.
plt.figure(figsize=(7, 7))
sns.scatterplot(x=tsne_res1[:, 0], y=tsne_res1[:, 1], hue=Y1, palette="bright", legend="full")
plt.title("Visualization of t-SNE results on FER dataset ", fontsize=24, weight='bold')
for set_axis_label, axis_text in ((plt.xlabel, "Component 1"), (plt.ylabel, "Component 2")):
    set_axis_label(axis_text, fontsize=16)
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=14)
plt.legend(fontsize=16);

### Creating a DataFrame of the newly formatted data

In [None]:
# Column names "1x1", "1x2", ..., "48x48" (first number varies slowest),
# followed by a final "label" column.
column = [
    f"{row}x{col}"
    for row in range(1, 49)
    for col in range(1, 49)
] + ["label"]

In [None]:
# Wrap the curated training rows in a DataFrame with the named columns.
df_train = pd.DataFrame(data=new_arr1, columns=column)
print("\nPandas DataFrame: ")
df_train

In [None]:
# Build the test DataFrame from new_arr_test, i.e. the test matrix with its
# placeholder first row already removed. Using test_arr directly would write
# that placeholder (values 0..2303 with the dummy label 7) into the dataset
# as if it were a real sample.
df_test = pd.DataFrame(new_arr_test,columns=column)
print("\nPandas DataFrame: ")
df_test

### Saving the curated dataset

In [None]:
# Write both DataFrames as CSV files into the output folder, creating the
# folder first if it does not exist yet.
save_dir = './FER_curated_dataset/'
os.makedirs(save_dir,exist_ok=True)
for csv_name, frame in (('train.csv', df_train), ('test.csv', df_test)):
    frame.to_csv(f"{save_dir}{csv_name}")