In [1]:
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE
import plotly.express as px
import matplotlib.pyplot as plt
import umap
import torch

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import plotly.io as pio
import tqdm

  @numba.jit()
  @numba.jit()
  @numba.jit()
  from .autonotebook import tqdm as notebook_tqdm
  @numba.jit()


In [2]:
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [3]:
# VAE loss function
def vae_loss(x, x_recon, mean, log_var):
    recon_loss = nn.MSELoss()(x_recon, x)
    kl_loss = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
    return recon_loss + kl_loss

In [4]:
single_data = pd.read_csv('/Users/jiachenyao/Desktop/Thesis/Preprocessed_data/single_emo_facial_no_time_6.csv')
mix_data = pd.read_csv('/Users/jiachenyao/Desktop/Thesis/Preprocessed_data/Mixed_emotions_noratio_notime_6.csv')

In [5]:
single_emo = single_data[(single_data['emotion'] == 'ang') | (single_data['emotion'] == 'sad')]
mix_emo = mix_data[(mix_data['emotion'] == 'mix_ang_sad')]


In [6]:
emo_df = pd.concat([single_emo, mix_emo], ignore_index=True)

In [7]:
# Feature selection
# 230 features
emo_df = emo_df.iloc[:, np.concatenate([range(0,1),range(9,17), range(37,45),range(65,73),range(93,101),range(121,129),range(149,157),
                                            
                                            range(177,185),range(205,213),range(233,241),range(263,271), 
                                            
                                            range(312,322),range(380,390),range(448,458),range(516,526),range(584,594), # eye brow: 17-26
                                            
                                            range(343,363), range(411,431),range(479,499),range(547,567),range(615,635)])] ## mouth: 48-67

# 100 features only mouth
#emo_df = emo_df.iloc[:, np.concatenate([range(0,1),range(343,363), range(411,431),range(479,499),range(547,567),range(615,635)])]


# only AU 18 features
#emo_df = emo_df.iloc[:, np.concatenate([range(0,1),range(675,692)])]

# AU+Gaze+Pose 31 features
#emo_df = emo_df.iloc[:, np.concatenate([range(0,9),range(289,295), range(675,692)])]

            

In [8]:
emo_df

Unnamed: 0,emotion,eye_lmk_x_0,eye_lmk_x_1,eye_lmk_x_2,eye_lmk_x_3,eye_lmk_x_4,eye_lmk_x_5,eye_lmk_x_6,eye_lmk_x_7,eye_lmk_x_28,...,Z_58,Z_59,Z_60,Z_61,Z_62,Z_63,Z_64,Z_65,Z_66,Z_67
0,ang,0.711955,0.712962,0.715553,0.718204,0.719583,0.719218,0.715869,0.713117,0.750916,...,0.698421,0.698101,0.700264,0.697521,0.698928,0.699909,0.705679,0.701441,0.700562,0.699117
1,ang,0.000010,0.000010,0.000010,0.000010,0.000010,0.000010,0.000010,0.000010,0.000008,...,0.000017,0.000016,0.000013,0.000017,0.000017,0.000018,0.000017,0.000018,0.000018,0.000017
2,ang,0.699639,0.700808,0.703862,0.707032,0.708291,0.707817,0.704303,0.701237,0.739859,...,0.690027,0.690306,0.694217,0.689751,0.690757,0.691245,0.697096,0.692603,0.692239,0.691164
3,ang,0.713404,0.714331,0.716582,0.719014,0.720291,0.719908,0.716914,0.714468,0.752673,...,0.691826,0.692216,0.695558,0.691543,0.692399,0.693639,0.699970,0.694753,0.693507,0.692784
4,ang,0.722602,0.723448,0.725739,0.728229,0.729594,0.729338,0.726337,0.723720,0.760795,...,0.708396,0.707174,0.707277,0.706706,0.708591,0.709843,0.715057,0.711879,0.710905,0.709063
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1201,mix_ang_sad,0.000045,0.000045,0.000045,0.000044,0.000044,0.000044,0.000045,0.000045,0.000034,...,0.000036,0.000036,0.000034,0.000034,0.000034,0.000035,0.000037,0.000036,0.000035,0.000035
1202,mix_ang_sad,0.663620,0.662691,0.664020,0.666289,0.668440,0.669645,0.668052,0.665878,0.690091,...,0.733189,0.735556,0.739293,0.732287,0.732263,0.731851,0.735169,0.733579,0.733643,0.734415
1203,mix_ang_sad,0.702688,0.701286,0.701480,0.703075,0.705230,0.706986,0.706584,0.704896,0.720419,...,0.746470,0.747804,0.753432,0.743684,0.744730,0.747400,0.759892,0.750268,0.746895,0.746718
1204,mix_ang_sad,0.751415,0.751407,0.753345,0.755834,0.757669,0.758315,0.756156,0.753420,0.766103,...,0.777746,0.781770,0.786061,0.775601,0.774586,0.775403,0.785129,0.778844,0.777626,0.779308


In [9]:
labels = emo_df.iloc[:, 0].values
data = emo_df.iloc[:, 1:].values



# t-SNE

In [10]:
# 2D t-SNE
tsne_2d = TSNE(n_components=2, perplexity=30, random_state=0)
tsne_2d_results = tsne_2d.fit_transform(data)

In [11]:
colors_tsne_2d = {"sad":"#913175",'ang': "#0E8388", "mix_ang_sad":"#BEF0CB"}

fig_2d = px.scatter(x=tsne_2d_results[:, 0], y=tsne_2d_results[:, 1], color=labels, color_discrete_map=colors_tsne_2d)

fig_2d.update_layout(scene=dict(xaxis=dict(title='t-SNE Dim 1'),
                             yaxis=dict(title='t-SNE Dim 2')),
    title='t-SNE Projection for Fear, Sad and Their Mix',
    title_x=0.5, 
    title_y=0.95,
    width=600,
    height=500
)
fig_2d.show()

#fig_2d.write_image("/Users/jiachenyao/Desktop/Thesis/Figures/230/tsne_ang_sad_2d.png")


In [12]:
# 3D t-SNE
tsne_3d = TSNE(n_components=3, random_state=0, perplexity=30, n_iter=1000)
tsne_3d_results = tsne_3d.fit_transform(data)

In [13]:
color_tsne_3d_mapping = {'ang': '#3A98B9', 'fea': '#E8D5C4', 'mix_ang_fea': '#9DC08B'}

fig_3d = px.scatter_3d(x=tsne_3d_results[:, 0], y=tsne_3d_results[:, 1], z=tsne_3d_results[:, 2], color=labels,  color_discrete_map=color_tsne_3d_mapping)

# Update the layout to adjust the axis labels and title
fig_3d.update_layout(scene=dict(xaxis=dict(title='t-SNE Dim 1'),
                             yaxis=dict(title='t-SNE Dim 2'),
                             zaxis=dict(title='t-SNE Dim 3')),
                  title='t-SNE Projection for Disgust, Happiness and Their Mix',
                  title_x=0.5, 
                  title_y=0.95,
                  width=600, height=500)

fig_3d.update_layout(showlegend=True)
fig_3d.show()



# UMAP

In [14]:
# 2D
color_umap_2d_mapping = {'ang': '#898121', 'sad': '#E7B10A', 'mix_ang_sad': '#609966'}

umap_2d = umap.UMAP(n_components=2, random_state=0, n_neighbors=10, min_dist=0.1)
umap_2d_results = umap_2d.fit_transform(data)

fig_umap_2d = px.scatter(x=umap_2d_results[:, 0], y=umap_2d_results[:, 1], color=labels, color_discrete_map=color_umap_2d_mapping)

fig_umap_2d.update_layout(scene=dict(xaxis=dict(title='UMAP Dim 1'),
                             yaxis=dict(title='UMAP Dim 2')),
    title='UMAP Projection for Fear, Sad and Their Mix',
    title_x=0.5, 
    title_y=0.95,
    width=600,
    height=500
)
fig_umap_2d.show()




OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


: 

: 

In [None]:
# 3D
color_tsne_3d_mapping = {'fea': '#898121', 'sad': '#E7B10A', 'mix_fea_sad': '#609966'}

umap_3d = umap.UMAP(n_components=3, n_neighbors=30, min_dist=0.1, metric='euclidean')
umap_3d_results = umap_3d.fit_transform(data)

umap_3d_fig = px.scatter_3d(x=umap_3d_results[:, 0], y=umap_3d_results[:, 1], z=umap_3d_results[:, 2], color=labels,  color_discrete_map=color_tsne_3d_mapping)
umap_3d_fig.update_layout(scene=dict(xaxis=dict(title='UMAP Dim 1'),
                             yaxis=dict(title='UMAP Dim 2'),
                             zaxis=dict(title='UMAP Dim 3')),
    title='UMAP Projection for Fear, Sad and Their Mix',
    title_x=0.5, 
    title_y=0.95,
    width=600,
    height=500
)
umap_3d_fig.show()


# VAE

In [None]:
class VAE(nn.Module):
    def __init__(self, latent_dim):
        super(VAE, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(230,128),
            nn.ReLU(True),
            nn.Linear(128,64),
            nn.ReLU(True),
            nn.Linear(64,latent_dim*2)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim,64),
            nn.ReLU(True),
            nn.Linear(64,128),
            nn.ReLU(True),
            nn.Linear(128,230)
        )
    
    def encode(self, x):
        mean_logvar = self.encoder(x)
        mean, log_var = torch.chunk(mean_logvar, 2, dim=1)
        return mean, log_var
    
    def reparaterize(self, mean, log_var):
        std = torch.exp(0.5*log_var)
        eps = torch.randn_like(std)
        return mean + eps*std
    
    def decode(self, z):
        return self.decoder(z)
    
    def forward(self, x):
        mean, log_var = self.encode(x)
        z = self.reparaterize(mean, log_var)
        return self.decode(z), mean, log_var

In [None]:
label_encoder = LabelEncoder()
numerical_labels = label_encoder.fit_transform(labels)
label_mapping = dict(zip(numerical_labels,labels))

In [None]:
# Load the dataset
#data_tensor = torch.tensor(data.values, dtype=torch.float32)
#dataset = CustomDataset(data_tensor, torch.tensor(numerical_labels, dtype=torch.long))
#dataset = CustomDataset(torch.tensor(data, dtype=torch.float32), torch.tensor(numerical_labels, dtype=torch.long))
dataset = CustomDataset(data, numerical_labels)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
for i, (x, y) in enumerate(dataloader):
    print(x)
    print(y)
    break

tensor([[5.7304e-05, 5.6381e-05, 5.5234e-05,  ..., 1.9956e-05, 1.7135e-05,
         1.4632e-05],
        [7.2769e-01, 7.2702e-01, 7.2880e-01,  ..., 6.7111e-01, 6.7272e-01,
         6.7400e-01],
        [7.2630e-01, 7.2734e-01, 7.2902e-01,  ..., 9.0699e-01, 9.0883e-01,
         9.0797e-01],
        ...,
        [6.8173e-01, 6.7982e-01, 6.7977e-01,  ..., 7.5176e-01, 7.5048e-01,
         7.5089e-01],
        [7.4487e-01, 7.4375e-01, 7.4388e-01,  ..., 7.7699e-01, 7.7671e-01,
         7.7776e-01],
        [2.3787e-02, 2.3134e-02, 2.3335e-02,  ..., 1.9248e-02, 1.8933e-02,
         1.8447e-02]], dtype=torch.float64)
tensor([0, 0, 0, 2, 1, 2, 1, 1, 0, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 1, 1, 2, 0,
        2, 1, 1, 0, 1, 1, 1, 1])


In [None]:
# Train the VAE
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
latent_dim = 20
vae = VAE(latent_dim).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-4)
epochs = 50

In [None]:
def train_test_vae(epochs, dataloader, vae, optimizer):
    for epoch in range(epochs):
        vae.train()
        train_loss = 0
        for batch_data, _ in dataloader:
            batch_data = batch_data.to(device, dtype=torch.float)
            optimizer.zero_grad()
            x_recon, mean, log_var = vae(batch_data)
            loss = vae_loss(batch_data, x_recon, mean, log_var)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        
        train_loss /= len(dataloader.dataset)
        print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}')

        

In [None]:
train_test_vae(epochs, dataloader, vae, optimizer)

Epoch 1, Train Loss: 0.0760
Epoch 2, Train Loss: 0.0304
Epoch 3, Train Loss: 0.0130
Epoch 4, Train Loss: 0.0063
Epoch 5, Train Loss: 0.0046
Epoch 6, Train Loss: 0.0042
Epoch 7, Train Loss: 0.0039
Epoch 8, Train Loss: 0.0037
Epoch 9, Train Loss: 0.0036
Epoch 10, Train Loss: 0.0035
Epoch 11, Train Loss: 0.0034
Epoch 12, Train Loss: 0.0034
Epoch 13, Train Loss: 0.0034
Epoch 14, Train Loss: 0.0033
Epoch 15, Train Loss: 0.0033
Epoch 16, Train Loss: 0.0032
Epoch 17, Train Loss: 0.0032
Epoch 18, Train Loss: 0.0031
Epoch 19, Train Loss: 0.0031
Epoch 20, Train Loss: 0.0031
Epoch 21, Train Loss: 0.0031
Epoch 22, Train Loss: 0.0031
Epoch 23, Train Loss: 0.0031
Epoch 24, Train Loss: 0.0031
Epoch 25, Train Loss: 0.0031
Epoch 26, Train Loss: 0.0030
Epoch 27, Train Loss: 0.0030
Epoch 28, Train Loss: 0.0031
Epoch 29, Train Loss: 0.0031
Epoch 30, Train Loss: 0.0030
Epoch 31, Train Loss: 0.0030
Epoch 32, Train Loss: 0.0030
Epoch 33, Train Loss: 0.0030
Epoch 34, Train Loss: 0.0030
Epoch 35, Train Loss: 0

In [None]:
def extract_latent_space(vae, data_loader):
    latent_space = []
    labels = []

    vae.eval()
    with torch.no_grad():
        for batch_data, batch_labels in data_loader:
            batch_data = batch_data.to(device, dtype=torch.float32)
            #print(batch_data.dtype)
            mean, log_var = vae.encode(batch_data)
            z = vae.reparaterize(mean, log_var)
            latent_space.append(z.cpu().numpy())
            labels.extend(batch_labels.cpu().numpy())

    return np.vstack(latent_space), labels

In [None]:
latent_space, latent_space_labels = extract_latent_space(vae, dataloader)

In [None]:
# 2D
color_vae_2d_mapping = {0: '#898121', 1: '#F0A04B', 2: '#E3DFFD'}

fig_vae_2d = px.scatter(x=tsne_2d_results[:, 0], y=tsne_2d_results[:, 1], color=labels, color_discrete_map=color_vae_2d_mapping)

fig_vae_2d.update_layout(
    title='VAE Projection for Anger, Sadness and Their Mix',
    title_x=0.5, 
    title_y=0.95,
    width=600,
    height=500
)

fig_vae_2d.show()

#fig_vae_2d.write_image('/Users/jiachenyao/Desktop/Thesis/Figures/230/vae_ang_sad_2d.png')


NameError: name 'tsne_2d_results' is not defined

In [None]:
label_mapping

{0: 'ang', 2: 'sad', 1: 'mix_ang_sad'}

In [None]:
latent_space_labels = [str(element) for element in latent_space_labels]
string_label_mapping = {'0': 'ang', '1': 'fea', '2': 'mix_ang_fea'}

for i in range(len(latent_space_labels)):
    latent_space_labels[i] = string_label_mapping[latent_space_labels[i]]

In [None]:
# 3D
vae_3d_colors = {'ang': '#A7727D', 'fea': '#FFD4B2', 'mix_ang_fea': '#6096B4'}

vae_3d_tsne = TSNE(n_components=3, random_state=0, perplexity=30)
vae_3d_tsne_results = vae_3d_tsne.fit_transform(latent_space)

vae_3d_fig = px.scatter_3d(x=vae_3d_tsne_results[:, 0], y=vae_3d_tsne_results[:, 1], z=vae_3d_tsne_results[:, 2], color=latent_space_labels, color_discrete_map=vae_3d_colors)

vae_3d_fig.update_layout(scene=dict(xaxis=dict(title='Latent Dim 1'),
                             yaxis=dict(title='Latent Dim 2'),
                             zaxis=dict(title='Latent Dim 3')),
                  title='Latent Space Clustering for Disgust, Happiness and Their Mix',
                  title_x=0.5, 
                  title_y=0.95,
                  width=600, height=500)
vae_3d_fig.show()

#vae_3d_fig.write_image('/Users/jiachenyao/Desktop/Thesis/Figures/230/vae_ang_sad_3d.png')
