"FC layers referenced from https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65"


In [2]:
import torch
import os
import numpy as np
import pickle
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif, chi2
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go
from sklearn.manifold import TSNE
import plotly.io as pio
from sklearn.utils import class_weight
import tqdm as notebook_tqdm
from tqdm import tqdm

In [3]:
# !pip install ipywidgets

In [4]:
class FCLayer(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(FCLayer, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.fc(x)
        return x

class ActivationLayer(nn.Module):
    def __init__(self, activation_fn):
        super(ActivationLayer, self).__init__()
        self.activation_fn = activation_fn

    def forward(self, x):
        x = self.activation_fn(x)
        return x

def tanh(x):
    return torch.tanh(x)

def sigmoid(x):
    return torch.sigmoid(x)

class MyNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MyNetwork, self).__init__()
        self.fc1 = FCLayer(input_dim, hidden_dim)
        self.activation1 = ActivationLayer(tanh)
        self.fc2 = FCLayer(hidden_dim, output_dim)
        self.activation2 = ActivationLayer(sigmoid)

    def forward(self, x):
        x = self.fc1(x)
        x = self.activation1(x)
        x = self.fc2(x)
        x = self.activation2(x)
        return x

# loss function and its derivative
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_prime(y_true, y_pred):
    return 2 * (y_pred - y_true) / y_true.size


In [5]:
# Function to balance class distribution using oversampling
def oversample_data(X_train, Y_train):
    # Determine the class with the maximum number of instances
    max_class_count = np.max(np.bincount(Y_train))
    # Generate indices for oversampling each class
    indices_list = [np.where(Y_train == i)[0] for i in range(num_classes)]
    # Oversample minority classes to match the count of the majority class
    for i, indices in enumerate(indices_list):
        if len(indices) < max_class_count:
            oversampled_indices = np.random.choice(indices, size=max_class_count - len(indices), replace=True)
            X_train = np.concatenate((X_train, X_train[oversampled_indices]), axis=0)
            Y_train = np.concatenate((Y_train, Y_train[oversampled_indices]), axis=0)
    return X_train, Y_train


In [6]:
# loading files
checkFile = os.path.isfile("data/dump/train_labels.pkl")

if not checkFile:
    print("Please run the context_encoder notebook to save label file")
    
else:
    file = open('data/dump/train_labels.pkl', 'rb')
    y_train = pickle.load(file)
    y_train = torch.tensor(y_train)
    file.close()
    
file = open('data/dump/label_decoder.pkl', 'rb')
label_decoder = pickle.load(file)

In [7]:
# loading files 2
file_path = 'embed/u_prime2.pkl'

# Load the list from the file using pickle
with open(file_path, 'rb') as file:
    updated_representations = pickle.load(file)

    # Concatenate all the tensors representing individual utterances
    concatenated_tensors = []
    for dialogue_tensor in updated_representations:
        concatenated_tensors.extend(dialogue_tensor)

# Convert the concatenated list of tensors into a single tensor
tensor_utterances = torch.stack(concatenated_tensors)

checkFile = os.path.isfile("data/dump/BERT_h_prime1.pkl")
if not checkFile:
    print("Run relationTypeEncoder2 encoder before running classifier")
    
else:
    file = open('data/dump/BERT_h_prime1.pkl', 'rb')
    cherry_picked_nodes, _ = pickle.load(file)
    file.close()

checkFile = os.path.isfile("data/dump/BERT_h_prime2.pkl")
if not checkFile:
    print("Run relationTypeEncoder2 before running classifier")
    
else:
    file = open('data/dump/BERT_h_prime2.pkl', 'rb')
    all_node_feats, _ = pickle.load(file)
    file.close()
    
_ = None
print(cherry_picked_nodes.shape, all_node_feats.shape)


torch.Size([12840, 300]) torch.Size([12840, 300])


EDA

In [8]:
# # Checking the structure of graph
# for n in range(10):
#     tensor_data_np = tensor_utterances[n].detach().numpy()

#     # Plot the data
#     plt.figure(figsize=(10, 5))
#     plt.plot(range(len(tensor_data_np)), tensor_data_np)
#     plt.title('Line Graph of Tensor Data')
#     plt.xlabel('Index')
#     plt.ylabel('Value')
#     plt.show()


In [9]:
# # Normalize the h' (1st GAT)
# data = cherry_picked_nodes.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [10]:
# # Normalize the h' (2nd GAT)
# data = all_node_feats.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [11]:
# # Normalize the u' or updated_representations
# data = tensor_utterances.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


Prep data and EDA


In [12]:
X_train = tensor_utterances
Y_train = y_train

Part 1

In [13]:
# # Define the number of features (k) to select
# k = 100  # Adjust this value as needed

# # Initialize SelectKBest with the desired score function (e.g., f_classif for classification tasks)
# selector = SelectKBest(score_func=f_classif, k=k)

# # Fit SelectKBest on the training data and target variable
# selector.fit(X_train, Y_train)

# # Get the indices of the selected features
# selected_indices = selector.get_support(indices=True)

# # Get the scores of the selected features
# feature_scores = selector.scores_[selected_indices]

# # Display the scores along with their corresponding indices
# # for idx, score in zip(selected_indices, feature_scores):
# #     print(f"Feature index: {idx}, Score: {score}")

# X_train_selected = X_train[:, selected_indices]
# print(X_train_selected.shape)

Selected feature u'

In [14]:
# Apply Min-Max scaling to make the data non-negative
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Initialize SelectKBest with the desired score function (e.g., f_classif for classification tasks)
selector = SelectKBest(score_func=f_classif, k=100)
# Assuming X_train is your feature matrix (12840 instances x 300 dimensions)
# and y_train is your target labels

# Initialize a dictionary to store the indices of top features for each class
top_features_by_class = {}
top_scores = {}
# Calculate the relevance of each feature to each class using chi-squared test
for label in range(7):  # Assuming you have 7 classes
    # Create a binary mask indicating instances belonging to the current class
    mask = (Y_train == label)

    # SelectKBest with chi2 as the scoring function
    selector = SelectKBest(score_func=chi2, k=20)  # Select top 20 features
    selector.fit(X_train_scaled, mask)  # Fit SelectKBest to the data
    # Get the indices of the top 20 features
    top_features_indices = np.argsort(selector.scores_)[-20:]
    scores = selector.scores_[top_features_indices]
    # Store the indices in the dictionary
    top_features_by_class[label] = top_features_indices
    top_scores[label] = scores
    
# Print the top features for each class
for label, indices in top_features_by_class.items():
    print(f"Label {label_decoder[label]}: idx {', '.join(map(str, indices))}")
    print(top_scores[label])

Label anger: idx 246, 497, 600, 74, 396, 278, 637, 82, 483, 411, 262, 589, 346, 200, 289, 395, 591, 161, 87, 544
[2.34643138 2.34656414 2.46194175 2.50832557 2.53195847 2.69447376
 2.75094615 2.75745995 2.88995412 2.9498358  2.97115567 3.04870125
 3.25441899 3.26687332 3.42034638 3.8293954  3.89758703 3.98234341
 4.95180723 6.69162142]
Label disgust: idx 411, 734, 168, 325, 220, 118, 710, 187, 183, 693, 487, 86, 113, 673, 87, 544, 346, 200, 430, 625
[1.1263909  1.12715288 1.14107922 1.15446211 1.16027577 1.18437507
 1.21307733 1.22602702 1.23209869 1.25558807 1.27585881 1.28278117
 1.28912602 1.30334911 1.345847   1.46037267 1.51213768 1.53303158
 1.7282929  1.76445789]
Label fear: idx 247, 553, 426, 92, 295, 425, 748, 403, 560, 374, 12, 287, 188, 652, 663, 598, 734, 60, 411, 6
[1.32962887 1.33329598 1.35888197 1.37588902 1.38860804 1.39378474
 1.39860036 1.40970726 1.4384687  1.45376044 1.47879477 1.50039816
 1.54102315 1.5556053  1.67205718 1.76377323 1.82892902 1.87085238
 2.1237645

In [15]:
concatenated_features_set = set()
for label, indices in top_features_by_class.items():
    concatenated_features_set.update(indices)

concatenated_features_indices = list(concatenated_features_set)

# concatenated_features_indices = []
# for indices in top_features_by_class.values():
#     concatenated_features_indices.extend(indices)


In [16]:
concatenated_features_indices = np.array(concatenated_features_indices)

# Select the desired features from X_train
selected_features1 = tensor_utterances[:, concatenated_features_indices]
print(selected_features1.shape)

torch.Size([12840, 114])


Selected h'

In [17]:
X_train = all_node_feats

In [18]:
# Apply Min-Max scaling to make the data non-negative
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Initialize SelectKBest with the desired score function (e.g., f_classif for classification tasks)
selector = SelectKBest(score_func=f_classif, k=100)
# Assuming X_train is your feature matrix (12840 instances x 300 dimensions)
# and y_train is your target labels

# Initialize a dictionary to store the indices of top features for each class
top_features_by_class = {}
top_scores = {}
# Calculate the relevance of each feature to each class using chi-squared test
for label in range(7):  # Assuming you have 7 classes
    # Create a binary mask indicating instances belonging to the current class
    mask = (Y_train == label)

    # SelectKBest with chi2 as the scoring function
    selector = SelectKBest(score_func=chi2, k=20)  # Select top 20 features
    selector.fit(X_train_scaled, mask)  # Fit SelectKBest to the data
    # Get the indices of the top 20 features
    top_features_indices = np.argsort(selector.scores_)[-20:]
    scores = selector.scores_[top_features_indices]
    # Store the indices in the dictionary
    top_features_by_class[label] = top_features_indices
    top_scores[label] = scores
    
# Print the top features for each class
for label, indices in top_features_by_class.items():
    print(f"Label {label_decoder[label]}: idx {', '.join(map(str, indices))}")
    print(top_scores[label])

Label anger: idx 167, 99, 139, 140, 175, 128, 276, 210, 84, 227, 204, 43, 288, 58, 244, 216, 92, 194, 184, 129
[1.24176531 1.32689859 1.33852027 1.36537897 1.39285872 1.39810761
 1.42481072 1.49667837 1.51571794 1.53939416 1.6200533  1.66673762
 1.86713552 1.91877562 2.03711769 2.04876067 2.37751768 2.4650082
 2.69172596 3.35008585]
Label disgust: idx 239, 208, 250, 152, 115, 176, 183, 214, 55, 123, 101, 194, 13, 200, 136, 171, 283, 204, 129, 234
[0.39419684 0.39860541 0.40854525 0.41389446 0.43105479 0.43148324
 0.44555406 0.51492261 0.52748753 0.54495951 0.55794862 0.61718555
 0.62381285 0.63588488 0.67294284 0.74115633 0.74915705 0.78864054
 0.86503108 1.03449513]
Label fear: idx 223, 18, 78, 293, 163, 67, 101, 108, 104, 210, 221, 71, 175, 93, 177, 13, 225, 48, 234, 55
[0.36006296 0.37594075 0.37779532 0.38241712 0.38545364 0.39580486
 0.40210837 0.45645068 0.45660078 0.46898822 0.47881793 0.4958627
 0.51283995 0.52126024 0.54828596 0.55156159 0.59597655 0.70761942
 0.70814717 0.951

In [19]:
concatenated_features_set = set()
for label, indices in top_features_by_class.items():
    concatenated_features_set.update(indices)

concatenated_features_indices = list(concatenated_features_set)

In [20]:
concatenated_features_indices = np.array(concatenated_features_indices)

# Select the desired features from X_train
selected_features2 = tensor_utterances[:, concatenated_features_indices]
print(selected_features2.shape)

torch.Size([12840, 102])


In [21]:
selected_features1[0]

tensor([-0.0850,  0.2415, -0.1310, -0.1509, -0.4730,  0.2069,  0.2905, -0.2111,
        -0.1705,  0.1179,  0.0678,  0.3224,  0.0701, -0.1988,  0.0966,  0.4576,
         0.2275, -0.3285,  0.0900, -0.0196,  0.5463,  0.2167, -0.1009,  0.0902,
         0.1786,  0.1149, -0.2084, -0.1492, -0.1536, -0.4069,  0.2688,  0.2200,
         0.0887, -0.0920,  0.4038,  0.0625, -0.2448,  0.1404, -0.1904, -0.2601,
         0.0509,  0.2384,  0.1512,  0.1104, -0.0750, -0.0030, -0.2030, -0.2019,
         0.0770,  0.0610,  0.0168,  0.1289, -0.0702,  0.2344, -0.1975,  0.1174,
        -0.2626,  0.0767, -0.0280, -0.0576,  0.1835, -0.0877,  0.0558, -0.3354,
        -0.5473,  0.2981,  0.3413,  0.0841,  0.2141,  0.0806,  0.1045,  0.0841,
        -0.0943, -0.1115, -0.2186, -0.1445,  0.2650, -0.1412,  0.2795, -0.2352,
        -0.1543, -0.1018,  0.1010, -0.4150, -0.1031, -0.1541,  0.0580, -0.0073,
         0.1114,  0.1634,  0.2207, -0.0395,  0.1615, -0.2581, -0.0831,  0.0563,
        -0.1234, -0.0139,  0.4461, -0.28

In [22]:
selected_features2[0]

tensor([ 1.9118e-01, -8.0589e-02, -1.3453e-01, -2.1708e-01,  4.1972e-01,
         2.4973e-01,  6.6335e-02, -3.6764e-01,  2.6825e-01, -1.4974e-01,
         3.8856e-02, -4.3070e-02,  1.0537e-01, -8.2093e-02,  1.9674e-01,
         1.5461e-01,  6.8548e-02,  9.7370e-03, -6.9421e-02,  1.4242e-01,
         4.2998e-01, -5.4965e-03, -4.7828e-01,  1.6158e-01, -2.2648e-02,
        -9.8701e-02,  1.1491e-01, -8.9931e-02, -2.0842e-01,  3.4715e-01,
         2.6876e-01,  2.2542e-01,  8.8338e-02,  1.4181e-01,  6.1919e-02,
        -3.5121e-02,  1.4036e-01,  4.1194e-01, -7.5740e-02, -4.1709e-01,
         3.8579e-02,  8.8523e-02,  5.9374e-02,  1.5188e-01,  1.2799e-01,
        -4.2667e-02, -2.2676e-01,  9.2587e-03, -1.7325e-01, -3.0602e-01,
        -2.4307e-01, -1.9749e-01, -3.0208e-02, -6.5170e-02,  2.7194e-01,
        -8.3018e-02, -2.5097e-03,  2.1087e-02,  2.9575e-01, -5.3700e-02,
        -2.9273e-01,  1.8353e-01,  3.0357e-01,  5.1770e-01,  1.4428e-01,
         3.7037e-01,  3.4677e-01,  8.4134e-02,  3.2

In [23]:
# pca = PCA(n_components=2)
# pca_result = pca.fit_transform(selected_features.detach().numpy())

# # Plot the PCA result with color-coded labels
# plt.figure(figsize=(8, 6))
# for label in np.unique(Y_train):
#     indices = Y_train == label
#     plt.scatter(pca_result[indices, 0], pca_result[indices, 1], label=f'{label_decoder[label]}', alpha=0.5)
#     plt.title('PCA Visualization of Selected Utterance Embeddings (Train) with Color-Coded Labels')
#     plt.xlabel('Principal Component 1')
#     plt.ylabel('Principal Component 2')
#     plt.legend()
#     plt.grid(True)
#     plt.show()

3d plottly

In [24]:
# X_train = selected_features
# X_train = X_train / np.linalg.norm(X_train, axis=1, keepdims=True)
# # Perform T-SNE dimensionality reduction
# tsne = TSNE(n_components=3, random_state=42)
# X_tsne = tsne.fit_transform(X_train)

# # Create a Plotly scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=X_tsne[:, 0],
#     y=X_tsne[:, 1],
#     z=X_tsne[:, 2],
#     mode='markers',
#     marker=dict(
#         size=3,
#         color=Y_train,  # Assuming Y_train contains labels for coloring
#         colorscale='Viridis',  # You can choose a different colorscale
#         opacity=0.8
#     )
# )])

# # Update layout
# fig.update_layout(title='3D T-SNE Plot', autosize=False,
#                   width=800, height=800)

# # Show the plot
# fig.show()

In [25]:
# Save the plot as an HTML file
# pio.write_html(fig, '3d_tsne_plot.html')

Selected feature's GAT

current progress (9pm March 6)

In [26]:
# Assuming cnn_bilstm_representations and gat_representations are PyTorch tensors
concatenated_representation = torch.cat((selected_features1, selected_features2), dim=1)

# concatenated_representation1 = torch.cat((tensor_utterances, cherry_picked_nodes), dim=1)
#  concatenated_representation2 = torch.cat((cherry_picked_nodes, all_node_feats), dim=1)
print(concatenated_representation.shape)

torch.Size([12840, 216])


Training and predicting


1st version (only feature engineering and u')

In [27]:
# print(selected_features.shape)
# # Generate sample data
# num_instances = len(selected_features)
# input_dim = selected_features.shape[1]
# num_classes = 7

# X_train = selected_features
# X_train = X_train / np.linalg.norm(X_train, axis=1, keepdims=True)
# Y_train = y_train
# # X_train = torch.randn(num_instances, input_dim)
# # Assuming Y_train is a vector containing the label indices (0 to num_classes-1) for each instance
# # Y_train = torch.randint(0, num_classes, (num_instances,))

# # Calculate class weights to balance the loss function
# class_counts = torch.bincount(Y_train)
# # class_weights = torch.tensor([0.15, 0.03, 0.20, 0.09, 0.15, 0.23, 0.04])

# # Initialize the model
# model = MyNetwork(input_dim, 7, num_classes)
# print(model)
# # Define loss function and optimizer
# criterion = nn.CrossEntropyLoss(weight=None)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# # Train the model
# num_epochs = 3000
# for epoch in range(num_epochs):
#     # Forward pass
#     outputs = model(X_train)
#     loss = criterion(outputs, Y_train)

#     # Backward and optimize
#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()

#     if (epoch+1) % 100 == 0:  # Reduced printing frequency for faster training progress monitoring
#         print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


In [28]:
# # Predict on the training data
# with torch.no_grad():
#     outputs = model(X_train)
#     _, predicted = torch.max(outputs.data, 1)

# # Calculate accuracy
# accuracy = (predicted == Y_train).sum().item() / num_instances
# print(f'Training Accuracy: {accuracy * 100:.2f}%')

# unique_labels, label_counts = np.unique(predicted, return_counts=True)

# # Print the counts for each unique label
# for label, count in zip(unique_labels, label_counts):
#     print(f"Label {label}: {count} occurrences")
# print("------------------------")

# unique_labels, label_counts = np.unique(Y_train, return_counts=True)

# # Print the counts for each unique label
# for label, count in zip(unique_labels, label_counts):
#     print(f"Label {label}: {count} occurrences")

2nd version (feature engineered u', class weighting, data resampling, cost-sensitive learning, regularization)

1. Prep data - normalize and create data loader

In [30]:
# Generate sample data
num_instances = len(concatenated_representation)
input_dim = concatenated_representation.shape[1]
num_classes = 7

# Rescale input features
# selected_features = concatenated_representation / np.linalg.norm(concatenated_representation, axis=1, keepdims=True)

# Apply data resampling (oversampling) to balance class distribution
X_train, Y_train = oversample_data(concatenated_representation, y_train)

# Calculate class weights for class weighting
class_counts = np.bincount(y_train)
total_instances = np.sum(class_counts)
# class_weights = torch.tensor([total_instances / (num_classes * count) for count in class_counts], dtype=torch.float32)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.long)
# print(X_train_tensor.shape, Y_train_tensor.shape)
# X_train_tensor = torch.tensor(selected_features)
# Y_train_tensor = torch.tensor(y_train)

unique_labels, label_counts = np.unique(Y_train, return_counts=True)

# Print the counts for each unique label
for label, count in zip(unique_labels, label_counts):
    print(f"Label {label}: {count} occurrences")

print(X_train_tensor.shape, Y_train_tensor.shape)
# Create a TensorDataset
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)

# Define batch size for DataLoader
batch_size = 1

# Create a PyTorch DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize the model
model = MyNetwork(input_dim, 50, num_classes)
# print(class_weights)
# Define loss function and optimizer with class weights
criterion = nn.CrossEntropyLoss(weight=None)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

Label 0: 5960 occurrences
Label 1: 5960 occurrences
Label 2: 5960 occurrences
Label 3: 5960 occurrences
Label 4: 5960 occurrences
Label 5: 5960 occurrences
Label 6: 5960 occurrences
torch.Size([41720, 216]) torch.Size([41720])


2. Training

In [31]:
# Train the model
num_epochs = 100
print_interval = 20  # Print tqdm every 30 epochs
for epoch in range(num_epochs):
    total_loss = 0.0
    counter = 0
    for inputs, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', leave=False):
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item() * inputs.size(0)
        counter += 1
    
    # Print average loss per epoch
    if (epoch + 1) % print_interval == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_dataset):.4f}')

                                                                                                                       

Epoch [20/100], Loss: 1.5077


                                                                                                                       

Epoch [40/100], Loss: 1.4350


                                                                                                                       

Epoch [60/100], Loss: 1.4039


                                                                                                                       

Epoch [80/100], Loss: 1.3866


                                                                                                                       

Epoch [100/100], Loss: 1.3754




In [32]:
# Set the model to evaluation mode
model.eval()

# Predict on the training data
with torch.no_grad():
    outputs = model(X_train_tensor)
    _, predicted = torch.max(outputs, 1)

# Convert predicted tensor to numpy array
predicted = predicted.numpy()

# Calculate F1 score per class
f1_per_class = f1_score(Y_train_tensor, predicted, average=None)
f1 = f1_score(Y_train_tensor, predicted, average='macro')

print(f'Training F1 Score: {f1:.4f}')

unique_labels, label_counts = np.unique(predicted, return_counts=True)

# Print F1 score for each class
for i, f1 in enumerate(f1_per_class):
    print(f'F1 Score for Class {label_decoder[i]}: {f1:.4f}')
    
# Print the counts for each unique label
for label, count in zip(unique_labels, label_counts):
    print(f"Label {label_decoder[label]}: {count} occurrences")

Training F1 Score: 0.6487
F1 Score for Class anger: 0.6141
F1 Score for Class disgust: 0.7808
F1 Score for Class fear: 0.7850
F1 Score for Class joy: 0.5539
F1 Score for Class neutral: 0.4701
F1 Score for Class sadness: 0.6773
F1 Score for Class surprise: 0.6597
Label anger: 5153 occurrences
Label disgust: 4908 occurrences
Label fear: 4929 occurrences
Label joy: 6288 occurrences
Label neutral: 9380 occurrences
Label sadness: 5098 occurrences
Label surprise: 5964 occurrences


3rd version is 2nd version + ensembled FC classifier