"FC layers referenced from https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65"


In [80]:
import torch, time, os, pickle
import numpy as np
import torch.nn as nn
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif, chi2
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go
from sklearn.manifold import TSNE
import plotly.io as pio
from sklearn.utils import class_weight
import tqdm as notebook_tqdm
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tqdm import tqdm
from graph_context_dataset import FeatureEngineeredDataset
import warnings
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense, Dropout
import random
from model import FCClassifier, MyNetwork, DATASET_PATH, MatchingAttention
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import chi2
from sklearn.metrics import f1_score, confusion_matrix, classification_report
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Make sure to specify which dataset to use

 - dataset_original
 - dataset_drop_noise
 - dataset_smote

In [81]:
# dataset_path = "dataset_original"
# dataset_path = "dataset_drop_noise"
# dataset_path = "dataset_smote"
dataset_path = DATASET_PATH

<h3> Declare functions

In [82]:
# class FCLayer(nn.Module):
#     def __init__(self, input_dim, output_dim):
#         super(FCLayer, self).__init__()
#         self.fc = nn.Linear(input_dim, output_dim)

#     def forward(self, x):
#         x = self.fc(x)
#         return x

# class ActivationLayer(nn.Module):
#     def __init__(self, activation_fn):
#         super(ActivationLayer, self).__init__()
#         self.activation_fn = activation_fn

#     def forward(self, x):
#         x = self.activation_fn(x)
#         return x

# def tanh(x):
#     return torch.tanh(x)

# def sigmoid(x):
#     return torch.sigmoid(x)
# # loss function and its derivative
# def mse(y_true, y_pred):
#     return np.mean(np.power(y_true - y_pred, 2))

# def mse_prime(y_true, y_pred):
#     return 2 * (y_pred - y_true) / y_true.size


In [83]:
def oversample_data(X_train, Y_train, num_classes):
    # Determine the class with the maximum number of instances
    max_class_count = np.max(np.bincount(Y_train))
    # Generate indices for oversampling each class
    indices_list = [np.where(Y_train == i)[0] for i in range(num_classes)]
    # Oversample minority classes to match the count of the majority class
    for i, indices in enumerate(indices_list):
        if len(indices) < max_class_count:
            # Calculate the number of instances to oversample for this class
            num_to_oversample = max_class_count - len(indices)
            # Randomly select instances with replacement to oversample
            oversampled_indices = np.random.choice(indices, size=num_to_oversample, replace=True)
            # Append the oversampled instances to the original data
            X_train = np.concatenate((X_train, X_train[oversampled_indices]), axis=0)
            Y_train = np.concatenate((Y_train, Y_train[oversampled_indices]), axis=0)
    return torch.tensor(X_train), torch.tensor(Y_train)


In [84]:
def concatenate_tensors(tensor_list):
    if not tensor_list:
        raise ValueError("The tensor list is empty")

    feature_dim = tensor_list[0].shape[1]
    for tensor in tensor_list:
        if tensor.shape[1] != feature_dim:
            raise ValueError("All tensors must have the same feature dimension")
    
    concatenated_tensor = torch.cat(tensor_list, dim=0)
    
    return concatenated_tensor

<h4> Import labels and label decoder

In [85]:
file_path = "data/dump/" + dataset_path + "/labels_train.pkl"
with open(file_path, 'rb') as file:
    y_train = pickle.load(file)
y_train = torch.tensor(y_train)

file_path = "data/dump/" + dataset_path + "/labels_test.pkl"
with open(file_path, 'rb') as file:
    y_test = pickle.load(file)
y_test = torch.tensor(y_test)

file_path = "data/dump/" + dataset_path + "/labels_dev.pkl"
with open(file_path, 'rb') as file:
    y_val = pickle.load(file)
y_val = torch.tensor(y_val)
    
file_path = 'data/dump/' + dataset_path + '/label_decoder.pkl'
with open(file_path, 'rb') as file:
    label_decoder = pickle.load(file)

<h4> Import the BERT base-node outputs

first we disregard the u' and directly train the h'

In [86]:
train_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_EGAT_train.pkl",
]

test_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_EGAT_test.pkl",
]

val_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_EGAT_dev.pkl",
]

dictKey = {
    0 : 'bert',
    1 : 'bert-select-few',
    2 : 'bert-select-mod',
    3 : 'bert-select-more',
    4 : 'dgcn',
    5 : 'dgcn-select',
    6 : 'gatv1',
    7 : 'gatv1-select',
    8 : 'gatv1-edge',
    9 : 'gatv1-edge-select',
    10 : 'gatv2-edge',
    11 : 'gatv2-edge-select',
    12 : 'rgat',
    13 : 'rgat-select',
    14 : 'egat',
    15 : 'egat-select',
    16 : 'bert-select-mod-dgcn',
    17 : 'bert-select-mod-gatv1',
    18 : 'bert-select-mod-gatv1-edge',
    19 : 'bert-select-mod-gatv2-edge',
    20 : 'bert-select-mod-rgat',
    21 : 'bert-select-mod-egat',
}
selected_combination = [0, 14]
# selected_combination = [3]

<h4> Getting BERT and GAT outputs for all sets

In [87]:
# trainFeaturesList[0][0].shape

In [88]:
def concatenate_tensors(tensor_list):
    return torch.cat(tensor_list, dim=0)

def import_h_prime(file_paths):
    featuresList = []
    attList = []
    for file_path in file_paths:
        with open(file_path, 'rb') as file:
            data = pickle.load(file)
#             print("Check... ", len(data))
            if isinstance(data, list):
                print(type(data), " instance of list: ", len(data))
                featuresList.append(concatenate_tensors(data))
            else:
                print(type(data), " instance of tensor, ", data.shape)
                featuresList.append(data)
                
    return featuresList
    
trainFeaturesList = import_h_prime(train_file_paths)

testFeaturesList = import_h_prime(test_file_paths)

valFeaturesList = import_h_prime(val_file_paths)


<class 'list'>  instance of list:  1588
<class 'list'>  instance of list:  1588
<class 'list'>  instance of list:  1588
<class 'list'>  instance of list:  1588
<class 'list'>  instance of list:  1588
<class 'list'>  instance of list:  1588
<class 'list'>  instance of list:  1588
<class 'list'>  instance of list:  435
<class 'list'>  instance of list:  435
<class 'list'>  instance of list:  435
<class 'list'>  instance of list:  435
<class 'list'>  instance of list:  435
<class 'list'>  instance of list:  435
<class 'list'>  instance of list:  435
<class 'list'>  instance of list:  203
<class 'list'>  instance of list:  203
<class 'list'>  instance of list:  203
<class 'list'>  instance of list:  203
<class 'list'>  instance of list:  203
<class 'list'>  instance of list:  203
<class 'list'>  instance of list:  203


In [89]:
trainFeaturesList[3].shape

torch.Size([6955, 128])

In [90]:
def getNodalAttn():
    nodalAttList = []
    nodalAttList.append(False)
    nodalAttList.append(False)
    nodalAttList.append(False)
    nodalAttList.append(False)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    nodalAttList.append(True)
    return nodalAttList

In [91]:
nodalAttn = getNodalAttn()

EDA

In [92]:
# # Checking the structure of graph
# for n in range(10):
#     tensor_data_np = tensor_utterances[n].detach().numpy()

#     # Plot the data
#     plt.figure(figsize=(10, 5))
#     plt.plot(range(len(tensor_data_np)), tensor_data_np)
#     plt.title('Line Graph of Tensor Data')
#     plt.xlabel('Index')
#     plt.ylabel('Value')
#     plt.show()


In [93]:
# # Normalize the h' (1st GAT)
# data = cherry_picked_nodes.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [94]:
# # Normalize the h' (2nd GAT)
# data = all_node_feats.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [95]:
# # Normalize the u' or updated_representations
# data = tensor_utterances.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


<h3> Feature Selection and creating data combination for classifiers

Define select feature function

In [96]:
def get_norm_features(encoded_features):
    scaler = MinMaxScaler()
    features_scaled = scaler.fit_transform(encoded_features)
    return torch.tensor(features_scaled)

def get_selected_features(encoded_features, labels, top_n):
    if torch.is_tensor(encoded_features):
        encoded_features = encoded_features.detach().cpu().numpy()
    
    scaler = MinMaxScaler()
    features_scaled = scaler.fit_transform(encoded_features)

    top_features_by_class = {}
    top_scores = {}

    for label in range(7):
        # Create a binary mask indicating instances belonging to the current class
        mask = (labels == label)

        selector = SelectKBest(score_func=f_classif, k=top_n) 
        selector.fit(features_scaled, mask)  

        top_features_indices = np.argsort(selector.scores_)[-top_n:]
        scores = selector.scores_[top_features_indices]

        top_features_by_class[label] = top_features_indices
        top_scores[label] = scores

    concatenated_features_set = set()
    for label, indices in top_features_by_class.items():
        concatenated_features_set.update(indices)

    concatenated_features_indices = list(concatenated_features_set)

    concatenated_features_indices = np.array(concatenated_features_indices)


    selected_features = encoded_features[:, concatenated_features_indices]

    return selected_features, concatenated_features_indices

In [97]:
class Autoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_dim, input_dim),
            nn.ReLU()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

# Function to train the autoencoder
def train_autoencoder(encoded_features, hidden_dim=100, num_epochs=20, lr=0.001):
    input_dim = encoded_features.shape[1]
    autoencoder = Autoencoder(input_dim, hidden_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(autoencoder.parameters(), lr=lr)
    
    X_tensor = torch.tensor(encoded_features, dtype=torch.float32)
    train_loader = torch.utils.data.DataLoader(X_tensor, batch_size=32, shuffle=True)
    
    for epoch in tqdm(range(num_epochs), desc="Training Autoencoder", unit="epoch"):
        total_loss = 0.0
        for data in train_loader:
            inputs = data
            optimizer.zero_grad()
            _, decoded = autoencoder(inputs)
            loss = criterion(decoded, inputs)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
    
    with torch.no_grad():
        encoded_features, _ = autoencoder(torch.tensor(encoded_features, dtype=torch.float32))
    
    return encoded_features, autoencoder

def get_selected_features_autoencoder(autoencoder, encoded_features, labels, top_n=100):
    if torch.is_tensor(encoded_features):
        encoded_features = encoded_features.detach().cpu().numpy()

    reduced_features = autoencoder.encoder(torch.tensor(encoded_features, dtype=torch.float32)).detach().numpy()
    
    scaler = MinMaxScaler()
    features_scaled = scaler.fit_transform(reduced_features)
    
    top_features_by_class = {}
    top_scores = {}

    for label in range(7):
        mask = (labels == label)
        selector = chi2(features_scaled, mask)
        top_features_indices = np.argsort(selector[0])[-top_n:]
        scores = selector[0][top_features_indices]

        top_features_by_class[label] = top_features_indices
        top_scores[label] = scores

    concatenated_features_set = set()
    for label, indices in top_features_by_class.items():
        concatenated_features_set.update(indices)

    concatenated_features_indices = list(concatenated_features_set)
    concatenated_features_indices = np.array(concatenated_features_indices)

    selected_features = reduced_features[:, concatenated_features_indices]

    return selected_features, concatenated_features_indices

In [98]:
trainList[0].shape

torch.Size([6955, 768])

In [99]:
# pca = PCA(n_components=2)
# pca_result = pca.fit_transform(selected_features.detach().numpy())

# # Plot the PCA result with color-coded labels
# plt.figure(figsize=(8, 6))
# for label in np.unique(Y_train):
#     indices = Y_train == label
#     plt.scatter(pca_result[indices, 0], pca_result[indices, 1], label=f'{label_decoder[label]}', alpha=0.5)
#     plt.title('PCA Visualization of Selected Utterance Embeddings (Train) with Color-Coded Labels')
#     plt.xlabel('Principal Component 1')
#     plt.ylabel('Principal Component 2')
#     plt.legend()
#     plt.grid(True)
#     plt.show()

3d plottly

In [100]:
# X_train = selected_features
# X_train = X_train / np.linalg.norm(X_train, axis=1, keepdims=True)
# # Perform T-SNE dimensionality reduction
# tsne = TSNE(n_components=3, random_state=42)
# X_tsne = tsne.fit_transform(X_train)

# # Create a Plotly scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=X_tsne[:, 0],
#     y=X_tsne[:, 1],
#     z=X_tsne[:, 2],
#     mode='markers',
#     marker=dict(
#         size=3,
#         color=Y_train,  # Assuming Y_train contains labels for coloring
#         colorscale='Viridis',  # You can choose a different colorscale
#         opacity=0.8
#     )
# )])

# # Update layout
# fig.update_layout(title='3D T-SNE Plot', autosize=False,
#                   width=800, height=800)

# # Show the plot
# fig.show()

In [101]:
# Save the plot as an HTML file
# pio.write_html(fig, '3d_tsne_plot.html')

Now prepare the data that will be ued to train the classifier, there are 20 combinations. And pick top 7 combinations yielding top F1 weighted-score

In [102]:
# for item in trainFeaturesList[6][:10]:
#     print(item)

In [103]:
trainList = []
testList = []
valList = []

file_path1 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/trainList.pkl"
file_path2 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/testList.pkl"
file_path3 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/valList.pkl"

checkFile1 = os.path.isfile(file_path1)
checkFile2 = os.path.isfile(file_path2)
checkFile3 = os.path.isfile(file_path3)

if checkFile1 and checkFile2 and checkFile3: 
    with open(file_path1, "rb") as file:
        trainList = pickle.load(file)
    with open(file_path2, "rb") as file:
        testList = pickle.load(file)
    with open(file_path3, "rb") as file:
        valList = pickle.load(file)
else:
#     trainFeaturesList.append(data)
    #1
    trainList.append(trainFeaturesList[0])
    testList.append(testFeaturesList[0])
    valList.append(valFeaturesList[0])
    #2
    selectedTrainFeatures1a, indicesFeatures1a = get_selected_features(trainFeaturesList[0], y_train, 16)
    selectedTestFeatures1a = testFeaturesList[0][:, indicesFeatures1a]
    selectedValFeatures1a = valFeaturesList[0][:, indicesFeatures1a]
    trainList.append(selectedTrainFeatures1a)
    testList.append(selectedTestFeatures1a)
    valList.append(selectedValFeatures1a)
    #3
    selectedTrainFeatures1b, indicesFeatures1b = get_selected_features(trainFeaturesList[0], y_train, 32)
    selectedTestFeatures1b = testFeaturesList[0][:, indicesFeatures1b]
    selectedValFeatures1b = valFeaturesList[0][:, indicesFeatures1b]
    trainList.append(selectedTrainFeatures1b)
    testList.append(selectedTestFeatures1b)
    valList.append(selectedValFeatures1b)
    #4
    selectedTrainFeatures1a, indicesFeatures1a = get_selected_features(trainFeaturesList[0], y_train, 64)
    selectedTestFeatures1a = testFeaturesList[0][:, indicesFeatures1a]
    selectedValFeatures1a = valFeaturesList[0][:, indicesFeatures1a]
    trainList.append(selectedTrainFeatures1a)
    testList.append(selectedTestFeatures1a)
    valList.append(selectedValFeatures1a)
    #5
    trainList.append(trainFeaturesList[1])
    testList.append(testFeaturesList[1])
    valList.append(valFeaturesList[1])
    #6
    selectedTrainFeatures2, indicesFeatures2 = get_selected_features(trainFeaturesList[1], y_train, 12)
    selectedTestFeatures2 = testFeaturesList[1][:, indicesFeatures2]
    selectedValFeatures2 = valFeaturesList[1][:, indicesFeatures2]
    trainList.append(selectedTrainFeatures2)
    testList.append(selectedTestFeatures2)
    valList.append(selectedValFeatures2)
    #7
    trainList.append(trainFeaturesList[2])
    testList.append(testFeaturesList[2])
    valList.append(valFeaturesList[2])
    #8
    selectedTrainFeatures3, indicesFeatures3 = get_selected_features(trainFeaturesList[2], y_train, 12)
    selectedTestFeatures3 = testFeaturesList[2][:, indicesFeatures3]
    selectedValFeatures3 = valFeaturesList[2][:, indicesFeatures3]
    trainList.append(selectedTrainFeatures3)
    testList.append(selectedTestFeatures3)
    valList.append(selectedValFeatures3)
    #9
    trainList.append(trainFeaturesList[3])
    testList.append(testFeaturesList[3])
    valList.append(valFeaturesList[3])
    #10
    selectedTrainFeatures4, indicesFeatures4 = get_selected_features(trainFeaturesList[3], y_train, 12)
    selectedTestFeatures4 = testFeaturesList[3][:, indicesFeatures4]
    selectedValFeatures4 = valFeaturesList[3][:, indicesFeatures4]
    trainList.append(selectedTrainFeatures4)
    testList.append(selectedTestFeatures4)
    valList.append(selectedValFeatures4)
    #11
    trainList.append(trainFeaturesList[4])
    testList.append(testFeaturesList[4])
    valList.append(valFeaturesList[4])
    #12
    selectedTrainFeatures5, indicesFeatures5 = get_selected_features(trainFeaturesList[4], y_train, 12)
    selectedTestFeatures5 = testFeaturesList[4][:, indicesFeatures5]
    selectedValFeatures5 = valFeaturesList[4][:, indicesFeatures5]
    trainList.append(selectedTrainFeatures5)
    testList.append(selectedTestFeatures5)
    valList.append(selectedValFeatures5)
    #13
    trainList.append(trainFeaturesList[5])
    testList.append(testFeaturesList[5])
    valList.append(valFeaturesList[5])
    #14
    selectedTrainFeatures6, indicesFeatures6 = get_selected_features(trainFeaturesList[5], y_train, 12)
    selectedTestFeatures6 = testFeaturesList[5][:, indicesFeatures6]
    selectedValFeatures6 = valFeaturesList[5][:, indicesFeatures6]
    trainList.append(selectedTrainFeatures6)
    testList.append(selectedTestFeatures6)
    valList.append(selectedValFeatures6)
    #15
    trainList.append(trainFeaturesList[6])
    testList.append(testFeaturesList[6])
    valList.append(valFeaturesList[6])
    #16
    selectedTrainFeatures7, indicesFeatures7 = get_selected_features(trainFeaturesList[6], y_train, 12)
    selectedTestFeatures7 = testFeaturesList[6][:, indicesFeatures7]
    selectedValFeatures7 = testFeaturesList[6][:, indicesFeatures7]
    trainList.append(selectedTrainFeatures7)
    testList.append(selectedTestFeatures7)
    valList.append(selectedValFeatures7)
    selectedNormTrainFeatures1 = get_norm_features(selectedTrainFeatures1b)
    selectedNormTestFeatures1 = get_norm_features(selectedTestFeatures1b)
    selectedNormValFeatures1 = get_norm_features(selectedValFeatures1b)

    #17
    trainNormFeatures2 = get_norm_features(trainFeaturesList[1].detach().numpy())
    testNormFeatures2 = get_norm_features(testFeaturesList[1].detach().numpy())
    valNormFeatures2 = get_norm_features(valFeaturesList[1].detach().numpy())
    concatenatedTrainFeatures2 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures2), dim=1)
    concatenatedTestFeatures2 = torch.cat((selectedNormTestFeatures1, testNormFeatures2), dim=1)
    concatenatedValFeatures2 = torch.cat((selectedNormValFeatures1, valNormFeatures2), dim=1)
    trainList.append(concatenatedTrainFeatures2)
    testList.append(concatenatedTestFeatures2)
    valList.append(concatenatedValFeatures2)
    #18
    trainNormFeatures3 = get_norm_features(trainFeaturesList[2].detach().numpy())
    testNormFeatures3 = get_norm_features(testFeaturesList[2].detach().numpy())
    valNormFeatures3 = get_norm_features(valFeaturesList[2].detach().numpy())
    concatenatedTrainFeatures3 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures3), dim=1)
    concatenatedTestFeatures3 = torch.cat((selectedNormTestFeatures1, testNormFeatures3), dim=1)
    concatenatedValFeatures3 = torch.cat((selectedNormValFeatures1, valNormFeatures3), dim=1)
    trainList.append(concatenatedTrainFeatures3)
    testList.append(concatenatedTestFeatures3)
    valList.append(concatenatedValFeatures3)
    #19
    trainNormFeatures4 = get_norm_features(trainFeaturesList[3].detach().numpy())
    testNormFeatures4 = get_norm_features(testFeaturesList[3].detach().numpy())
    valNormFeatures4 = get_norm_features(valFeaturesList[3].detach().numpy())
    concatenatedTrainFeatures4 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures4), dim=1)
    concatenatedTestFeatures4 = torch.cat((selectedNormTestFeatures1, testNormFeatures4), dim=1)
    concatenatedValFeatures4 = torch.cat((selectedNormValFeatures1, valNormFeatures4), dim=1)
    trainList.append(concatenatedTrainFeatures4)
    testList.append(concatenatedTestFeatures4)
    valList.append(concatenatedValFeatures4)
    #20
    trainNormFeatures5 = get_norm_features(trainFeaturesList[4].detach().numpy())
    testNormFeatures5 = get_norm_features(testFeaturesList[4].detach().numpy())
    valNormFeatures5 = get_norm_features(valFeaturesList[4].detach().numpy())
    concatenatedTrainFeatures5 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures5), dim=1)
    concatenatedTestFeatures5 = torch.cat((selectedNormTestFeatures1, testNormFeatures5), dim=1)
    concatenatedValFeatures5 = torch.cat((selectedNormValFeatures1, valNormFeatures5), dim=1)
    trainList.append(concatenatedTrainFeatures5)
    testList.append(concatenatedTestFeatures5)
    valList.append(concatenatedValFeatures5)

    #21
    trainNormFeatures6 = get_norm_features(trainFeaturesList[5].detach().numpy())
    testNormFeatures6 = get_norm_features(testFeaturesList[5].detach().numpy())
    valNormFeatures6 = get_norm_features(valFeaturesList[5].detach().numpy())
    concatenatedTrainFeatures6 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures6), dim=1)
    concatenatedTestFeatures6 = torch.cat((selectedNormTestFeatures1, testNormFeatures6), dim=1)
    concatenatedValFeatures6 = torch.cat((selectedNormValFeatures1, valNormFeatures6), dim=1)
    trainList.append(concatenatedTrainFeatures6)
    testList.append(concatenatedTestFeatures6)
    valList.append(concatenatedValFeatures6)

    #22
    trainNormFeatures7 = get_norm_features(trainFeaturesList[6].detach().numpy())
    testNormFeatures7 = get_norm_features(testFeaturesList[6].detach().numpy())
    valNormFeatures7 = get_norm_features(valFeaturesList[6].detach().numpy())
    concatenatedTrainFeatures7 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures7), dim=1)
    concatenatedTestFeatures7 = torch.cat((selectedNormTestFeatures1, testNormFeatures7), dim=1)
    concatenatedValFeatures7 = torch.cat((selectedNormValFeatures1, valNormFeatures7), dim=1)
    trainList.append(concatenatedTrainFeatures7)
    testList.append(concatenatedTestFeatures7)
    valList.append(concatenatedValFeatures7)

    trainList_tensors = [torch.tensor(item) for item in trainList]
    testList_tensors = [torch.tensor(item) for item in testList]
    valList_tensors = [torch.tensor(item) for item in valList]

    with open(file_path1, 'wb') as file:
        pickle.dump(trainList_tensors, file)
    with open(file_path2, 'wb') as file:
        pickle.dump(testList_tensors, file)
    with open(file_path3, 'wb') as file:
        pickle.dump(valList_tensors, file)

  trainList_tensors = [torch.tensor(item) for item in trainList]
  testList_tensors = [torch.tensor(item) for item in testList]
  valList_tensors = [torch.tensor(item) for item in valList]


1. Prep data - normalize and create data loader

In [104]:
# for item in trainFeaturesList:
#     print(item.shape)

In [105]:
def to_tensor(data):
    if isinstance(data, torch.Tensor):
        return data
    elif isinstance(data, np.ndarray):
        return torch.tensor(data)
    else:
        raise TypeError(f"Unsupported data type: {type(data)}")
        
def prep_data(features, labels, isOversample):
    num_classes = 7

    if isOversample:
        X_set, Y_set = oversample_data(features, labels, num_classes)
    else:
        X_set, Y_set = features, labels

    if isinstance(X_set, torch.Tensor):
        X_tensor = X_set.float()
    else:
        X_tensor = torch.tensor(X_set, dtype=torch.float32)
    
    if isinstance(Y_set, torch.Tensor):
        Y_tensor = Y_set.long()
    else:
        Y_tensor = torch.tensor(Y_set, dtype=torch.long)

    unique_labels, label_counts = np.unique(Y_set, return_counts=True)
    dataset = TensorDataset(X_tensor, Y_tensor)

    return X_tensor, Y_tensor

2. Training

Backup

In [106]:
# def model_train1(X_set, Y_set, num_epochs=20, batch_size=32, loss_difference_threshold=0.01, 
#                  hidden_dims=[256, 128], dropout_rate=0.5, lr=0.0001, 
#                  optimizer_class=optim.Adam, criterion_class=nn.CrossEntropyLoss, matchAtt
#                  ):
#     output_dim = 7  # Number of classes
#     model = MyNetwork(len(X_set[0]), hidden_dims, output_dim, dropout_rate)
#     criterion = criterion_class()
#     optimizer = optimizer_class(model.parameters(), lr=lr)
#     loss_history = []
#     accuracy_history = []
# #     print_interval = 1  # Print tqdm every epoch
#     previous_loss = float('inf')

#     # Create dataset and dataloader
#     dataset = TensorDataset(X_set, Y_set)
#     dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
#     epoch_num = num_epochs
#     for epoch in range(num_epochs):
#         total_loss = 0.0
#         correct_predictions = 0
#         total_instances = 0
#         with tqdm(total=len(dataloader), desc=f'Epoch {epoch+1}/{num_epochs}', leave=False) as pbar:
#             for inputs, labels in dataloader:
#                 inputs = inputs.float()  # Ensure inputs are float32
#                 labels = labels.long()   # Ensure labels are long
#                 optimizer.zero_grad()
# #                 TODO
#                 outputs = model(inputs)
#                 outputs = outputs.squeeze()
#                 labels = labels.squeeze()
#                 loss = criterion(outputs, labels)
                
#                 # Check for NaN loss values
#                 if torch.isnan(loss):
#                     print("NaN loss encountered. Skipping this batch.")
#                     break
                
#                 # Apply gradient clipping
#                 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                
#                 loss.backward()
#                 optimizer.step()
#                 total_loss += loss.item()
#                 _, predicted = torch.max(outputs, dim=1)
#                 correct_predictions += (predicted == labels).sum().item()
#                 total_instances += labels.size(0)
#                 pbar.update(1)

#         epoch_loss = total_loss / total_instances
#         epoch_accuracy = correct_predictions / total_instances
#         loss_history.append(epoch_loss)
#         accuracy_history.append(epoch_accuracy)

#         if epoch > 0 and abs(epoch_loss - previous_loss) < loss_difference_threshold:
#             epoch_num = epoch
#             break

#         previous_loss = epoch_loss

#     return model, epoch_num


In [107]:
def shuffle_data(X_set, Y_set):
    indices = np.arange(len(X_set))
    np.random.shuffle(indices)
    return X_set[indices], Y_set[indices]

In [108]:
def model_train1(X_set=None, Y_set=None, num_epochs=50, loss_difference_threshold=0.0001, 
                 hidden_dims=[64, 32], dropout_rate=0.5, lr=0.0001, 
                 optimizer_class=optim.Adam, criterion_class=nn.CrossEntropyLoss, 
                 nodalAtt=None, umask=None, seq_len=None, no_cuda=True, ranges=None):
    
    input_dim = X_set.shape[1] if len(X_set) > 0 else 0
    model = MyNetwork(input_dim=input_dim, hidden_dims=hidden_dims, output_dim=7, dropout_rate=0.5)
    criterion = criterion_class()
    optimizer = optimizer_class(model.parameters(), lr=lr)
    loss_history = []
    accuracy_history = []
    previous_loss = float('inf')

    epoch_num = num_epochs

    for epoch in range(num_epochs):
        total_loss = 0.0
        correct_predictions = 0
        total_instances = 0
        
        # Shuffle the dataset at the beginning of each epoch
        X_set, Y_set = shuffle_data(X_set, Y_set)
        
        with tqdm(total=len(ranges), desc=f'Epoch {epoch+1}/{num_epochs}', leave=False) as pbar:
            for i, (start, end) in enumerate(ranges):
                inputs = X_set[start:end+1].float()
                labels = Y_set[start:end+1].long()
                
                optimizer.zero_grad()
                last_idx = seq_len[i][0]
                
                umask_slice = umask[i][0][:last_idx].unsqueeze(0)
#                 print("inputs: ", inputs)
#                 print("seq_len: ", seq_len[i])
#                 print("umask_slice: ", umask_slice)
                outputs = model(inputs, nodalAtt, seq_len[i], umask_slice)
                outputs = outputs.squeeze()
                labels = labels.squeeze()
                
                log_prob = F.log_softmax(outputs, 1)
                
                loss = criterion(log_prob, labels)
                
                # Check for NaN loss values
                if torch.isnan(loss):
                    print("NaN loss encountered. Skipping this batch.")
                    break
                
                # Apply gradient clipping
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                
                if nodalAtt:
                    loss.backward(retain_graph=True)
                else:
                    loss.backward(retain_graph=False)
                
                optimizer.step()
                total_loss += loss.item()
                _, predicted = torch.max(outputs, dim=1)
                correct_predictions += (predicted == labels).sum().item()
                total_instances += labels.size(0)
                pbar.update(1)

        epoch_loss = total_loss / total_instances
        epoch_accuracy = correct_predictions / total_instances
        loss_history.append(epoch_loss)
        accuracy_history.append(epoch_accuracy)

        if epoch > 0 and abs(epoch_loss - previous_loss) < loss_difference_threshold:
            epoch_num = epoch
            break

        previous_loss = epoch_loss

    return model, epoch_num

Backup

In [109]:
def model_train2(X_set=None, Y_set=None, num_epochs=20, loss_difference_threshold=0.001, 
                 hidden_dims=128, dropout_rate=0.5, lr=0.0001, 
                 optimizer_class=optim.Adam, criterion_class=nn.CrossEntropyLoss, 
                 nodalAtt=None, umask=None, seq_len=None, no_cuda=True, ranges=None):
    
    output_dim = 7  # Number of classes
    input_dim = X_set.shape[1] if len(X_set) > 0 else 0
    model = FCClassifier(input_dim, hidden_dims, output_dim, dropout_rate)
    criterion = criterion_class()
    optimizer = optimizer_class(model.parameters(), lr=lr)
    loss_history = []
    accuracy_history = []
    previous_loss = float('inf')

    epoch_num = num_epochs

    for epoch in range(num_epochs):
        total_loss = 0.0
        correct_predictions = 0
        total_instances = 0
        
        # Shuffle the dataset at the beginning of each epoch
        X_set, Y_set = shuffle_data(X_set, Y_set)
        
        with tqdm(total=len(ranges), desc=f'Epoch {epoch+1}/{num_epochs}', leave=False) as pbar:
            for i, (start, end) in enumerate(ranges):
                inputs = X_set[start:end+1].float()
                labels = Y_set[start:end+1].long()
                
                optimizer.zero_grad()
                last_idx = seq_len[i][0]
                
                umask_slice = umask[i][0][:last_idx].unsqueeze(0)

#                 print("inputs: ", inputs)
#                 print("seq_len: ", seq_len[i])
#                 print("umask_slice: ", umask_slice)
                outputs = model(inputs, nodalAtt, seq_len[i], umask_slice)
                outputs = outputs.squeeze()
                labels = labels.squeeze()
#                 print(outputs.shape, labels.shape)
                log_prob = F.log_softmax(outputs, 1)
                
                loss = criterion(log_prob, labels)
                
                # Check for NaN loss values
                if torch.isnan(loss):
                    print("NaN loss encountered. Skipping this batch.")
                    break
                
                # Apply gradient clipping
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                if nodalAtt:
                    loss.backward(retain_graph=True)
                else:
                    loss.backward(retain_graph=False)
                    
                optimizer.step()
                
                total_loss += loss.item()
                _, predicted = torch.max(log_prob, dim=1)
                correct_predictions += (predicted == labels).sum().item()
                total_instances += labels.size(0)
                pbar.update(1)
                
        epoch_loss = total_loss / total_instances
        epoch_accuracy = correct_predictions / total_instances
        loss_history.append(epoch_loss)
        accuracy_history.append(epoch_accuracy)
        
        if epoch > 0:
            loss_diff = abs(epoch_loss - previous_loss)
            if loss_diff < loss_difference_threshold:
                print(f"Training stopped early at epoch {epoch+1}.")
                print(f"Loss difference ({loss_diff}) is below the threshold ({loss_difference_threshold}).")
                epoch_num = epoch + 1
                break
        
        previous_loss = epoch_loss
    
    return model, epoch_num

In [110]:
def classify_emotions(model=None, X_set=None, Y_set=None, typeSet=None, 
                      isSimpleFC=False, i_dict=None,
                      nodalAtt=None, umask=None, seq_len=None, no_cuda=True, ranges=None):
    # Ensure the model is in evaluation mode
    model.eval()
    
    # Initialize empty lists to store predictions and true labels
    all_predictions = []
    all_labels = []
    i = 0
    # Iterate over the given ranges
    for start, end in ranges:
        X_batch = X_set[start:end+1].float()
        Y_batch = Y_set[start:end+1].long()

#         if X_batch.dtype != torch.float32:
#             X_batch = X_batch.float()

        # Use no_grad to save memory and computations
        with torch.no_grad():
            inputs = X_batch.to('cuda' if torch.cuda.is_available() and not no_cuda else 'cpu')
            labels = Y_batch.to('cuda' if torch.cuda.is_available() and not no_cuda else 'cpu')
            last_idx = seq_len[i][0]
            
            umask_slice = umask[i][0][:last_idx].unsqueeze(0)

            outputs = model(inputs, nodalAtt, seq_len[i], umask_slice)

            outputs = outputs.squeeze()
            labels = labels.squeeze()

            log_prob = F.log_softmax(outputs, 1)

            _, predicted = torch.max(log_prob, 1)

            # Append the predictions and labels to the lists
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            i = i+1

    df_predictions = pd.DataFrame({
        'true_label': all_labels,
        'predicted_label': all_predictions
    })
    
    
    file_name = f"data/dump/{dataset_path}/BERT_data_for_classifier/{dictKey[i_dict]}_predictedTest.pkl"
    with open(file_name, 'wb') as file:
        pickle.dump(df_predictions, file)
        
    # Generate the classification report
    report = classification_report(all_labels, all_predictions, target_names=label_decoder.values(), 
                                   output_dict=True, zero_division=0, digits=4)
    print(report)
    # Calculate the required metrics
    accuracy = report['accuracy']
    recall = report['weighted avg']['recall']
    weighted_f1 = report['weighted avg']['f1-score']
    f1_micro = report.get('micro avg', {}).get('f1-score', accuracy)
    f1_macro = report.get('macro avg', {}).get('f1-score', 0.0) 

    if typeSet == "validation":
        print("Classified: ", dictKey[i_dict])
    
    return dictKey[i_dict], typeSet, isSimpleFC, accuracy, recall, weighted_f1, f1_micro, f1_macro

In [111]:
# i = 0
# for trainSet, testSet, valSet, _, _, _ in tqdm(dataLoader, desc="Encoding Progress", unit="batch"):
#     print(i, type(trainSet))
#     if isinstance(trainSet, list):
#         print(type(trainSet[0]))
#         sample = trainSet[0]
#         print(sample.shape)
#     else:
#         print(trainSet.squeeze(0).shape)
#     i = i+1

In [112]:
def getGraphComponents(file_path):
    checkFile = os.path.isfile(file_path)
    if checkFile:
        with open(file_path, 'rb') as file:
            umask, \
            seq_lengths,\
            features, \
            edge_index, \
            edge_norm, \
            edge_type, \
            edge_index_lengths = pickle.load(file)  
    else:
        return None
    
    return umask, \
            seq_lengths,\
            features, \
            edge_index, \
            edge_norm, \
            edge_type, \
            edge_index_lengths

In [113]:
file_path1 = 'embed/' + dataset_path + '/pre_h_prime_BERT_train.pkl'
file_path2 = 'embed/' + dataset_path + '/pre_h_prime_BERT_test.pkl'
file_path3 = 'embed/' + dataset_path + '/pre_h_prime_BERT_dev.pkl'

train_umask, train_seq_lengths, train_features, \
    _, _, _, _ = getGraphComponents(file_path1)

test_umask, test_seq_lengths, test_features, \
    _, _, _, _ = getGraphComponents(file_path2)

val_umask, val_seq_lengths, val_features, \
    _, _, _, _ = getGraphComponents(file_path3)

UpdateJune 16

In [114]:
print(len(train_umask), len(train_seq_lengths))
print(train_seq_lengths[1])

1588 1588
[2]


In [115]:
def getSpeakersAndRanges(file_path):
    with open(file_path, "rb") as file:
        encodedSpeakers, ranges = pickle.load(file)
    file.close()
    return encodedSpeakers, ranges

file_path1 = "data/dump/" + dataset_path + "/speaker_encoder_train.pkl"
file_path2 = "data/dump/" + dataset_path + "/speaker_encoder_test.pkl"
file_path3 = "data/dump/" + dataset_path + "/speaker_encoder_dev.pkl"

encodedSpeakersTrain, rangesTrain = getSpeakersAndRanges(file_path1)
encodedSpeakersTest, rangesTest = getSpeakersAndRanges(file_path2)
encodedSpeakersDev, rangesDev = getSpeakersAndRanges(file_path3)

In [116]:
# for item in rangesTrain:
#     if item[0] == item[1]:
        
#         print("Dup detected", item[0], " vs ", item[1])

Sample run

Verifying the attention in batch before training

In [117]:
# class MatchingAttention(nn.Module):
#     def __init__(self, mem_dim, cand_dim, alpha_dim, att_type='general2'):
#         super(MatchingAttention, self).__init__()
#         self.mem_dim = mem_dim
#         self.cand_dim = cand_dim
#         self.alpha_dim = alpha_dim
#         self.att_type = att_type

#         if self.att_type == 'general2':
#             self.transform = nn.Linear(self.mem_dim, self.cand_dim * self.alpha_dim)

#     def forward(self, M, x, mask):
#         M_ = M.permute(1, 2, 0)  # (batch, mem_dim, seq_len)
#         x_ = self.transform(x).unsqueeze(1)  # (batch, 1, cand_dim * alpha_dim)
#         mask_ = mask.unsqueeze(2).repeat(1, 1, self.mem_dim).transpose(1, 2)  # (batch, mem_dim, seq_len)
        
#         M_ = M_ * mask_
#         alpha = torch.bmm(x_, M_)  # (batch, 1, seq_len)
        
#         alpha = F.softmax(alpha, dim=-1)  # Apply softmax to get attention weights
#         attended = torch.bmm(alpha, M.permute(1, 0, 2))  # (batch, 1, mem_dim)
        
#         return attended.squeeze(1), alpha
    
# def attentive_node_features(emotions, seq_lengths, umask, matchatt_layer):
#     max_len = max(seq_lengths)
#     batch_size = len(seq_lengths)
#     mem_dim = emotions.size(1)

#     padded_emotions = []
#     for i in range(batch_size):
#         length = seq_lengths[i]
#         # Assuming emotions is already a 2D tensor of shape (seq_len, mem_dim)
#         padded_emotion = F.pad(emotions[:length], (0, 0, 0, max_len - length), "constant", 0)
#         padded_emotions.append(padded_emotion)

#     emotions_padded = torch.stack(padded_emotions, dim=1)  # (max_len, batch_size, mem_dim)
    
#     att_emotions = []
#     alpha_list = []
#     for t in range(max_len):
#         att_em, alpha = matchatt_layer(emotions_padded, emotions_padded[t], umask)
#         att_emotions.append(att_em)
#         alpha_list.append(alpha)

#     att_emotions = torch.stack(att_emotions, dim=0)  # (max_len, batch_size, mem_dim)

#     # Remove the singleton dimension for batch size 1
#     att_emotions = att_emotions.squeeze(1)  # (seq_len, mem_dim)

#     return att_emotions, alpha_list
    
# class MyNetwork(nn.Module):
#     def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate):
#         super(MyNetwork, self).__init__()
#         self.fc1 = nn.Linear(input_dim, hidden_dims[0])  # Adjust input_dim to match flattened shape
#         self.activation1 = nn.ReLU()
#         self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
#         self.activation2 = nn.ReLU()
#         self.dropout = nn.Dropout(dropout_rate)
#         self.fc3 = nn.Linear(hidden_dims[1], output_dim)
#         self.matchatt = MatchingAttention(mem_dim=64, cand_dim=64, alpha_dim=1, att_type='general2')

#     def forward(self, x, nodalAtt, seq_lengths, umask):
#         if nodalAtt:
#             att_emotions, _ = attentive_node_features(x, seq_lengths, umask, self.matchatt)

#             # Reshape att_emotions to have a 2D shape (batch_size, input_dim)
#             att_emotions = att_emotions.view(att_emotions.size(0), -1)
        
#         x = self.fc1(att_emotions)
#         x = self.activation1(x)
#         x = self.fc2(x)
#         x = self.activation2(x)
#         x = self.dropout(x)
#         x = self.fc3(x)
        
#         return x

# class FCClassifier(torch.nn.Module):
#     def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
#         super(FCClassifier, self).__init__()
#         self.linear1 = nn.Linear(input_dim, hidden_dim)
#         self.dropout = nn.Dropout(dropout_rate)
#         self.linear2 = nn.Linear(hidden_dim, output_dim)
#         self.matchatt = MatchingAttention(mem_dim=input_dim, cand_dim=input_dim, alpha_dim=1, att_type='general2')
#     def forward(self, x=None, nodalAtt=None, seq_lengths=None, umask=None, no_cuda=True):
#         if nodalAtt:
#             att_emotions, _ = attentive_node_features(x, seq_lengths, umask, self.matchatt)

#             # Reshape att_emotions to have a 2D shape (batch_size, input_dim)
#             att_emotions = att_emotions.view(att_emotions.size(0), -1)
            
#         x = F.relu(self.linear1(x))
#         x = self.dropout(x)
#         x = self.linear2(x)
#         return x

part a test case (uncomment the top part to see the result)

In [118]:
# seq_lengths = [14]  # Sequence length matches the input tensor
# umask = torch.tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
# inputs = torch.randn(14, 64)  # Example inputs, shape [seq_len, input_dim]

# # Initialize MatchingAttention
# matchatt_layer = MatchingAttention(mem_dim=inputs.shape[1], cand_dim=inputs.shape[1], alpha_dim=1, att_type='general2')

# # Call attentive_node_features
# att_emotions, alpha_list = attentive_node_features(inputs, seq_lengths, umask, matchatt_layer)

# # Print shapes for verification
# print("att_emotions shape:", att_emotions.shape)  # Should be (seq_len, mem_dim)
# print("alpha_list shape:", len(alpha_list))

In [119]:
seq_lengths = [14]
umask = torch.tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
inputs = torch.randn(14, 64)  # Example inputs, shape [seq_len, input_dim]

model = MyNetwork(input_dim=inputs.shape[1], hidden_dims=[128, 30], output_dim=7, dropout_rate=0.5)
outputs = model(inputs, True, seq_lengths, umask)
print(outputs.shape)  #.Expected torch.Size([14, 7])

torch.Size([14, 7])


part b test case (uncomment the part and the function to see the result)

In [120]:
model = FCClassifier(input_dim=inputs.shape[1], hidden_dim=64, output_dim=7, dropout_rate=0.5)
outputs = model(inputs, True, seq_lengths, umask)
print(outputs.shape)  # Expected torch.Size([14, 7])

torch.Size([14, 7])


end of sample experiment

Actual run

In [121]:
selected_combination

[0, 14]

In [122]:
# for item in rangesTrain:
#     if item[0] == item[1]:
#         print(item[0], item[1])

In [123]:
dataset = FeatureEngineeredDataset(trainList, testList, valList)
dataLoader = DataLoader(dataset, batch_size=1, shuffle=False)

file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/classifier_test_no_tuning_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        df_results_sorted = pickle.load(file)
else:
    results = []
    num_epochs = 30
    i = 0
    for trainSet, testSet, valSet in dataLoader:
        if isinstance(trainSet, list):
            trainSet = trainSet[0].squeeze(0)
            testSet = testSet[0].squeeze(0)
            valSet = valSet[0].squeeze(0)
        else:
            trainSet = trainSet.squeeze(0)
            testSet = testSet.squeeze(0)
            valSet = valSet.squeeze(0)

        X_tensor = to_tensor(trainSet)
        Y_tensor = to_tensor(y_train)
        
        print("Curerntly at", dictKey[i])
#         print(X_tensor.shape, Y_tensor.shape)
#         model1, _,  = model_train1(X_set=X_tensor, Y_set=Y_tensor, num_epochs=num_epochs, 
#                                  umask=train_umask, nodalAtt=nodalAttn[i],
#                                seq_len=train_seq_lengths, ranges=rangesTrain)

#         result1 = classify_emotions(model=model1, X_set=X_tensor, Y_set=Y_tensor,  typeSet='train', 
#                                    isSimpleFC=False, i_dict=i, 
#                                    nodalAtt=nodalAttn[i],  umask=train_umask, seq_len=train_seq_lengths,
#                                    ranges=rangesTrain )
#         results.append(result1)
        if i in selected_combination:
            model2, _,  = model_train2(X_set=X_tensor, Y_set=Y_tensor, num_epochs=num_epochs, 
                                     umask=train_umask, nodalAtt=nodalAttn[i],
                                   seq_len=train_seq_lengths, ranges=rangesTrain)

            X_tensor = to_tensor(testSet)
            Y_tensor = to_tensor(y_test)
            result2 = classify_emotions(model=model2, X_set=X_tensor, Y_set=Y_tensor,  typeSet='test', 
                                       isSimpleFC=True, i_dict=i, 
                                       nodalAtt=nodalAttn[i],  umask=test_umask, seq_len=test_seq_lengths,
                                       ranges=rangesTest)
    #         result2 = classify_emotions(model=model2, X_set=X_tensor, Y_set=Y_tensor,  typeSet='train', 
    #                                    isSimpleFC=True, i_dict=i, 
    #                                    nodalAtt=nodalAttn[i],  umask=train_umask, seq_len=train_seq_lengths,
    #                                    ranges=rangesTrain)
            results.append(result2)
        else:
            print("This is skipped")
        i = i+1

    columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', 'Weighted-F1', 'F1-micro', 'F1-macro']
    df_results = pd.DataFrame(results, columns=columns)
    df_results_sorted = df_results.sort_values(by='Weighted-F1', ascending=False)

    with open(file_path, 'wb') as file:
        pickle.dump(df_results_sorted, file)

Curerntly at bert


                                                                                                                       

Training stopped early at epoch 20.
Loss difference (0.0008872029990878993) is below the threshold (0.001).
{'anger': {'precision': 0.391304347826087, 'recall': 0.06, 'f1-score': 0.10404624277456648, 'support': 300.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 70.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 51.0}, 'joy': {'precision': 0.38961038961038963, 'recall': 0.22900763358778625, 'f1-score': 0.28846153846153844, 'support': 262.0}, 'neutral': {'precision': 0.501219512195122, 'recall': 0.9330306469920545, 'f1-score': 0.6521221737405791, 'support': 881.0}, 'sadness': {'precision': 0.6, 'recall': 0.0797872340425532, 'f1-score': 0.14084507042253522, 'support': 188.0}, 'surprise': {'precision': 0.47368421052631576, 'recall': 0.06818181818181818, 'f1-score': 0.11920529801324503, 'support': 132.0}, 'accuracy': 0.49044585987261147, 'macro avg': {'precision': 0.3365454943082735, 'recall': 0.19571533325774457, 'f1-score': 0.1863829

                                                                                                                       

Training stopped early at epoch 5.
Loss difference (0.0008307281798548005) is below the threshold (0.001).
{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 300.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 70.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 51.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 262.0}, 'neutral': {'precision': 0.4676220806794055, 'recall': 1.0, 'f1-score': 0.637251356238698, 'support': 881.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 188.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 132.0}, 'accuracy': 0.4676220806794055, 'macro avg': {'precision': 0.06680315438277222, 'recall': 0.14285714285714285, 'f1-score': 0.09103590803409971, 'support': 1884.0}, 'weighted avg': {'precision': 0.21867041033893644, 'recall': 0.4676220806794055, 'f1-score': 0.297992805120113, 'support': 1884.0}}
Curerntly 

In [124]:
df_results_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro
0,bert,test,True,0.490446,0.490446,0.384036,0.490446,0.186383
1,egat,test,True,0.467622,0.467622,0.297993,0.467622,0.091036


-----------------------------------------------------------------------

<h4> Select top 10 unique data combinations then tune

In [125]:
# top_10_combinations

In [126]:
max_iterations = 9
counter = 0
combination1 = []
combination2 = []
seen_combinations = set()

for idx, row in df_results_sorted.iterrows():
    if counter >= max_iterations:
        break

    if row['data_combination'] in seen_combinations:
        continue

    if row['isSimpleFC']:
        combination2.append(row['data_combination'])
    else:
        combination1.append(row['data_combination'])

    seen_combinations.add(row['data_combination'])
    counter += 1

# Display the results
print("Combination 1 (isSimpleFC=False):", combination1)
print("Combination 2 (isSimpleFC=True):", combination2)

Combination 1 (isSimpleFC=False): []
Combination 2 (isSimpleFC=True): ['bert', 'egat']


In [127]:
indices1 = [key for key, value in dictKey.items() if value in combination1]
indices2 = [key for key, value in dictKey.items() if value in combination2]

print("Indices for isSimpleFC=False:", indices1)
print("Indices for isSimpleFC=True:", indices2)

Indices for isSimpleFC=False: []
Indices for isSimpleFC=True: [0, 14]


In [128]:
selectedTrainDeepList = [trainList[i] for i in indices1]
selectedTestDeepList = [testList[i] for i in indices1]
selectedValDeepList = [valList[i] for i in indices1]

len(selectedTrainDeepList)

0

In [129]:
indices1

[]

In [130]:
# for trainSet in selectedTrainList:
#     print(type(trainSet))

In [131]:
selectedTrainList = [trainList[i] for i in indices2]
selectedTestList = [testList[i] for i in indices2]
selectedValList = [valList[i] for i in indices2]

len(selectedTrainList)

2

<h4> Tuning using random parameters

In [132]:
# it should call both model_train1 and 2

def objective_func(X_train, X_test, X_val, y_train, y_test, y_val, hyperparams, i_dict, isSimpleFC,
                  train_umask, test_umask, val_umask,
                  train_seq_lengths, test_seq_lengths, val_seq_lengths,
                  rangesTrain, rangesTest, rangesDev, nodalAtt):
    results = []
    hyperparams_string = (
        f'num_epochs={hyperparams["num_epochs"]} '
        f'loss_difference_threshold={hyperparams["loss_difference_threshold"]} '
        f'hidden_dims={hyperparams["hidden_dims"]} '
        f'dropout_rate={hyperparams["dropout_rate"]} '
        f'learning_rate={hyperparams["learning_rate"]} '
        f'optimizers={hyperparams["optimizers"]} '
        f'criteria={hyperparams["criteria"]}'
    )    
    print(hyperparams_string)
            
    X_train_tensor = to_tensor(X_train)
    y_train_tensor = to_tensor(y_train).long()
    X_val_tensor = to_tensor(X_val)
    y_val_tensor = to_tensor(y_val).long()
    X_test_tensor = to_tensor(X_test)
    y_test_tensor = to_tensor(y_test).long()
# train
    start_time = time.time()
    if isSimpleFC:
        model, num_epoch = model_train2(X_set=X_train_tensor, Y_set=y_train_tensor, 
                            num_epochs=hyperparams["num_epochs"], loss_difference_threshold=hyperparams["loss_difference_threshold"], 
                            hidden_dims=hyperparams["hidden_dims"], dropout_rate=hyperparams["dropout_rate"],
                            lr=hyperparams["learning_rate"], optimizer_class=hyperparams["optimizers"], 
                            criterion_class=hyperparams["criteria"],
                            umask=train_umask, seq_len=train_seq_lengths, ranges=rangesTrain, nodalAtt=nodalAtt)        
#     else:
#         model, num_epoch = model_train1(X_train_tensor, y_train_tensor, hyperparams["num_epochs"],
#                             hyperparams["batch_size"], hyperparams["loss_difference_threshold"], 
#                             hyperparams["hidden_dims"], hyperparams["dropout_rate"],
#                             hyperparams["learning_rate"], hyperparams["optimizers"], hyperparams["criteria"])
    end_time = time.time()
    elapsed_time = end_time - start_time
# val
    result = classify_emotions(model=model, X_set=X_val_tensor, Y_set=y_val_tensor, typeSet='validation', 
                               isSimpleFC=isSimpleFC, i_dict=i_dict,
                               nodalAtt=nodalAtt, umask=val_umask, seq_len=val_seq_lengths, ranges=rangesDev)
    elapsed_time = time.time() - start_time
    
    result = list(result)
# test

    hyperparams_string = f'num_epochs={hyperparams["num_epochs"]}-loss_difference_threshold={hyperparams["loss_difference_threshold"]}-hidden_dims={hyperparams["hidden_dims"]}-dropout_rate={hyperparams["dropout_rate"]}-learning_rate={hyperparams["learning_rate"]}-optimizers={hyperparams["optimizers"]}-criteria={hyperparams["criteria"]}'
    result.append(elapsed_time)
    result.append(hyperparams_string)
    result.append(num_epoch)
    results.append(result)
# test
    result = classify_emotions(model=model, X_set=X_test_tensor, Y_set=y_test_tensor, typeSet='test', 
                               isSimpleFC=isSimpleFC, i_dict=i_dict,
                               nodalAtt=nodalAtt, umask=test_umask, seq_len=test_seq_lengths, ranges=rangesTest)
    result = list(result)
    result.append(elapsed_time)
    result.append(hyperparams_string)
    result.append(num_epoch)
    results.append(result)
    
#     result = classify_emotions(model, X_test_tensor, y_test_tensor, \
#                                'test', isSimpleFC, i_dict)
    
#     result = list(result)
#     result.append(elapsed_time)
#     result.append(hyperparams_string)
#     result.append(num_epoch)
#     results.append(result)
    
    columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch']
    df = pd.DataFrame(results, columns=columns)
    df_results_sorted = df.sort_values(by='data_combination', ascending=False)
    
    return df_results_sorted


# def objective_func(X_train, X_test, X_val, 
#                y_train, y_test, y_val, hyperparams, i_dict):

In [133]:
def random_search(X_train=None, X_test=None, X_val=None, 
                  y_train=None, y_test=None, y_val=None, 
                  param_grid=None, isSimpleFC=True, i_dict=None,
                  train_umask=None, test_umask=None, val_umask=None,
                  train_seq_lengths=None, test_seq_lengths=None, val_seq_lengths=None,
                  rangesTrain=None, rangesTest=None, rangesDev=None, nodalAtt=False
                 ):
    sub_total_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)
    MAX_EVALS = 5
    for i in range(MAX_EVALS):
        hyperparams = {k: random.sample(v, 1)[0] for k, v in param_grid.items()}

        try:
            new_results = objective_func(X_train, X_test, X_val,  y_train, y_test, y_val, hyperparams, i_dict, isSimpleFC,  
                                        train_umask, test_umask, val_umask,
                                        train_seq_lengths, test_seq_lengths, val_seq_lengths,
                                        rangesTrain, rangesTest, rangesDev, nodalAtt)
            sub_total_results = pd.concat([sub_total_results, new_results], ignore_index=True)
            
        except Exception as e:
            print(f"Error with hyperparams {hyperparams}: {e}")
            continue
    
    # Sort with best score on top
    return sub_total_results 

In [134]:
param_grid1 = {
    'num_epochs': [20, 30, 40],
    'loss_difference_threshold': [0.001, 0.0001],
    'hidden_dims': [[256, 128], [128, 64], [64, 32]],
    'dropout_rate': [0.3, 0.5, 0.7],
    'learning_rate': [0.001, 0.0001, 0.00001],
    'optimizers': [optim.Adam, optim.SGD],
    'criteria': [nn.CrossEntropyLoss, nn.NLLLoss]
}
param_grid2 = {
    'num_epochs': [20, 30, 40],
    'loss_difference_threshold': [0.001, 0.0001],
    'hidden_dims': [128, 256, 512],
    'dropout_rate': [0.3, 0.5, 0.7],
    'learning_rate': [0.001, 0.0001],
    'optimizers': [optim.Adam, optim.SGD],
    'criteria': [nn.CrossEntropyLoss, nn.NLLLoss]
}

<h5> First find the best hyperparameter combination for the DeepClassifier.

In [135]:
def hyperparamTuning(X_trainSet, X_testSet, X_valSet, y_train, y_test, y_val, isSimpleFC, param_grid, indices,
                    train_umask, test_umask, val_umask,
                    train_seq_lengths, test_seq_lengths, val_seq_lengths,
                    rangesTrain, rangesTest, rangesDev, nodalAttn):
    
    total_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)
    
    for i in indices:
        print("============", dictKey[i], "============")
        X_train = X_trainSet[i]
        X_test = X_testSet[i]
        X_val = X_valSet[i]

        sub_total_results = random_search(X_train, X_test, X_val, y_train, y_test, y_val,
                     param_grid, isSimpleFC, i,
                     train_umask, test_umask, val_umask,
                     train_seq_lengths, test_seq_lengths, val_seq_lengths,
                     rangesTrain, rangesTest, rangesDev, nodalAttn[i])

        total_results = pd.concat([sub_total_results, total_results], ignore_index=True)

    return total_results


Uncomment below next time

In [136]:
# file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/deep_classifier_tuned_Df.pkl"
# checkFile = os.path.isfile(file_path)

# if checkFile: 
#     with open(file_path, "rb") as file:
#         total_results1_sorted = pickle.load(file)
# else:
#     total_results1 = hyperparamTuning(selectedTrainDeepList, selectedTestDeepList, selectedValDeepList, \
#                                  y_train, y_test, y_val, False, param_grid1, indices1)
    
#     total_results1_sorted = total_results1.sort_values(by='Weighted-F1', ascending=False)
#     with open(file_path, 'wb') as file:
#         pickle.dump(total_results1_sorted, file)

In [137]:
# pd.set_option('display.max_rows', None)  # Show all rows
# pd.set_option('display.max_columns', None)  # Show all columns
# pd.set_option('display.width', None)  # Don't limit the width of the display
# pd.set_option('display.max_colwidth', None)  # Don't truncate column content

# total_results1_sorted

In [138]:
# dataset = FeatureEngineeredDataset(trainList, testList, valList)
# dataLoader = DataLoader(dataset, batch_size=1, shuffle=False)

# file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/classifier_test_no_tuning_Df.pkl"
# checkFile = os.path.isfile(file_path)

# if checkFile: 
#     with open(file_path, "rb") as file:
#         df_results_sorted = pickle.load(file)
# else:
#     results = []
#     num_epochs = 30
#     i = 0
#     for trainSet, testSet, valSet in dataLoader:
#         if isinstance(trainSet, list):
#             trainSet = trainSet[0].squeeze(0)
#             testSet = testSet[0].squeeze(0)
#             valSet = valSet[0].squeeze(0)
#         else:
#             trainSet = trainSet.squeeze(0)
#             testSet = testSet.squeeze(0)
#             valSet = valSet.squeeze(0)

#         X_tensor = to_tensor(trainSet)
#         Y_tensor = to_tensor(y_train)

#         print("Curerntly at", dictKey[i])
# #         model1, _,  = model_train1(X_set=X_tensor, Y_set=Y_tensor, num_epochs=num_epochs, 
# #                                  umask=train_umask, nodalAtt=nodalAttn[i],
# #                                seq_len=train_seq_lengths, ranges=rangesTrain)

# #         result1 = classify_emotions(model=model1, X_set=X_tensor, Y_set=Y_tensor,  typeSet='train', 
# #                                    isSimpleFC=False, i_dict=i, 
# #                                    nodalAtt=nodalAttn[i],  umask=train_umask, seq_len=train_seq_lengths,
# #                                    ranges=rangesTrain )
# #         results.append(result1)
#         if i in selected_combination:
#             model2, _,  = model_train2(X_set=X_tensor, Y_set=Y_tensor, num_epochs=num_epochs, 
#                                      umask=train_umask, nodalAtt=nodalAttn[i],
#                                    seq_len=train_seq_lengths, ranges=rangesTrain)

#             X_tensor = to_tensor(testSet)
#             Y_tensor = to_tensor(y_test)
#             result2 = classify_emotions(model=model2, X_set=X_tensor, Y_set=Y_tensor,  typeSet='test', 
#                                        isSimpleFC=True, i_dict=i, 
#                                        nodalAtt=nodalAttn[i],  umask=test_umask, seq_len=test_seq_lengths,
#                                        ranges=rangesTest)
#     #         result2 = classify_emotions(model=model2, X_set=X_tensor, Y_set=Y_tensor,  typeSet='train', 
#     #                                    isSimpleFC=True, i_dict=i, 
#     #                                    nodalAtt=nodalAttn[i],  umask=train_umask, seq_len=train_seq_lengths,
#     #                                    ranges=rangesTrain)
#             results.append(result2)
#         else:
#             print("This is skipped")
#         i = i+1

#     columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', 'Weighted-F1', 'F1-micro', 'F1-macro']
#     df_results = pd.DataFrame(results, columns=columns)
#     df_results_sorted = df_results.sort_values(by='Weighted-F1', ascending=False)

#     with open(file_path, 'wb') as file:
#         pickle.dump(df_results_sorted, file)

In [139]:
file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/simple_classifier_tuned_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        total_results2_sorted = pickle.load(file)
else: 

    total_results2 = hyperparamTuning(trainList, testList, valList, 
                                     y_train, y_test, y_val, True, param_grid2, indices2,
                                     train_umask, test_umask, val_umask,
                                     train_seq_lengths, test_seq_lengths, val_seq_lengths,
                                     rangesTrain, rangesTest, rangesDev, nodalAttn)
    
    total_results2_sorted = total_results2.sort_values(by='Weighted-F1', ascending=False)
    with open(file_path, 'wb') as file:
        pickle.dump(total_results2_sorted, file)

num_epochs=20 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


  sub_total_results = pd.concat([sub_total_results, new_results], ignore_index=True)


{'anger': {'precision': 0.29347826086956524, 'recall': 0.19285714285714287, 'f1-score': 0.23275862068965517, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.3380281690140845, 'recall': 0.18045112781954886, 'f1-score': 0.23529411764705882, 'support': 133.0}, 'neutral': {'precision': 0.43144424131627057, 'recall': 0.7539936102236422, 'f1-score': 0.5488372093023256, 'support': 313.0}, 'sadness': {'precision': 0.4090909090909091, 'recall': 0.08571428571428572, 'f1-score': 0.14173228346456693, 'support': 105.0}, 'surprise': {'precision': 0.3225806451612903, 'recall': 0.28169014084507044, 'f1-score': 0.3007518796992481, 'support': 71.0}, 'accuracy': 0.39060568603213847, 'macro avg': {'precision': 0.2563746036360171, 'recall': 0.21352947249424142, 'f1-score': 0.20848201582897924, 'support': 809.0}, 'weighted avg': {'precision': 0.35469038540618

                                                                                                                       

Training stopped early at epoch 5.
Loss difference (9.170721795903614e-05) is below the threshold (0.0001).
{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133.0}, 'neutral': {'precision': 0.3868974042027194, 'recall': 1.0, 'f1-score': 0.5579322638146168, 'support': 313.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 105.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 71.0}, 'accuracy': 0.3868974042027194, 'macro avg': {'precision': 0.05527105774324563, 'recall': 0.14285714285714285, 'f1-score': 0.07970460911637382, 'support': 809.0}, 'weighted avg': {'precision': 0.14968960137880244, 'recall': 0.3868974042027194, 'f1-score': 0.21586254459082208, 'support': 809.0}}
Classifie

                                                                                                                       

Training stopped early at epoch 28.
Loss difference (5.0145251600075547e-05) is below the threshold (0.0001).
{'anger': {'precision': 0.2894736842105263, 'recall': 0.07857142857142857, 'f1-score': 0.12359550561797752, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 1.0, 'recall': 0.034482758620689655, 'f1-score': 0.06666666666666667, 'support': 29.0}, 'joy': {'precision': 0.35294117647058826, 'recall': 0.22556390977443608, 'f1-score': 0.27522935779816515, 'support': 133.0}, 'neutral': {'precision': 0.4239316239316239, 'recall': 0.792332268370607, 'f1-score': 0.5523385300668151, 'support': 313.0}, 'sadness': {'precision': 0.30612244897959184, 'recall': 0.14285714285714285, 'f1-score': 0.19480519480519481, 'support': 105.0}, 'surprise': {'precision': 0.35714285714285715, 'recall': 0.2112676056338028, 'f1-score': 0.26548672566371684, 'support': 71.0}, 'accuracy': 0.39555006180469715, 'macro avg': {'precision': 0.3899

                                                                                                                       

Training stopped early at epoch 22.
Loss difference (3.8991413212452475e-05) is below the threshold (0.0001).
{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.5, 'recall': 0.007518796992481203, 'f1-score': 0.014814814814814815, 'support': 133.0}, 'neutral': {'precision': 0.3878562577447336, 'recall': 1.0, 'f1-score': 0.5589285714285714, 'support': 313.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 105.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 71.0}, 'accuracy': 0.38813349814585907, 'macro avg': {'precision': 0.12683660824924764, 'recall': 0.1439312567132116, 'f1-score': 0.08196334089191233, 'support': 809.0}, 'weighted avg': {'precision': 0.23226082654400693, 'recall': 0.38813349814585907, 'f1-score': 0.2186835763

  total_results = pd.concat([sub_total_results, total_results], ignore_index=True)


{'anger': {'precision': 0.4838709677419355, 'recall': 0.10714285714285714, 'f1-score': 0.17543859649122806, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.42592592592592593, 'recall': 0.17293233082706766, 'f1-score': 0.24598930481283424, 'support': 133.0}, 'neutral': {'precision': 0.4183381088825215, 'recall': 0.9329073482428115, 'f1-score': 0.5776458951533135, 'support': 313.0}, 'sadness': {'precision': 0.9, 'recall': 0.08571428571428572, 'f1-score': 0.1565217391304348, 'support': 105.0}, 'surprise': {'precision': 0.1875, 'recall': 0.04225352112676056, 'f1-score': 0.06896551724137931, 'support': 71.0}, 'accuracy': 0.4227441285537701, 'macro avg': {'precision': 0.3450907146500547, 'recall': 0.1915643347219689, 'f1-score': 0.17493729326131285, 'support': 809.0}, 'weighted avg': {'precision': 0.4488781356146457, 'recall': 0.42274412855377

                                                                                                                       

{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133.0}, 'neutral': {'precision': 0.3868974042027194, 'recall': 1.0, 'f1-score': 0.5579322638146168, 'support': 313.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 105.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 71.0}, 'accuracy': 0.3868974042027194, 'macro avg': {'precision': 0.05527105774324563, 'recall': 0.14285714285714285, 'f1-score': 0.07970460911637382, 'support': 809.0}, 'weighted avg': {'precision': 0.14968960137880244, 'recall': 0.3868974042027194, 'f1-score': 0.21586254459082208, 'support': 809.0}}
Classified:  egat


  sub_total_results = pd.concat([sub_total_results, new_results], ignore_index=True)


{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 300.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 70.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 51.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 262.0}, 'neutral': {'precision': 0.4676220806794055, 'recall': 1.0, 'f1-score': 0.637251356238698, 'support': 881.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 188.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 132.0}, 'accuracy': 0.4676220806794055, 'macro avg': {'precision': 0.06680315438277222, 'recall': 0.14285714285714285, 'f1-score': 0.09103590803409971, 'support': 1884.0}, 'weighted avg': {'precision': 0.21867041033893644, 'recall': 0.4676220806794055, 'f1-score': 0.297992805120113, 'support': 1884.0}}
num_epochs=40 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class

                                                                                                                       

Training stopped early at epoch 3.
Loss difference (0.0002765315886798514) is below the threshold (0.001).
{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133.0}, 'neutral': {'precision': 0.3868974042027194, 'recall': 1.0, 'f1-score': 0.5579322638146168, 'support': 313.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 105.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 71.0}, 'accuracy': 0.3868974042027194, 'macro avg': {'precision': 0.05527105774324563, 'recall': 0.14285714285714285, 'f1-score': 0.07970460911637382, 'support': 809.0}, 'weighted avg': {'precision': 0.14968960137880244, 'recall': 0.3868974042027194, 'f1-score': 0.21586254459082208, 'support': 809.0}}
Classified

                                                                                                                       

{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133.0}, 'neutral': {'precision': 0.3868974042027194, 'recall': 1.0, 'f1-score': 0.5579322638146168, 'support': 313.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 105.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 71.0}, 'accuracy': 0.3868974042027194, 'macro avg': {'precision': 0.05527105774324563, 'recall': 0.14285714285714285, 'f1-score': 0.07970460911637382, 'support': 809.0}, 'weighted avg': {'precision': 0.14968960137880244, 'recall': 0.3868974042027194, 'f1-score': 0.21586254459082208, 'support': 809.0}}
Classified:  egat
{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 300.0}, 'disgust': {'precis

                                                                                                                       

{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133.0}, 'neutral': {'precision': 0.3868974042027194, 'recall': 1.0, 'f1-score': 0.5579322638146168, 'support': 313.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 105.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 71.0}, 'accuracy': 0.3868974042027194, 'macro avg': {'precision': 0.05527105774324563, 'recall': 0.14285714285714285, 'f1-score': 0.07970460911637382, 'support': 809.0}, 'weighted avg': {'precision': 0.14968960137880244, 'recall': 0.3868974042027194, 'f1-score': 0.21586254459082208, 'support': 809.0}}
Classified:  egat
{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 300.0}, 'disgust': {'precis

                                                                                                                       

Training stopped early at epoch 4.
Loss difference (0.0006122730517027497) is below the threshold (0.001).
{'anger': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140.0}, 'disgust': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 18.0}, 'fear': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 29.0}, 'joy': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133.0}, 'neutral': {'precision': 0.3868974042027194, 'recall': 1.0, 'f1-score': 0.5579322638146168, 'support': 313.0}, 'sadness': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 105.0}, 'surprise': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 71.0}, 'accuracy': 0.3868974042027194, 'macro avg': {'precision': 0.05527105774324563, 'recall': 0.14285714285714285, 'f1-score': 0.07970460911637382, 'support': 809.0}, 'weighted avg': {'precision': 0.14968960137880244, 'recall': 0.3868974042027194, 'f1-score': 0.21586254459082208, 'support': 809.0}}
Classified

In [140]:
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)  # Don't limit the width of the display
pd.set_option('display.max_colwidth', None)  # Don't truncate column content
    
total_results2_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro,train_time,hyperparams,num_epoch
11,bert,test,True,0.47293,0.47293,0.420293,0.47293,0.246378,157.463449,num_epochs=20-loss_difference_threshold=0.001-hidden_dims=512-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,20
15,bert,test,True,0.471868,0.471868,0.40644,0.471868,0.217844,104.404705,num_epochs=40-loss_difference_threshold=0.0001-hidden_dims=128-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,28
19,bert,test,True,0.490446,0.490446,0.385064,0.490446,0.186268,104.471232,num_epochs=20-loss_difference_threshold=0.0001-hidden_dims=256-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,20
10,bert,validation,True,0.390606,0.390606,0.336096,0.390606,0.208482,157.463449,num_epochs=20-loss_difference_threshold=0.001-hidden_dims=512-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,20
14,bert,validation,True,0.39555,0.39555,0.331308,0.39555,0.21116,104.404705,num_epochs=40-loss_difference_threshold=0.0001-hidden_dims=128-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,28
18,bert,validation,True,0.422744,0.422744,0.320658,0.422744,0.174937,104.471232,num_epochs=20-loss_difference_threshold=0.0001-hidden_dims=256-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,20
17,bert,test,True,0.467091,0.467091,0.299618,0.467091,0.093476,34.293842,num_epochs=30-loss_difference_threshold=0.0001-hidden_dims=256-dropout_rate=0.7-learning_rate=0.001-optimizers=<class 'torch.optim.sgd.SGD'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,22
5,egat,test,True,0.467622,0.467622,0.297993,0.467622,0.091036,1949.611947,num_epochs=30-loss_difference_threshold=0.0001-hidden_dims=128-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.sgd.SGD'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,30
7,egat,test,True,0.467622,0.467622,0.297993,0.467622,0.091036,2965.685751,num_epochs=40-loss_difference_threshold=0.0001-hidden_dims=128-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.sgd.SGD'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,40
9,egat,test,True,0.467622,0.467622,0.297993,0.467622,0.091036,159.028963,num_epochs=20-loss_difference_threshold=0.001-hidden_dims=256-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.sgd.SGD'>-criteria=<class 'torch.nn.modules.loss.NLLLoss'>,4


### Use the tuned result to predict the test

In [141]:
hyperparams = {
    'num_epochs': 30,
    'loss_difference_threshold': 0.001,
    'hidden_dims': 512,
    'dropout_rate': 0.3,
    'learning_rate': 0.0001,
    'optimizers': optim.Adam,
    'criteria': nn.NLLLoss
}

tuned_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
           'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)

new_results = objective_func(trainList[0], testList[0], valList[0],  y_train, y_test, y_val, hyperparams, 0, True,  
                            train_umask, test_umask, val_umask,
                            train_seq_lengths, test_seq_lengths, val_seq_lengths,
                            rangesTrain, rangesTest, rangesDev, nodalAttn[0])
tuned_results = pd.concat([tuned_results, new_results], ignore_index=True)
tuned_results

num_epochs=30 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


                                                                                                                       

KeyboardInterrupt: 

rerun again to correct the model name in the df

In [None]:
hyperparams = {
    'num_epochs': 20,
    'loss_difference_threshold': 0.0001,
    'hidden_dims': 128,
    'dropout_rate': 0.5,
    'learning_rate': 0.0001,
    'optimizers': optim.Adam,
    'criteria': nn.NLLLoss
}

tuned_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
           'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)

new_results = objective_func(trainList[3], testList[3], valList[3],  y_train, y_test, y_val, hyperparams, 3, True,  
                            train_umask, test_umask, val_umask,
                            train_seq_lengths, test_seq_lengths, val_seq_lengths,
                            rangesTrain, rangesTest, rangesDev, nodalAttn[3])
tuned_results = pd.concat([tuned_results, new_results], ignore_index=True)
tuned_results

In [None]:
hyperparams = {
    'num_epochs': 40,
    'loss_difference_threshold': 0.0001,
    'hidden_dims': 128,
    'dropout_rate': 0.5,
    'learning_rate': 0.0001,
    'optimizers': optim.Adam,
    'criteria': nn.NLLLoss
}

tuned_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
           'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)

new_results = objective_func(trainList[1], testList[1], valList[1],  y_train, y_test, y_val, hyperparams, 1, True,  
                            train_umask, test_umask, val_umask,
                            train_seq_lengths, test_seq_lengths, val_seq_lengths,
                            rangesTrain, rangesTest, rangesDev, nodalAttn[1])
tuned_results = pd.concat([tuned_results, new_results], ignore_index=True)
tuned_results

In [None]:
hyperparams = {
    'num_epochs': 40,
    'loss_difference_threshold': 0.001,
    'hidden_dims': 512,
    'dropout_rate': 0.7,
    'learning_rate': 0.001,
    'optimizers': optim.Adam,
    'criteria': nn.NLLLoss
}

tuned_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
           'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)

new_results = objective_func(trainList[2], testList[2], valList[2],  y_train, y_test, y_val, hyperparams, 2, True,  
                            train_umask, test_umask, val_umask,
                            train_seq_lengths, test_seq_lengths, val_seq_lengths,
                            rangesTrain, rangesTest, rangesDev, nodalAttn[2])
tuned_results = pd.concat([tuned_results, new_results], ignore_index=True)
tuned_results