"FC layers referenced from https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65"


In [88]:
import torch, time, os, pickle
import numpy as np
import torch.nn as nn
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif, chi2
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go
from sklearn.manifold import TSNE
import plotly.io as pio
from sklearn.utils import class_weight
import tqdm as notebook_tqdm
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
from tqdm import tqdm
from graph_context_dataset import FeatureEngineeredDataset
import warnings
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout
import random
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Make sure to specify which dataset to use

 - dataset_original
 - dataset_drop_noise
 - dataset_smote

In [2]:
dataset_path = "dataset_original"
# dataset_path = "dataset_drop_noise"
# dataset_path = "dataset_smote"

<h3> Declare functions

In [3]:
class MyNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate):
        super(MyNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dims[0])
        self.activation1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.activation2 = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(hidden_dims[1], output_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = self.activation1(x)
        x = self.fc2(x)
        x = self.activation2(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [4]:
# class FCLayer(nn.Module):
#     def __init__(self, input_dim, output_dim):
#         super(FCLayer, self).__init__()
#         self.fc = nn.Linear(input_dim, output_dim)

#     def forward(self, x):
#         x = self.fc(x)
#         return x

# class ActivationLayer(nn.Module):
#     def __init__(self, activation_fn):
#         super(ActivationLayer, self).__init__()
#         self.activation_fn = activation_fn

#     def forward(self, x):
#         x = self.activation_fn(x)
#         return x

# def tanh(x):
#     return torch.tanh(x)

# def sigmoid(x):
#     return torch.sigmoid(x)
# # loss function and its derivative
# def mse(y_true, y_pred):
#     return np.mean(np.power(y_true - y_pred, 2))

# def mse_prime(y_true, y_pred):
#     return 2 * (y_pred - y_true) / y_true.size


In [5]:
def oversample_data(X_train, Y_train, num_classes):
    # Determine the class with the maximum number of instances
    max_class_count = np.max(np.bincount(Y_train))
    # Generate indices for oversampling each class
    indices_list = [np.where(Y_train == i)[0] for i in range(num_classes)]
    # Oversample minority classes to match the count of the majority class
    for i, indices in enumerate(indices_list):
        if len(indices) < max_class_count:
            # Calculate the number of instances to oversample for this class
            num_to_oversample = max_class_count - len(indices)
            # Randomly select instances with replacement to oversample
            oversampled_indices = np.random.choice(indices, size=num_to_oversample, replace=True)
            # Append the oversampled instances to the original data
            X_train = np.concatenate((X_train, X_train[oversampled_indices]), axis=0)
            Y_train = np.concatenate((Y_train, Y_train[oversampled_indices]), axis=0)
    return torch.tensor(X_train), torch.tensor(Y_train)


In [6]:
def concatenate_tensors(tensor_list):
    if not tensor_list:
        raise ValueError("The tensor list is empty")

    feature_dim = tensor_list[0].shape[1]
    for tensor in tensor_list:
        if tensor.shape[1] != feature_dim:
            raise ValueError("All tensors must have the same feature dimension")
    
    concatenated_tensor = torch.cat(tensor_list, dim=0)
    
    return concatenated_tensor

<h4> Import labels and label decoder

In [7]:
file_path = "data/dump/" + dataset_path + "/labels_train.pkl"
with open(file_path, 'rb') as file:
    y_train = pickle.load(file)
y_train = torch.tensor(y_train)

file_path = "data/dump/" + dataset_path + "/labels_test.pkl"
with open(file_path, 'rb') as file:
    y_test = pickle.load(file)
y_test = torch.tensor(y_test)

file_path = "data/dump/" + dataset_path + "/labels_dev.pkl"
with open(file_path, 'rb') as file:
    y_val = pickle.load(file)
y_val = torch.tensor(y_val)
    
file_path = 'data/dump/' + dataset_path + '/label_decoder.pkl'
with open(file_path, 'rb') as file:
    label_decoder = pickle.load(file)

<h4> Import the CNNBiLSTM base-node outputs

first we disregard the u' and directly train the h'

In [8]:
train_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT-EGAT_train.pkl",
]

test_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT-EGAT_test.pkl",
]

val_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT-EGAT_dev.pkl",
]

dictKey = {
    0 : 'bert',
    1 : 'bert-select-few',
    2 : 'bert-select-mod',
    3 : 'bert-select-more',
    4 : 'dgcn',
    5 : 'dgcn-select',
    6 : 'gatv1',
    7 : 'gatv1-select',
    8 : 'gatv1-edge',
    9 : 'gatv1-edge-select',
    10 : 'gatv2-edge',
    11 : 'gatv2-edge-select',
    12 : 'rgat',
    13 : 'rgat-select',
    14 : 'egat',
    15 : 'egat-select',
    16 : 'bert-select-mod-dgcn',
    17 : 'bert-select-mod-gatv1',
    18 : 'bert-select-mod-gatv1-edge',
    19 : 'bert-select-mod-gatv2-edge',
    20 : 'bert-select-mod-rgat',
    21 : 'bert-select-mod-egat',
}

<h4> Getting BERT and GAT outputs for all sets

In [9]:
trainFeaturesList = []
testFeaturesList = []
valFeaturesList = []

In [10]:
def concatenate_tensors(tensor_list):
    return torch.cat(tensor_list, dim=0)

for file_path in train_file_paths:
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
        if file_path != train_file_paths[-1]: 
            trainFeaturesList.append(concatenate_tensors(data))
        else:
            trainFeaturesList.append(data)
            
for file_path in test_file_paths:
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
        if file_path != test_file_paths[-1]: 
            testFeaturesList.append(concatenate_tensors(data))
        else:
            testFeaturesList.append(data)
            
for file_path in val_file_paths:
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
        if file_path != val_file_paths[-1]: 
            valFeaturesList.append(concatenate_tensors(data))
        else:
            valFeaturesList.append(data)

EDA

In [11]:
# # Checking the structure of graph
# for n in range(10):
#     tensor_data_np = tensor_utterances[n].detach().numpy()

#     # Plot the data
#     plt.figure(figsize=(10, 5))
#     plt.plot(range(len(tensor_data_np)), tensor_data_np)
#     plt.title('Line Graph of Tensor Data')
#     plt.xlabel('Index')
#     plt.ylabel('Value')
#     plt.show()


In [12]:
# # Normalize the h' (1st GAT)
# data = cherry_picked_nodes.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [13]:
# # Normalize the h' (2nd GAT)
# data = all_node_feats.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [14]:
# # Normalize the u' or updated_representations
# data = tensor_utterances.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


<h3> Feature Selection and creating data combination for classifiers

Define select feature function

In [15]:
def get_norm_features(encoded_features):
    scaler = MinMaxScaler()
#       "FeatureSelected+BERT+GAT: ", concatenatedRepresentationTrain2.shape, "\n",
    features_scaled = scaler.fit_transform(encoded_features)
    return torch.tensor(features_scaled)

def get_selected_features(encoded_features, labels, top_n):
    if torch.is_tensor(encoded_features):
        encoded_features = encoded_features.detach().cpu().numpy()
    
    scaler = MinMaxScaler()
    features_scaled = scaler.fit_transform(encoded_features)

    selector = SelectKBest(score_func=f_classif, k=100)

    top_features_by_class = {}
    top_scores = {}

    for label in range(7):
        # Create a binary mask indicating instances belonging to the current class
        mask = (labels == label)

        # SelectKBest with chi2 as the scoring function
        selector = SelectKBest(score_func=chi2, k=top_n)  # Select top 20 features
        selector.fit(features_scaled, mask)  # Fit SelectKBest to the data
        # Get the indices of the top 20 features
        top_features_indices = np.argsort(selector.scores_)[-top_n:]
        scores = selector.scores_[top_features_indices]
        # Store the indices in the dictionary
        top_features_by_class[label] = top_features_indices
        top_scores[label] = scores

    concatenated_features_set = set()
    for label, indices in top_features_by_class.items():
        concatenated_features_set.update(indices)

    concatenated_features_indices = list(concatenated_features_set)

    concatenated_features_indices = np.array(concatenated_features_indices)

    # Select the desired features
    selected_features = encoded_features[:, concatenated_features_indices]
#     print(selected_features.shape)
    return selected_features, concatenated_features_indices

In [16]:
# pca = PCA(n_components=2)
# pca_result = pca.fit_transform(selected_features.detach().numpy())

# # Plot the PCA result with color-coded labels
# plt.figure(figsize=(8, 6))
# for label in np.unique(Y_train):
#     indices = Y_train == label
#     plt.scatter(pca_result[indices, 0], pca_result[indices, 1], label=f'{label_decoder[label]}', alpha=0.5)
#     plt.title('PCA Visualization of Selected Utterance Embeddings (Train) with Color-Coded Labels')
#     plt.xlabel('Principal Component 1')
#     plt.ylabel('Principal Component 2')
#     plt.legend()
#     plt.grid(True)
#     plt.show()

3d plottly

In [17]:
# X_train = selected_features
# X_train = X_train / np.linalg.norm(X_train, axis=1, keepdims=True)
# # Perform T-SNE dimensionality reduction
# tsne = TSNE(n_components=3, random_state=42)
# X_tsne = tsne.fit_transform(X_train)

# # Create a Plotly scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=X_tsne[:, 0],
#     y=X_tsne[:, 1],
#     z=X_tsne[:, 2],
#     mode='markers',
#     marker=dict(
#         size=3,
#         color=Y_train,  # Assuming Y_train contains labels for coloring
#         colorscale='Viridis',  # You can choose a different colorscale
#         opacity=0.8
#     )
# )])

# # Update layout
# fig.update_layout(title='3D T-SNE Plot', autosize=False,
#                   width=800, height=800)

# # Show the plot
# fig.show()

In [18]:
# Save the plot as an HTML file
# pio.write_html(fig, '3d_tsne_plot.html')

Now prepare the data that will be ued to train the classifier, there are 20 combinations. And pick top 7 combinations yielding top F1 weighted-score

In [19]:
trainList = []
testList = []
valList = []

file_path1 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/trainList.pkl"
file_path2 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/testList.pkl"
file_path3 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/valList.pkl"

checkFile1 = os.path.isfile(file_path1)
checkFile2 = os.path.isfile(file_path2)
checkFile3 = os.path.isfile(file_path3)

if checkFile1 and checkFile2 and checkFile3: 
    with open(file_path1, "rb") as file:
        trainList = pickle.load(file)
    with open(file_path2, "rb") as file:
        testList = pickle.load(file)
    with open(file_path3, "rb") as file:
        valList = pickle.load(file)
else:
    trainFeaturesList.append(data)
    #1
    trainList.append(trainFeaturesList[0])
    testList.append(testFeaturesList[0])
    valList.append(valFeaturesList[0])
    #2
    selectedTrainFeatures1a, indicesFeatures1a = get_selected_features(trainFeaturesList[0], y_train, 16)
    selectedTestFeatures1a = testFeaturesList[0][:, indicesFeatures1a]
    selectedValFeatures1a = valFeaturesList[0][:, indicesFeatures1a]
    trainList.append(selectedTrainFeatures1a)
    testList.append(selectedTestFeatures1a)
    valList.append(selectedValFeatures1a)
    #3
    selectedTrainFeatures1b, indicesFeatures1b = get_selected_features(trainFeaturesList[0], y_train, 32)
    selectedTestFeatures1b = testFeaturesList[0][:, indicesFeatures1b]
    selectedValFeatures1b = valFeaturesList[0][:, indicesFeatures1b]
    trainList.append(selectedTrainFeatures1b)
    testList.append(selectedTestFeatures1b)
    valList.append(selectedValFeatures1b)
    #4
    selectedTrainFeatures1c, indicesFeatures1c = get_selected_features(trainFeaturesList[0], y_train, 64)
    selectedTestFeatures1c = testFeaturesList[0][:, indicesFeatures1c]
    selectedValFeatures1c = valFeaturesList[0][:, indicesFeatures1c]
    trainList.append(selectedTrainFeatures1c)
    testList.append(selectedTestFeatures1c)
    valList.append(selectedValFeatures1c)
    #5
    trainList.append(trainFeaturesList[1])
    testList.append(testFeaturesList[1])
    valList.append(valFeaturesList[1])
    #6
    selectedTrainFeatures2, indicesFeatures2 = get_selected_features(trainFeaturesList[1], y_train, 12)
    selectedTestFeatures2 = testFeaturesList[1][:, indicesFeatures2]
    selectedValFeatures2 = valFeaturesList[1][:, indicesFeatures2]
    trainList.append(selectedTrainFeatures2)
    testList.append(selectedTestFeatures2)
    valList.append(selectedValFeatures2)
    #7
    trainList.append(trainFeaturesList[2])
    testList.append(testFeaturesList[2])
    valList.append(valFeaturesList[2])
    #8
    selectedTrainFeatures3, indicesFeatures3 = get_selected_features(trainFeaturesList[2], y_train, 12)
    selectedTestFeatures3 = testFeaturesList[2][:, indicesFeatures3]
    selectedValFeatures3 = valFeaturesList[2][:, indicesFeatures3]
    trainList.append(selectedTrainFeatures3)
    testList.append(selectedTestFeatures3)
    valList.append(selectedValFeatures3)
    #9
    trainList.append(trainFeaturesList[3])
    testList.append(testFeaturesList[3])
    valList.append(valFeaturesList[3])
    #10
    selectedTrainFeatures4, indicesFeatures4 = get_selected_features(trainFeaturesList[3], y_train, 12)
    selectedTestFeatures4 = testFeaturesList[3][:, indicesFeatures4]
    selectedValFeatures4 = valFeaturesList[3][:, indicesFeatures4]
    trainList.append(selectedTrainFeatures4)
    testList.append(selectedTestFeatures4)
    valList.append(selectedValFeatures4)
    #11
    trainList.append(trainFeaturesList[4])
    testList.append(testFeaturesList[4])
    valList.append(valFeaturesList[4])
    #12
    selectedTrainFeatures5, indicesFeatures5 = get_selected_features(trainFeaturesList[4], y_train, 12)
    selectedTestFeatures5 = testFeaturesList[4][:, indicesFeatures5]
    selectedValFeatures5 = valFeaturesList[4][:, indicesFeatures5]
    trainList.append(selectedTrainFeatures5)
    testList.append(selectedTestFeatures5)
    valList.append(selectedValFeatures5)
    #13
    trainList.append(trainFeaturesList[5])
    testList.append(testFeaturesList[5])
    valList.append(valFeaturesList[5])
    #14
    selectedTrainFeatures6, indicesFeatures6 = get_selected_features(trainFeaturesList[5], y_train, 12)
    selectedTestFeatures6 = testFeaturesList[5][:, indicesFeatures6]
    selectedValFeatures6 = valFeaturesList[5][:, indicesFeatures6]
    trainList.append(selectedTrainFeatures6)
    testList.append(selectedTestFeatures6)
    valList.append(selectedValFeatures6)
    #15
    trainList.append(trainFeaturesList[6])
    testList.append(testFeaturesList[6])
    valList.append(valFeaturesList[6])
    #16
    selectedTrainFeatures7, indicesFeatures7 = get_selected_features(trainFeaturesList[6][0], y_train, 12)
    selectedTestFeatures7 = testFeaturesList[6][0][:, indicesFeatures7]
    selectedValFeatures7 = testFeaturesList[6][0][:, indicesFeatures7]
    trainList.append(selectedTrainFeatures7)
    testList.append(selectedTestFeatures7)
    valList.append(selectedValFeatures7)
    selectedNormTrainFeatures1 = get_norm_features(selectedTrainFeatures1b)
    selectedNormTestFeatures1 = get_norm_features(selectedTestFeatures1b)
    selectedNormValFeatures1 = get_norm_features(selectedValFeatures1b)

    #17
    trainNormFeatures2 = get_norm_features(trainFeaturesList[1].detach().numpy())
    testNormFeatures2 = get_norm_features(testFeaturesList[1].detach().numpy())
    valNormFeatures2 = get_norm_features(valFeaturesList[1].detach().numpy())
    concatenatedTrainFeatures2 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures2), dim=1)
    concatenatedTestFeatures2 = torch.cat((selectedNormTestFeatures1, testNormFeatures2), dim=1)
    concatenatedValFeatures2 = torch.cat((selectedNormValFeatures1, valNormFeatures2), dim=1)
    trainList.append(concatenatedTrainFeatures2)
    testList.append(concatenatedTestFeatures2)
    valList.append(concatenatedValFeatures2)
    #18
    trainNormFeatures3 = get_norm_features(trainFeaturesList[2].detach().numpy())
    testNormFeatures3 = get_norm_features(testFeaturesList[2].detach().numpy())
    valNormFeatures3 = get_norm_features(valFeaturesList[2].detach().numpy())
    concatenatedTrainFeatures3 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures3), dim=1)
    concatenatedTestFeatures3 = torch.cat((selectedNormTestFeatures1, testNormFeatures3), dim=1)
    concatenatedValFeatures3 = torch.cat((selectedNormValFeatures1, valNormFeatures3), dim=1)
    trainList.append(concatenatedTrainFeatures3)
    testList.append(concatenatedTestFeatures3)
    valList.append(concatenatedValFeatures3)
    #19
    trainNormFeatures4 = get_norm_features(trainFeaturesList[3].detach().numpy())
    testNormFeatures4 = get_norm_features(testFeaturesList[3].detach().numpy())
    valNormFeatures4 = get_norm_features(valFeaturesList[3].detach().numpy())
    concatenatedTrainFeatures4 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures4), dim=1)
    concatenatedTestFeatures4 = torch.cat((selectedNormTestFeatures1, testNormFeatures4), dim=1)
    concatenatedValFeatures4 = torch.cat((selectedNormValFeatures1, valNormFeatures4), dim=1)
    trainList.append(concatenatedTrainFeatures4)
    testList.append(concatenatedTestFeatures4)
    valList.append(concatenatedValFeatures4)
    #20
    trainNormFeatures5 = get_norm_features(trainFeaturesList[4].detach().numpy())
    testNormFeatures5 = get_norm_features(testFeaturesList[4].detach().numpy())
    valNormFeatures5 = get_norm_features(valFeaturesList[4].detach().numpy())
    concatenatedTrainFeatures5 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures5), dim=1)
    concatenatedTestFeatures5 = torch.cat((selectedNormTestFeatures1, testNormFeatures5), dim=1)
    concatenatedValFeatures5 = torch.cat((selectedNormValFeatures1, valNormFeatures5), dim=1)
    trainList.append(concatenatedTrainFeatures5)
    testList.append(concatenatedTestFeatures5)
    valList.append(concatenatedValFeatures5)

    #21
    trainNormFeatures6 = get_norm_features(trainFeaturesList[5].detach().numpy())
    testNormFeatures6 = get_norm_features(testFeaturesList[5].detach().numpy())
    valNormFeatures6 = get_norm_features(valFeaturesList[5].detach().numpy())
    concatenatedTrainFeatures6 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures6), dim=1)
    concatenatedTestFeatures6 = torch.cat((selectedNormTestFeatures1, testNormFeatures6), dim=1)
    concatenatedValFeatures6 = torch.cat((selectedNormValFeatures1, valNormFeatures6), dim=1)
    trainList.append(concatenatedTrainFeatures6)
    testList.append(concatenatedTestFeatures6)
    valList.append(concatenatedValFeatures6)

    #22
    trainNormFeatures7 = get_norm_features(trainFeaturesList[6][0].detach().numpy())
    testNormFeatures7 = get_norm_features(testFeaturesList[6][0].detach().numpy())
    valNormFeatures7 = get_norm_features(valFeaturesList[6][0].detach().numpy())
    concatenatedTrainFeatures7 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures7), dim=1)
    concatenatedTestFeatures7 = torch.cat((selectedNormTestFeatures1, testNormFeatures7), dim=1)
    concatenatedValFeatures7 = torch.cat((selectedNormValFeatures1, valNormFeatures7), dim=1)
    trainList.append(concatenatedTrainFeatures7)
    testList.append(concatenatedTestFeatures7)
    valList.append(concatenatedValFeatures7)

    with open(file_path1, 'wb') as file:
        pickle.dump(trainList, file)
    with open(file_path2, 'wb') as file:
        pickle.dump(testList, file)
    with open(file_path3, 'wb') as file:
        pickle.dump(valList, file)

1. Prep data - normalize and create data loader

In [20]:
def prep_data(features, labels, isOversample):
    num_instances = len(features)
    num_classes = 7

    # Rescale input features
    # selected_features = concatenated_representation / np.linalg.norm(concatenated_representation, axis=1, keepdims=True)

    # Apply data resampling (oversampling) to balance class distribution
    if isOversample:
        X_set, Y_set = oversample_data(features, labels, num_classes)
    else:
        X_set, Y_set = features, labels

    # Calculate class weights for class weighting
#     class_counts = np.bincount(labels)
#     total_instances = np.sum(class_counts)
    # class_weights = torch.tensor([total_instances / (num_classes * count) for count in class_counts], dtype=torch.float32)

    # Convert data to PyTorch tensors
    X_tensor = torch.tensor(X_set.clone().detach(), dtype=torch.float32).clone().detach()
    Y_tensor = torch.tensor(Y_set.clone().detach(), dtype=torch.long).clone().detach()
    # print(X_train_tensor.shape, Y_train_tensor.shape)
    # X_train_tensor = torch.tensor(selected_features)
    # Y_train_tensor = torch.tensor(y_train)

    unique_labels, label_counts = np.unique(Y_set, return_counts=True)

    # Print the counts for each unique label
#     for label, count in zip(unique_labels, label_counts):
#         print(f"Label {label_decoder[label]}: {count} occurrences")

#     print(X_tensor.shape, Y_tensor.shape)
    # Create a TensorDataset
    dataset = TensorDataset(X_tensor, Y_tensor)

    return X_tensor, Y_tensor

2. Training

In [191]:
def model_train1(X_set, Y_set, num_epochs=20, batch_size=64, loss_difference_threshold=0.01, 
                 hidden_dims=[256, 128], dropout_rate=0.5, lr=0.0001, optimizer_class=optim.Adam, criterion_class=nn.CrossEntropyLoss):
    output_dim = 7  # Number of classes
    model = MyNetwork(len(X_set[0]), hidden_dims, output_dim, dropout_rate)
    criterion = criterion_class()
    optimizer = optimizer_class(model.parameters(), lr=lr)
    loss_history = []
    accuracy_history = []
    print_interval = 1  # Print tqdm every epoch
    previous_loss = float('inf')

    # Create dataset and dataloader
    dataset = TensorDataset(X_set, Y_set)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    epoch_num = num_epochs
    for epoch in range(num_epochs):
        total_loss = 0.0
        correct_predictions = 0
        total_instances = 0
        with tqdm(total=len(dataloader), desc=f'Epoch {epoch+1}/{num_epochs}', leave=False) as pbar:
            for inputs, labels in dataloader:
                inputs = inputs.float()  # Ensure inputs are float32
                labels = labels.long()   # Ensure labels are long
                outputs = model(inputs)
                outputs = outputs.squeeze()
                labels = labels.squeeze()
                loss = criterion(outputs, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                _, predicted = torch.max(outputs, dim=1)
                correct_predictions += (predicted == labels).sum().item()
                total_instances += labels.size(0)
                pbar.update(1)

        epoch_loss = total_loss / total_instances
        epoch_accuracy = correct_predictions / total_instances
        loss_history.append(epoch_loss)
        accuracy_history.append(epoch_accuracy)

        if epoch > 0 and abs(epoch_loss - previous_loss) < loss_difference_threshold:
            epoch_num = epoch
            break

        previous_loss = epoch_loss

    return model, epoch_num

In [210]:
def model_train2(X_set, y_set, num_epochs=20, batch_size=128, early_stopping_threshold=0.01,
                 hidden_dim=128, dropout_prob=0.5, learning_rate=0.0005, optimizer_class=optim.Adam, criterion_class=nn.CrossEntropyLoss):
    input_dim = len(X_set[0])  # Size of the input features
    output_dim = 7  # Number of classes

    model = FCClassifier(input_dim, hidden_dim, output_dim, dropout_prob)
    criterion = criterion_class()
    optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    # Create DataLoader for batching
    dataset = TensorDataset(X_set, y_set)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    epoch_losses = []  # List to store loss values for each epoch
    epoch_num = num_epochs
    # Training loop
    with tqdm(total=num_epochs, unit="epoch", desc="Training") as tepoch:
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0

            for batch_features, batch_labels in dataloader:
                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = model(batch_features)
                loss = criterion(outputs, batch_labels)

                # Backward pass and optimize
                loss.backward()
                optimizer.step()

                # Update running loss
                running_loss += loss.item()

            # Calculate and store average loss for the epoch
            epoch_loss = running_loss / len(dataloader)
            epoch_losses.append(epoch_loss)

            # Update tqdm description
            tepoch.set_postfix(loss=epoch_loss)
            tepoch.update()

            # Check for early stopping
            if epoch > 0 and abs(epoch_losses[-2] - epoch_losses[-1]) < early_stopping_threshold:
                epoch_num = epoch
                break

    return model, epoch_num

In [121]:
def classify_emotions(model, X_tensor, Y_tensor, typeSet, isSimpleFC, i_dict):
    # Set the model to evaluation mode
    if X_tensor.dtype != torch.float32:
        X_tensor = X_tensor.float()
        
    model.eval()

    # Predict on the data
    with torch.no_grad():
        outputs = model(X_tensor)
        _, predicted = torch.max(outputs, 1)

    # Convert predicted tensor to numpy array
    predicted = predicted.cpu().numpy()
    Y_tensor = Y_tensor.cpu().numpy()

    # Calculate classification report
    report = classification_report(Y_tensor, predicted, target_names=label_decoder.values(), output_dict=True, zero_division=0)

    # Extract metrics
    accuracy = report['accuracy']
    recall = report['weighted avg']['recall']
    weighted_f1 = report['weighted avg']['f1-score']
    f1_micro = report.get('micro avg', {}).get('f1-score', accuracy)
    f1_macro = report.get('macro avg', {}).get('f1-score', 0.0) 
    
    if typeSet == "validation":
        print("Classified: ", dictKey[i_dict])
    
    return dictKey[i_dict], typeSet, isSimpleFC, accuracy, recall, weighted_f1, f1_micro, f1_macro


In [24]:
dataset = FeatureEngineeredDataset(trainList, testList, valList)
dataLoader = DataLoader(dataset, batch_size=1, shuffle=False)

In [25]:
i = 0
for trainSet, testSet, valSet in tqdm(dataLoader, desc="Encoding Progress", unit="batch"):
    print(i, type(trainSet))
    if isinstance(trainSet, list):
        print(type(trainSet[0]))
        sample = trainSet[0]
        print(sample.shape)
    else:
        print(trainSet.squeeze(0).shape)
    i = i+1

Encoding Progress: 100%|███████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 106.76batch/s]

0 <class 'torch.Tensor'>
torch.Size([12840, 768])
1 <class 'torch.Tensor'>
torch.Size([12840, 87])
2 <class 'torch.Tensor'>
torch.Size([12840, 159])
3 <class 'torch.Tensor'>
torch.Size([12840, 287])
4 <class 'torch.Tensor'>
torch.Size([12840, 64])
5 <class 'torch.Tensor'>
torch.Size([12840, 30])
6 <class 'torch.Tensor'>
torch.Size([12840, 64])
7 <class 'torch.Tensor'>
torch.Size([12840, 46])
8 <class 'torch.Tensor'>
torch.Size([12840, 64])
9 <class 'torch.Tensor'>
torch.Size([12840, 44])
10 <class 'torch.Tensor'>
torch.Size([12840, 64])
11 <class 'torch.Tensor'>
torch.Size([12840, 45])
12 <class 'torch.Tensor'>
torch.Size([12840, 64])
13 <class 'torch.Tensor'>
torch.Size([12840, 45])
14 <class 'list'>
<class 'torch.Tensor'>
torch.Size([1, 12840, 64])
15 <class 'torch.Tensor'>
torch.Size([12840, 40])
16 <class 'torch.Tensor'>
torch.Size([12840, 223])
17 <class 'torch.Tensor'>
torch.Size([12840, 223])
18 <class 'torch.Tensor'>
torch.Size([12840, 223])
19 <class 'torch.Tensor'>
torch.Size




<b> This is where value for isSimpleFC is decided

In [150]:
file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/classifier_test_no_tuning_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        df_results_sorted = pickle.load(file)
else:
    results = []
    num_epochs = 50
    batch_size = 64
    i = 0

    for trainSet, testSet, valSet in dataLoader:
        if isinstance(trainSet, list):
            trainSet = trainSet[0].squeeze(0)
            testSet = testSet[0].squeeze(0)
            valSet = valSet[0].squeeze(0)
        else:
            trainSet = trainSet.squeeze(0)
            testSet = testSet.squeeze(0)
            valSet = valSet.squeeze(0)

        X_tensor, Y_tensor = prep_data(trainSet.clone().detach(), y_train, False)
#         deepFC
        model, _ = model_train1(X_tensor, Y_tensor, num_epochs, batch_size)
        result = classify_emotions(model, X_tensor.clone().detach(), Y_tensor.clone().detach(), 'train', False, i)
#         results.append(result)

        X_tensor, Y_tensor = prep_data(testSet.clone().detach(), y_test, False)
        result = classify_emotions(model, X_tensor.clone().detach(), Y_tensor.clone().detach(), 'test', False, i)
        results.append(result)

        i += 1

    i = 0
    for trainSet, testSet, valSet in dataLoader:
        if isinstance(trainSet, list):
            trainSet = trainSet[0].squeeze(0)
            testSet = testSet[0].squeeze(0)
            valSet = valSet[0].squeeze(0)
        else:
            trainSet = trainSet.squeeze(0)
            testSet = testSet.squeeze(0)
            valSet = valSet.squeeze(0)

        X_tensor, Y_tensor = prep_data(trainSet, y_train, False)
#         simpleFC
        model, _ = model_train2(X_tensor, Y_tensor, num_epochs, batch_size)

        result = classify_emotions(model, X_tensor, Y_tensor, 'train', True, i)
#         results.append(result)

        X_tensor, Y_tensor = prep_data(testSet, y_test, False)
        result = classify_emotions(model, X_tensor, Y_tensor, 'test', True, i)
        results.append(result)
        i += 1

    columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', 'Weighted-F1', 'F1-micro', 'F1-macro']
    df = pd.DataFrame(results, columns=columns)
    df_results_sorted = df.sort_values(by='Weighted-F1', ascending=False)

    with open(file_path, 'wb') as file:
        pickle.dump(df_results_sorted, file)

In [65]:
# tmp
# columns = list(df_results_sorted.columns)

# # Modify the 2nd and 3rd column names
# columns[1] = 'typeSet'
# columns[2] = 'isSimpleFC'

# # Assign the new column names back to the DataFrame
# df_results_sorted.columns = columns
# df_results_sorted["typeSet"] = "test"
# with open(file_path, 'wb') as file:
#     pickle.dump(df_results_sorted, file)

In [151]:
df_results_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro
22,bert,test,True,0.619706,0.619706,0.572623,0.619706,0.337923
25,bert-select-more,test,True,0.614118,0.614118,0.569297,0.614118,0.337338
2,bert-select-mod,test,False,0.572353,0.572353,0.560681,0.572353,0.38867
20,bert-select-mod-rgat,test,False,0.608529,0.608529,0.557835,0.608529,0.319851
24,bert-select-mod,test,True,0.611176,0.611176,0.557144,0.611176,0.320935
16,bert-select-mod-dgcn,test,False,0.603529,0.603529,0.557021,0.603529,0.319695
3,bert-select-more,test,False,0.565294,0.565294,0.555539,0.565294,0.384001
19,bert-select-mod-gatv2-edge,test,False,0.602059,0.602059,0.554937,0.602059,0.316779
0,bert,test,False,0.580882,0.580882,0.553987,0.580882,0.388461
21,bert-select-mod-egat,test,False,0.604412,0.604412,0.552006,0.604412,0.312812


<h4> Select top 10 unique data combinations then tune

In [152]:
top_10_combinations

['bert',
 'bert-select-more',
 'gatv1',
 'gatv2-edge-select',
 'gatv1-select',
 'gatv2-edge',
 'gatv1-edge-select',
 'gatv1-edge',
 'rgat',
 'dgcn-select']

In [153]:
max_iterations = 10
counter = 0
combination1 = []
combination2 = []
seen_combinations = set()

for idx, row in df_results_sorted.iterrows():
    if counter >= max_iterations:
        break

    if row['data_combination'] in seen_combinations:
        continue

    if row['isSimpleFC']:
        combination2.append(row['data_combination'])
    else:
        combination1.append(row['data_combination'])

    seen_combinations.add(row['data_combination'])
    counter += 1

# Display the results
print("Combination 1 (isSimpleFC=False):", combination1)
print("Combination 2 (isSimpleFC=True):", combination2)

Combination 1 (isSimpleFC=False): ['bert-select-mod', 'bert-select-mod-rgat', 'bert-select-mod-dgcn', 'bert-select-mod-gatv2-edge', 'bert-select-mod-egat', 'bert-select-mod-gatv1', 'bert-select-mod-gatv1-edge']
Combination 2 (isSimpleFC=True): ['bert', 'bert-select-more', 'bert-select-few']


In [154]:
indices1 = [key for key, value in dictKey.items() if value in combination1]
indices2 = [key for key, value in dictKey.items() if value in combination2]

print("Indices for isSimpleFC=False:", indices1)
print("Indices for isSimpleFC=True:", indices2)

Indices for isSimpleFC=False: [2, 16, 17, 18, 19, 20, 21]
Indices for isSimpleFC=True: [0, 1, 3]


In [155]:
selectedTrainDeepList = [trainList[i] for i in indices1]
selectedTestDeepList = [testList[i] for i in indices1]
selectedValDeepList = [valList[i] for i in indices1]

len(selectedTrainDeepList)

7

In [156]:
indices1

[2, 16, 17, 18, 19, 20, 21]

In [157]:
for trainSet in selectedTrainList:
    print(type(trainSet))

<class 'torch.Tensor'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [158]:
selectedTrainList = [trainList[i] for i in indices2]
selectedTestList = [testList[i] for i in indices2]
selectedValList = [valList[i] for i in indices2]

len(selectedTrainList)

3

<h4> Tuning using random parameters

In [163]:
# it should call both model_train1 and 2

def objective_func(X_train, X_test, X_val, 
               y_train, y_test, y_val, hyperparams, i_dict, isSimpleFC):
    results = []
    hyperparams_string = (
        f'num_epochs={hyperparams["num_epochs"]} '
        f'batch_size={hyperparams["batch_size"]} '
        f'loss_difference_threshold={hyperparams["loss_difference_threshold"]} '
        f'hidden_dims={hyperparams["hidden_dims"]} '
        f'dropout_rate={hyperparams["dropout_rate"]} '
        f'learning_rate={hyperparams["learning_rate"]} '
        f'optimizers={hyperparams["optimizers"]} '
        f'criteria={hyperparams["criteria"]}'
    )    
    print(hyperparams_string)
    def to_tensor(data):
        if isinstance(data, torch.Tensor):
            return data
        elif isinstance(data, np.ndarray):
            return torch.tensor(data)
        else:
            raise TypeError(f"Unsupported data type: {type(data)}")
            
#     X_tensor, Y_tensor = prep_data(X_train.clone().detach(), y_train, False)
    X_train_tensor = to_tensor(X_train)
    y_train_tensor = to_tensor(y_train).long()
    X_val_tensor = to_tensor(X_val)
    y_val_tensor = to_tensor(y_val).long()
    X_test_tensor = to_tensor(X_test)
    y_test_tensor = to_tensor(y_test).long()
# train
    start_time = time.time()
    if isSimpleFC:
        model, num_epoch = model_train2(X_train_tensor, y_train_tensor, hyperparams["num_epochs"],
                            hyperparams["batch_size"], hyperparams["loss_difference_threshold"], 
                            hyperparams["hidden_dims"], hyperparams["dropout_rate"],
                            hyperparams["learning_rate"], hyperparams["optimizers"], hyperparams["criteria"])        
    else:
        model, num_epoch = model_train1(X_train_tensor, y_train_tensor, hyperparams["num_epochs"],
                            hyperparams["batch_size"], hyperparams["loss_difference_threshold"], 
                            hyperparams["hidden_dims"], hyperparams["dropout_rate"],
                            hyperparams["learning_rate"], hyperparams["optimizers"], hyperparams["criteria"])
    end_time = time.time()
    elapsed_time = end_time - start_time
# val
#     X_tensor, Y_tensor = prep_data(X_val.clone().detach(), y_val, False)
    result = classify_emotions(model, X_val_tensor, y_val_tensor, \
                               'validation', isSimpleFC, i_dict)
    elapsed_time = time.time() - start_time
    
    result = list(result)
    hyperparams_string = f'num_epochs={hyperparams["num_epochs"]}-batch_size={hyperparams["batch_size"]}-loss_difference_threshold={hyperparams["loss_difference_threshold"]}-hidden_dims={hyperparams["hidden_dims"]}-dropout_rate={hyperparams["dropout_rate"]}-learning_rate={hyperparams["learning_rate"]}-optimizers={hyperparams["optimizers"]}-criteria={hyperparams["criteria"]}'
    result.append(elapsed_time)
    result.append(hyperparams_string)
    result.append(num_epoch)
    results.append(result)
    
# test
#     X_tensor, Y_tensor = prep_data(X_test.clone().detach(), y_test, False)
    result = classify_emotions(model, X_test_tensor, y_test_tensor, \
                               'test', isSimpleFC, i_dict)
    
    result = list(result)
    result.append(elapsed_time)
    result.append(hyperparams_string)
    result.append(num_epoch)
    results.append(result)
    
    columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch']
    df = pd.DataFrame(results, columns=columns)
    df_results_sorted = df.sort_values(by='data_combination', ascending=False)
    
    return df_results_sorted


# def objective_func(X_train, X_test, X_val, 
#                y_train, y_test, y_val, hyperparams, i_dict):

In [217]:
def random_search(X_train, X_test, X_val, \
                  y_train, y_test, y_val, \
                  param_grid, isSimpleFC, i_dict, MAX_EVALS = 15):
    
    sub_total_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)
    
    for i in range(MAX_EVALS):
        hyperparams = {k: random.sample(v, 1)[0] for k, v in param_grid.items()}

        try:
            new_results = objective_func(X_train, X_test, X_val,  y_train, y_test, y_val,
                                hyperparams, i_dict, isSimpleFC)
            sub_total_results = pd.concat([sub_total_results, new_results], ignore_index=True)
            
        except Exception as e:
            print(f"Error with hyperparams {hyperparams}: {e}")
            continue
    
    # Sort with best score on top
    return sub_total_results 

In [215]:
param_grid1 = {
    'num_epochs': [50, 80, 120],
    'batch_size': [1, 4, 32, 64],
    'loss_difference_threshold': [0.01, 0.001],
    'hidden_dims': [[256, 128], [128, 64], [64, 32]],
    'dropout_rate': [0.3, 0.5, 0.7],
    'learning_rate': [0.001, 0.0001, 0.00001],
    'optimizers': [optim.Adam, optim.SGD],
    'criteria': [nn.CrossEntropyLoss, nn.NLLLoss]
}
param_grid2 = {
    'num_epochs': [50, 80, 120],
    'batch_size': [1, 4, 32, 64],
    'loss_difference_threshold': [0.01, 0.001],
    'hidden_dims': [128, 256, 512],
    'dropout_rate': [0.3, 0.5, 0.7],
    'learning_rate': [0.001, 0.0001, 0.00001],
    'optimizers': [optim.Adam, optim.SGD],
    'criteria': [nn.CrossEntropyLoss, nn.NLLLoss]
}

<h5> First find the best hyperparameter combination for the DeepClassifier.

In [222]:
def hyperparamTuning(X_trainSet, X_testSet, X_valSet, y_train, y_test, y_val, isSimpleFC, param_grid, indices):
    total_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)
    for i in range(len(indices)):
        print("============ PART ", i, "============")
        X_train = X_trainSet[i]
        X_test = X_testSet[i]
        X_val = X_valSet[i]

        sub_total_results = random_search(X_train, X_test, X_val, y_train, y_test, y_val,
                     param_grid, isSimpleFC, indices[i])
        total_results = pd.concat([sub_total_results, total_results], ignore_index=True)

    return total_results


In [232]:
file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/deep_classifier_tuned_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        total_results1_sorted = pickle.load(file)
else:
    total_results1 = hyperparamTuning(selectedTrainDeepList, selectedTestDeepList, selectedValDeepList, \
                                 y_train, y_test, y_val, False, param_grid1, indices1)
    
    total_results1_sorted = total_results1.sort_values(by='Weighted-F1', ascending=False)
    with open(file_path, 'wb') as file:
        pickle.dump(total_results1_sorted, file)

In [231]:
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)  # Don't limit the width of the display
pd.set_option('display.max_colwidth', None)  # Don't truncate column content

total_results1_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro,train_time,hyperparams,num_epoch
167,bert-select-mod,test,False,0.607059,0.607059,0.551354,0.607059,0.313182,18.30645,"num_epochs=120-batch_size=4-loss_difference_threshold=0.01-hidden_dims=[128, 64]-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",2
135,bert-select-mod-dgcn,test,False,0.598529,0.598529,0.551045,0.598529,0.311214,4.62677,"num_epochs=120-batch_size=32-loss_difference_threshold=0.001-hidden_dims=[128, 64]-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",3
19,bert-select-mod-egat,test,False,0.589412,0.589412,0.544219,0.589412,0.306741,114.906927,"num_epochs=120-batch_size=4-loss_difference_threshold=0.001-hidden_dims=[256, 128]-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",8
151,bert-select-mod,test,False,0.607647,0.607647,0.543439,0.607647,0.307338,22.273429,"num_epochs=120-batch_size=4-loss_difference_threshold=0.01-hidden_dims=[128, 64]-dropout_rate=0.7-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",2
3,bert-select-mod-egat,test,False,0.582059,0.582059,0.532354,0.582059,0.295252,85.379559,"num_epochs=80-batch_size=4-loss_difference_threshold=0.001-hidden_dims=[64, 32]-dropout_rate=0.5-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",13
133,bert-select-mod-dgcn,test,False,0.567353,0.567353,0.518531,0.567353,0.286114,2.392093,"num_epochs=80-batch_size=64-loss_difference_threshold=0.001-hidden_dims=[128, 64]-dropout_rate=0.5-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",3
97,bert-select-mod-gatv1-edge,test,False,0.574118,0.574118,0.515821,0.574118,0.280346,45.591364,"num_epochs=80-batch_size=4-loss_difference_threshold=0.01-hidden_dims=[256, 128]-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",2
103,bert-select-mod-gatv1,test,False,0.582059,0.582059,0.512549,0.582059,0.275203,4.689491,"num_epochs=120-batch_size=32-loss_difference_threshold=0.001-hidden_dims=[256, 128]-dropout_rate=0.5-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",2
166,bert-select-mod,validation,False,0.555404,0.555404,0.494594,0.555404,0.300395,18.30645,"num_epochs=120-batch_size=4-loss_difference_threshold=0.01-hidden_dims=[128, 64]-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",2
134,bert-select-mod-dgcn,validation,False,0.543776,0.543776,0.489827,0.543776,0.293212,4.62677,"num_epochs=120-batch_size=32-loss_difference_threshold=0.001-hidden_dims=[128, 64]-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>",3


In [228]:
file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/simple_classifier_tuned_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        total_results2_sorted = pickle.load(file)
else: 
    total_results2 = hyperparamTuning(selectedTrainList, selectedTestList, selectedValList, \
                                     y_train, y_test, y_val, True, param_grid2, indices2)
    
    total_results2_sorted = total_results2.sort_values(by='Weighted-F1', ascending=False)
    with open(file_path, 'wb') as file:
        pickle.dump(total_results2_sorted, file)

In [229]:
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)  # Don't limit the width of the display
pd.set_option('display.max_colwidth', None)  # Don't truncate column content
    
total_results2_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro,train_time,hyperparams,num_epoch
67,bert,test,True,0.627059,0.627059,0.592187,0.627059,0.38516,105.488689,num_epochs=120-batch_size=4-loss_difference_threshold=0.01-hidden_dims=512-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,7
85,bert,test,True,0.621471,0.621471,0.577937,0.621471,0.359039,1034.278327,num_epochs=50-batch_size=1-loss_difference_threshold=0.001-hidden_dims=512-dropout_rate=0.7-learning_rate=1e-05-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,16
29,bert-select-more,test,True,0.616765,0.616765,0.575347,0.616765,0.362269,229.417084,num_epochs=50-batch_size=1-loss_difference_threshold=0.01-hidden_dims=128-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,8
87,bert,test,True,0.615,0.615,0.573965,0.615,0.345269,11.471468,num_epochs=50-batch_size=32-loss_difference_threshold=0.01-hidden_dims=256-dropout_rate=0.5-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,8
17,bert-select-more,test,True,0.614118,0.614118,0.571017,0.614118,0.340444,16.709,num_epochs=80-batch_size=32-loss_difference_threshold=0.001-hidden_dims=128-dropout_rate=0.5-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,19
21,bert-select-more,test,True,0.615,0.615,0.569067,0.615,0.346742,17.810554,num_epochs=50-batch_size=64-loss_difference_threshold=0.001-hidden_dims=512-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,21
89,bert,test,True,0.61,0.61,0.563295,0.61,0.329421,23.792755,num_epochs=50-batch_size=4-loss_difference_threshold=0.01-hidden_dims=512-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.sgd.SGD'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,7
81,bert,test,True,0.614118,0.614118,0.560698,0.614118,0.321859,460.027319,num_epochs=50-batch_size=1-loss_difference_threshold=0.001-hidden_dims=128-dropout_rate=0.7-learning_rate=1e-05-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,16
53,bert-select-few,test,True,0.609412,0.609412,0.560173,0.609412,0.348067,63.147842,num_epochs=120-batch_size=4-loss_difference_threshold=0.001-hidden_dims=512-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,12
27,bert-select-more,test,True,0.602941,0.602941,0.547691,0.602941,0.310494,5.688374,num_epochs=50-batch_size=32-loss_difference_threshold=0.01-hidden_dims=256-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,5
