"FC layers referenced from https://towardsdatascience.com/math-neural-network-from-scratch-in-python-d6da9f29ce65"


In [1]:
import torch, time, os, pickle
import numpy as np
import torch.nn as nn
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch.optim as optim
from sklearn.metrics import precision_score, recall_score, f1_score, precision_recall_fscore_support
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, f_classif, chi2
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go
from sklearn.manifold import TSNE
import plotly.io as pio
from sklearn.utils import class_weight
import tqdm as notebook_tqdm
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report
from tqdm import tqdm
from graph_context_dataset import FeatureEngineeredDataset
import warnings
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout
import random
from model import FCClassifier, DATASET_PATH
%load_ext autoreload
%autoreload 2

Make sure to specify which dataset to use

 - dataset_original
 - dataset_drop_noise
 - dataset_smote

In [2]:
# dataset_path = "dataset_original"
# dataset_path = "dataset_drop_noise"
# dataset_path = "dataset_smote"
dataset_path = DATASET_PATH

<h3> Declare functions

In [3]:
class MyNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate):
        super(MyNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dims[0])
        self.activation1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        self.activation2 = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(hidden_dims[1], output_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = self.activation1(x)
        x = self.fc2(x)
        x = self.activation2(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [4]:
# class FCLayer(nn.Module):
#     def __init__(self, input_dim, output_dim):
#         super(FCLayer, self).__init__()
#         self.fc = nn.Linear(input_dim, output_dim)

#     def forward(self, x):
#         x = self.fc(x)
#         return x

# class ActivationLayer(nn.Module):
#     def __init__(self, activation_fn):
#         super(ActivationLayer, self).__init__()
#         self.activation_fn = activation_fn

#     def forward(self, x):
#         x = self.activation_fn(x)
#         return x

# def tanh(x):
#     return torch.tanh(x)

# def sigmoid(x):
#     return torch.sigmoid(x)
# # loss function and its derivative
# def mse(y_true, y_pred):
#     return np.mean(np.power(y_true - y_pred, 2))

# def mse_prime(y_true, y_pred):
#     return 2 * (y_pred - y_true) / y_true.size


In [5]:
def oversample_data(X_train, Y_train, num_classes):
    # Determine the class with the maximum number of instances
    max_class_count = np.max(np.bincount(Y_train))
    # Generate indices for oversampling each class
    indices_list = [np.where(Y_train == i)[0] for i in range(num_classes)]
    # Oversample minority classes to match the count of the majority class
    for i, indices in enumerate(indices_list):
        if len(indices) < max_class_count:
            # Calculate the number of instances to oversample for this class
            num_to_oversample = max_class_count - len(indices)
            # Randomly select instances with replacement to oversample
            oversampled_indices = np.random.choice(indices, size=num_to_oversample, replace=True)
            # Append the oversampled instances to the original data
            X_train = np.concatenate((X_train, X_train[oversampled_indices]), axis=0)
            Y_train = np.concatenate((Y_train, Y_train[oversampled_indices]), axis=0)
    return torch.tensor(X_train), torch.tensor(Y_train)


In [6]:
def concatenate_tensors(tensor_list):
    if not tensor_list:
        raise ValueError("The tensor list is empty")

    feature_dim = tensor_list[0].shape[1]
    for tensor in tensor_list:
        if tensor.shape[1] != feature_dim:
            raise ValueError("All tensors must have the same feature dimension")
    
    concatenated_tensor = torch.cat(tensor_list, dim=0)
    
    return concatenated_tensor

<h4> Import labels and label decoder

In [7]:
file_path = "data/dump/" + dataset_path + "/labels_train.pkl"
with open(file_path, 'rb') as file:
    y_train = pickle.load(file)
y_train = torch.tensor(y_train)

file_path = "data/dump/" + dataset_path + "/labels_test.pkl"
with open(file_path, 'rb') as file:
    y_test = pickle.load(file)
y_test = torch.tensor(y_test)

file_path = "data/dump/" + dataset_path + "/labels_dev.pkl"
with open(file_path, 'rb') as file:
    y_val = pickle.load(file)
y_val = torch.tensor(y_val)
    
file_path = 'data/dump/' + dataset_path + '/label_decoder.pkl'
with open(file_path, 'rb') as file:
    label_decoder = pickle.load(file)

<h4> Import the BERT base-node outputs

first we disregard the u' and directly train the h'

In [8]:
train_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_train.pkl",
    "embed/" + dataset_path + "/h_prime_BERT-EGAT_train.pkl",
]

test_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_test.pkl",
    "embed/" + dataset_path + "/h_prime_BERT-EGAT_test.pkl",
]

val_file_paths = [
    "embed/" + dataset_path + "/u_prime_BERT_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_DGCN_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv1_edgeAttr_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_GATv2_edgeAttr_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT_RGAT_dev.pkl",
    "embed/" + dataset_path + "/h_prime_BERT-EGAT_dev.pkl",
]

dictKey = {
    0 : 'bert',
    1 : 'bert-select-few',
    2 : 'bert-select-mod',
    3 : 'bert-select-more',
    4 : 'dgcn',
    5 : 'dgcn-select',
    6 : 'gatv1',
    7 : 'gatv1-select',
    8 : 'gatv1-edge',
    9 : 'gatv1-edge-select',
    10 : 'gatv2-edge',
    11 : 'gatv2-edge-select',
    12 : 'rgat',
    13 : 'rgat-select',
    14 : 'egat',
    15 : 'egat-select',
    16 : 'bert-select-mod-dgcn',
    17 : 'bert-select-mod-gatv1',
    18 : 'bert-select-mod-gatv1-edge',
    19 : 'bert-select-mod-gatv2-edge',
    20 : 'bert-select-mod-rgat',
    21 : 'bert-select-mod-egat',
}

<h4> Getting BERT and GAT outputs for all sets

In [9]:
trainFeaturesList = []
testFeaturesList = []
valFeaturesList = []

In [10]:
def concatenate_tensors(tensor_list):
    return torch.cat(tensor_list, dim=0)

for file_path in train_file_paths:
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
#         print(type(data))
#         if isinstance(data, list):
#             print("instance of list, ", data[0].shape)
#         else:
#             print("instance of tensor, ", data.shape)
        if file_path != train_file_paths[-1]: 
            trainFeaturesList.append(concatenate_tensors(data))
        else:
            trainFeaturesList.append(data)
            
for file_path in test_file_paths:
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
        if file_path != test_file_paths[-1]: 
            testFeaturesList.append(concatenate_tensors(data))
        else:
            testFeaturesList.append(data)
            
for file_path in val_file_paths:
    with open(file_path, 'rb') as file:
        data = pickle.load(file)
        if file_path != val_file_paths[-1]: 
            valFeaturesList.append(concatenate_tensors(data))
        else:
            valFeaturesList.append(data)

EDA

In [11]:
# # Checking the structure of graph
# for n in range(10):
#     tensor_data_np = tensor_utterances[n].detach().numpy()

#     # Plot the data
#     plt.figure(figsize=(10, 5))
#     plt.plot(range(len(tensor_data_np)), tensor_data_np)
#     plt.title('Line Graph of Tensor Data')
#     plt.xlabel('Index')
#     plt.ylabel('Value')
#     plt.show()


In [12]:
# # Normalize the h' (1st GAT)
# data = cherry_picked_nodes.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [13]:
# # Normalize the h' (2nd GAT)
# data = all_node_feats.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# # Print or analyze the similarity matrix
# # print(similarities)
# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


In [14]:
# # Normalize the u' or updated_representations
# data = tensor_utterances.detach().numpy()
# data_normalized = data / np.linalg.norm(data, axis=1, keepdims=True)

# # Compute pairwise cosine similarities
# similarities = cosine_similarity(data_normalized)

# plt.hist(similarities.flatten(), bins=50, density=True)
# plt.title('Distribution of Cosine Similarities')
# plt.xlabel('Cosine Similarity')
# plt.ylabel('Frequency')
# plt.show()


<h3> Feature Selection and creating data combination for classifiers

Define select feature function

In [15]:
def get_norm_features(encoded_features):
    scaler = MinMaxScaler()
#       "FeatureSelected+BERT+GAT: ", concatenatedRepresentationTrain2.shape, "\n",
    features_scaled = scaler.fit_transform(encoded_features)
    return torch.tensor(features_scaled)

def get_selected_features(encoded_features, labels, top_n):
    if torch.is_tensor(encoded_features):
        encoded_features = encoded_features.detach().cpu().numpy()
    
    scaler = MinMaxScaler()
    features_scaled = scaler.fit_transform(encoded_features)

    selector = SelectKBest(score_func=f_classif, k=100)

    top_features_by_class = {}
    top_scores = {}

    for label in range(7):
        # Create a binary mask indicating instances belonging to the current class
        mask = (labels == label)

        # SelectKBest with chi2 as the scoring function
        selector = SelectKBest(score_func=chi2, k=top_n)  # Select top 20 features
        selector.fit(features_scaled, mask)  # Fit SelectKBest to the data
        # Get the indices of the top 20 features
        top_features_indices = np.argsort(selector.scores_)[-top_n:]
        scores = selector.scores_[top_features_indices]
        # Store the indices in the dictionary
        top_features_by_class[label] = top_features_indices
        top_scores[label] = scores

    concatenated_features_set = set()
    for label, indices in top_features_by_class.items():
        concatenated_features_set.update(indices)

    concatenated_features_indices = list(concatenated_features_set)

    concatenated_features_indices = np.array(concatenated_features_indices)

    # Select the desired features
    selected_features = encoded_features[:, concatenated_features_indices]
#     print(selected_features.shape)
    return selected_features, concatenated_features_indices

In [16]:
# pca = PCA(n_components=2)
# pca_result = pca.fit_transform(selected_features.detach().numpy())

# # Plot the PCA result with color-coded labels
# plt.figure(figsize=(8, 6))
# for label in np.unique(Y_train):
#     indices = Y_train == label
#     plt.scatter(pca_result[indices, 0], pca_result[indices, 1], label=f'{label_decoder[label]}', alpha=0.5)
#     plt.title('PCA Visualization of Selected Utterance Embeddings (Train) with Color-Coded Labels')
#     plt.xlabel('Principal Component 1')
#     plt.ylabel('Principal Component 2')
#     plt.legend()
#     plt.grid(True)
#     plt.show()

3d plottly

In [17]:
# X_train = selected_features
# X_train = X_train / np.linalg.norm(X_train, axis=1, keepdims=True)
# # Perform T-SNE dimensionality reduction
# tsne = TSNE(n_components=3, random_state=42)
# X_tsne = tsne.fit_transform(X_train)

# # Create a Plotly scatter plot
# fig = go.Figure(data=[go.Scatter3d(
#     x=X_tsne[:, 0],
#     y=X_tsne[:, 1],
#     z=X_tsne[:, 2],
#     mode='markers',
#     marker=dict(
#         size=3,
#         color=Y_train,  # Assuming Y_train contains labels for coloring
#         colorscale='Viridis',  # You can choose a different colorscale
#         opacity=0.8
#     )
# )])

# # Update layout
# fig.update_layout(title='3D T-SNE Plot', autosize=False,
#                   width=800, height=800)

# # Show the plot
# fig.show()

In [18]:
# Save the plot as an HTML file
# pio.write_html(fig, '3d_tsne_plot.html')

Now prepare the data that will be ued to train the classifier, there are 20 combinations. And pick top 7 combinations yielding top F1 weighted-score

In [19]:
trainList = []
testList = []
valList = []

file_path1 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/trainList.pkl"
file_path2 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/testList.pkl"
file_path3 = "data/dump/" + dataset_path + "/BERT_data_for_classifier/valList.pkl"

checkFile1 = os.path.isfile(file_path1)
checkFile2 = os.path.isfile(file_path2)
checkFile3 = os.path.isfile(file_path3)

if checkFile1 and checkFile2 and checkFile3: 
    with open(file_path1, "rb") as file:
        trainList = pickle.load(file)
    with open(file_path2, "rb") as file:
        testList = pickle.load(file)
    with open(file_path3, "rb") as file:
        valList = pickle.load(file)
else:
    trainFeaturesList.append(data)
    #1
    trainList.append(trainFeaturesList[0])
    testList.append(testFeaturesList[0])
    valList.append(valFeaturesList[0])
    #2
    selectedTrainFeatures1a, indicesFeatures1a = get_selected_features(trainFeaturesList[0], y_train, 16)
    selectedTestFeatures1a = testFeaturesList[0][:, indicesFeatures1a]
    selectedValFeatures1a = valFeaturesList[0][:, indicesFeatures1a]
    trainList.append(selectedTrainFeatures1a)
    testList.append(selectedTestFeatures1a)
    valList.append(selectedValFeatures1a)
    #3
    selectedTrainFeatures1b, indicesFeatures1b = get_selected_features(trainFeaturesList[0], y_train, 32)
    selectedTestFeatures1b = testFeaturesList[0][:, indicesFeatures1b]
    selectedValFeatures1b = valFeaturesList[0][:, indicesFeatures1b]
    trainList.append(selectedTrainFeatures1b)
    testList.append(selectedTestFeatures1b)
    valList.append(selectedValFeatures1b)
    #4
    selectedTrainFeatures1c, indicesFeatures1c = get_selected_features(trainFeaturesList[0], y_train, 64)
    selectedTestFeatures1c = testFeaturesList[0][:, indicesFeatures1c]
    selectedValFeatures1c = valFeaturesList[0][:, indicesFeatures1c]
    trainList.append(selectedTrainFeatures1c)
    testList.append(selectedTestFeatures1c)
    valList.append(selectedValFeatures1c)
    #5
    trainList.append(trainFeaturesList[1])
    testList.append(testFeaturesList[1])
    valList.append(valFeaturesList[1])
    #6
    selectedTrainFeatures2, indicesFeatures2 = get_selected_features(trainFeaturesList[1], y_train, 12)
    selectedTestFeatures2 = testFeaturesList[1][:, indicesFeatures2]
    selectedValFeatures2 = valFeaturesList[1][:, indicesFeatures2]
    trainList.append(selectedTrainFeatures2)
    testList.append(selectedTestFeatures2)
    valList.append(selectedValFeatures2)
    #7
    trainList.append(trainFeaturesList[2])
    testList.append(testFeaturesList[2])
    valList.append(valFeaturesList[2])
    #8
    selectedTrainFeatures3, indicesFeatures3 = get_selected_features(trainFeaturesList[2], y_train, 12)
    selectedTestFeatures3 = testFeaturesList[2][:, indicesFeatures3]
    selectedValFeatures3 = valFeaturesList[2][:, indicesFeatures3]
    trainList.append(selectedTrainFeatures3)
    testList.append(selectedTestFeatures3)
    valList.append(selectedValFeatures3)
    #9
    trainList.append(trainFeaturesList[3])
    testList.append(testFeaturesList[3])
    valList.append(valFeaturesList[3])
    #10
    selectedTrainFeatures4, indicesFeatures4 = get_selected_features(trainFeaturesList[3], y_train, 12)
    selectedTestFeatures4 = testFeaturesList[3][:, indicesFeatures4]
    selectedValFeatures4 = valFeaturesList[3][:, indicesFeatures4]
    trainList.append(selectedTrainFeatures4)
    testList.append(selectedTestFeatures4)
    valList.append(selectedValFeatures4)
    #11
    trainList.append(trainFeaturesList[4])
    testList.append(testFeaturesList[4])
    valList.append(valFeaturesList[4])
    #12
    selectedTrainFeatures5, indicesFeatures5 = get_selected_features(trainFeaturesList[4], y_train, 12)
    selectedTestFeatures5 = testFeaturesList[4][:, indicesFeatures5]
    selectedValFeatures5 = valFeaturesList[4][:, indicesFeatures5]
    trainList.append(selectedTrainFeatures5)
    testList.append(selectedTestFeatures5)
    valList.append(selectedValFeatures5)
    #13
    trainList.append(trainFeaturesList[5])
    testList.append(testFeaturesList[5])
    valList.append(valFeaturesList[5])
    #14
    selectedTrainFeatures6, indicesFeatures6 = get_selected_features(trainFeaturesList[5], y_train, 12)
    selectedTestFeatures6 = testFeaturesList[5][:, indicesFeatures6]
    selectedValFeatures6 = valFeaturesList[5][:, indicesFeatures6]
    trainList.append(selectedTrainFeatures6)
    testList.append(selectedTestFeatures6)
    valList.append(selectedValFeatures6)
    #15
    trainList.append(trainFeaturesList[6])
    testList.append(testFeaturesList[6])
    valList.append(valFeaturesList[6])
    #16
    selectedTrainFeatures7, indicesFeatures7 = get_selected_features(trainFeaturesList[6][0], y_train, 12)
    selectedTestFeatures7 = testFeaturesList[6][0][:, indicesFeatures7]
    selectedValFeatures7 = testFeaturesList[6][0][:, indicesFeatures7]
    trainList.append(selectedTrainFeatures7)
    testList.append(selectedTestFeatures7)
    valList.append(selectedValFeatures7)
    selectedNormTrainFeatures1 = get_norm_features(selectedTrainFeatures1b)
    selectedNormTestFeatures1 = get_norm_features(selectedTestFeatures1b)
    selectedNormValFeatures1 = get_norm_features(selectedValFeatures1b)

    #17
    trainNormFeatures2 = get_norm_features(trainFeaturesList[1].detach().numpy())
    testNormFeatures2 = get_norm_features(testFeaturesList[1].detach().numpy())
    valNormFeatures2 = get_norm_features(valFeaturesList[1].detach().numpy())
    concatenatedTrainFeatures2 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures2), dim=1)
    concatenatedTestFeatures2 = torch.cat((selectedNormTestFeatures1, testNormFeatures2), dim=1)
    concatenatedValFeatures2 = torch.cat((selectedNormValFeatures1, valNormFeatures2), dim=1)
    trainList.append(concatenatedTrainFeatures2)
    testList.append(concatenatedTestFeatures2)
    valList.append(concatenatedValFeatures2)
    #18
    trainNormFeatures3 = get_norm_features(trainFeaturesList[2].detach().numpy())
    testNormFeatures3 = get_norm_features(testFeaturesList[2].detach().numpy())
    valNormFeatures3 = get_norm_features(valFeaturesList[2].detach().numpy())
    concatenatedTrainFeatures3 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures3), dim=1)
    concatenatedTestFeatures3 = torch.cat((selectedNormTestFeatures1, testNormFeatures3), dim=1)
    concatenatedValFeatures3 = torch.cat((selectedNormValFeatures1, valNormFeatures3), dim=1)
    trainList.append(concatenatedTrainFeatures3)
    testList.append(concatenatedTestFeatures3)
    valList.append(concatenatedValFeatures3)
    #19
    trainNormFeatures4 = get_norm_features(trainFeaturesList[3].detach().numpy())
    testNormFeatures4 = get_norm_features(testFeaturesList[3].detach().numpy())
    valNormFeatures4 = get_norm_features(valFeaturesList[3].detach().numpy())
    concatenatedTrainFeatures4 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures4), dim=1)
    concatenatedTestFeatures4 = torch.cat((selectedNormTestFeatures1, testNormFeatures4), dim=1)
    concatenatedValFeatures4 = torch.cat((selectedNormValFeatures1, valNormFeatures4), dim=1)
    trainList.append(concatenatedTrainFeatures4)
    testList.append(concatenatedTestFeatures4)
    valList.append(concatenatedValFeatures4)
    #20
    trainNormFeatures5 = get_norm_features(trainFeaturesList[4].detach().numpy())
    testNormFeatures5 = get_norm_features(testFeaturesList[4].detach().numpy())
    valNormFeatures5 = get_norm_features(valFeaturesList[4].detach().numpy())
    concatenatedTrainFeatures5 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures5), dim=1)
    concatenatedTestFeatures5 = torch.cat((selectedNormTestFeatures1, testNormFeatures5), dim=1)
    concatenatedValFeatures5 = torch.cat((selectedNormValFeatures1, valNormFeatures5), dim=1)
    trainList.append(concatenatedTrainFeatures5)
    testList.append(concatenatedTestFeatures5)
    valList.append(concatenatedValFeatures5)

    #21
    trainNormFeatures6 = get_norm_features(trainFeaturesList[5].detach().numpy())
    testNormFeatures6 = get_norm_features(testFeaturesList[5].detach().numpy())
    valNormFeatures6 = get_norm_features(valFeaturesList[5].detach().numpy())
    concatenatedTrainFeatures6 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures6), dim=1)
    concatenatedTestFeatures6 = torch.cat((selectedNormTestFeatures1, testNormFeatures6), dim=1)
    concatenatedValFeatures6 = torch.cat((selectedNormValFeatures1, valNormFeatures6), dim=1)
    trainList.append(concatenatedTrainFeatures6)
    testList.append(concatenatedTestFeatures6)
    valList.append(concatenatedValFeatures6)

    #22
    trainNormFeatures7 = get_norm_features(trainFeaturesList[6][0].detach().numpy())
    testNormFeatures7 = get_norm_features(testFeaturesList[6][0].detach().numpy())
    valNormFeatures7 = get_norm_features(valFeaturesList[6][0].detach().numpy())
    concatenatedTrainFeatures7 = torch.cat((selectedNormTrainFeatures1, trainNormFeatures7), dim=1)
    concatenatedTestFeatures7 = torch.cat((selectedNormTestFeatures1, testNormFeatures7), dim=1)
    concatenatedValFeatures7 = torch.cat((selectedNormValFeatures1, valNormFeatures7), dim=1)
    trainList.append(concatenatedTrainFeatures7)
    testList.append(concatenatedTestFeatures7)
    valList.append(concatenatedValFeatures7)

    with open(file_path1, 'wb') as file:
        pickle.dump(trainList, file)
    with open(file_path2, 'wb') as file:
        pickle.dump(testList, file)
    with open(file_path3, 'wb') as file:
        pickle.dump(valList, file)

1. Prep data - normalize and create data loader

In [20]:
def prep_data(features, labels, isOversample):
    num_instances = len(features)
    num_classes = 7

    # Rescale input features
    # selected_features = concatenated_representation / np.linalg.norm(concatenated_representation, axis=1, keepdims=True)

    # Apply data resampling (oversampling) to balance class distribution
    if isOversample:
        X_set, Y_set = oversample_data(features, labels, num_classes)
    else:
        X_set, Y_set = features, labels

    # Calculate class weights for class weighting
#     class_counts = np.bincount(labels)
#     total_instances = np.sum(class_counts)
    # class_weights = torch.tensor([total_instances / (num_classes * count) for count in class_counts], dtype=torch.float32)

    # Convert data to PyTorch tensors
    X_tensor = torch.tensor(X_set.clone().detach(), dtype=torch.float32).clone().detach()
    Y_tensor = torch.tensor(Y_set.clone().detach(), dtype=torch.long).clone().detach()
    # print(X_train_tensor.shape, Y_train_tensor.shape)
    # X_train_tensor = torch.tensor(selected_features)
    # Y_train_tensor = torch.tensor(y_train)

    unique_labels, label_counts = np.unique(Y_set, return_counts=True)

    # Print the counts for each unique label
#     for label, count in zip(unique_labels, label_counts):
#         print(f"Label {label_decoder[label]}: {count} occurrences")

#     print(X_tensor.shape, Y_tensor.shape)
    # Create a TensorDataset
    dataset = TensorDataset(X_tensor, Y_tensor)

    return X_tensor, Y_tensor

2. Training

In [21]:
def model_train1(X_set, Y_set, num_epochs=20, batch_size=32, loss_difference_threshold=0.01, 
                 hidden_dims=[256, 128], dropout_rate=0.5, lr=0.0001, optimizer_class=optim.Adam, criterion_class=nn.CrossEntropyLoss):
    output_dim = 7  # Number of classes
    model = MyNetwork(len(X_set[0]), hidden_dims, output_dim, dropout_rate)
    criterion = criterion_class()
    optimizer = optimizer_class(model.parameters(), lr=lr)
    loss_history = []
    accuracy_history = []
    print_interval = 1  # Print tqdm every epoch
    previous_loss = float('inf')

    # Create dataset and dataloader
    dataset = TensorDataset(X_set, Y_set)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    epoch_num = num_epochs
    for epoch in range(num_epochs):
        total_loss = 0.0
        correct_predictions = 0
        total_instances = 0
        with tqdm(total=len(dataloader), desc=f'Epoch {epoch+1}/{num_epochs}', leave=False) as pbar:
            for inputs, labels in dataloader:
                inputs = inputs.float()  # Ensure inputs are float32
                labels = labels.long()   # Ensure labels are long
                outputs = model(inputs)
                outputs = outputs.squeeze()
                labels = labels.squeeze()
                loss = criterion(outputs, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
                _, predicted = torch.max(outputs, dim=1)
                correct_predictions += (predicted == labels).sum().item()
                total_instances += labels.size(0)
                pbar.update(1)

        epoch_loss = total_loss / total_instances
        epoch_accuracy = correct_predictions / total_instances
        loss_history.append(epoch_loss)
        accuracy_history.append(epoch_accuracy)

        if epoch > 0 and abs(epoch_loss - previous_loss) < loss_difference_threshold:
            epoch_num = epoch
            break

        previous_loss = epoch_loss

    return model, epoch_num

In [22]:
def model_train2(X_set, y_set, num_epochs=20, batch_size=32, early_stopping_threshold=0.01,
                 hidden_dim=128, dropout_prob=0.5, learning_rate=0.0005, optimizer_class=optim.Adam, criterion_class=nn.CrossEntropyLoss):
    input_dim = len(X_set[0])  # Size of the input features
    output_dim = 7  # Number of classes

    model = FCClassifier(input_dim, hidden_dim, output_dim, dropout_prob)
    criterion = criterion_class()
    optimizer = optimizer_class(model.parameters(), lr=learning_rate)

    # Create DataLoader for batching
    dataset = TensorDataset(X_set, y_set)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    epoch_losses = []  # List to store loss values for each epoch
    epoch_num = num_epochs
    # Training loop
    with tqdm(total=num_epochs, unit="epoch", desc="Training") as tepoch:
        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0

            for batch_features, batch_labels in dataloader:
                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                outputs = model(batch_features)
                loss = criterion(outputs, batch_labels)

                # Backward pass and optimize
                loss.backward()
                optimizer.step()

                # Update running loss
                running_loss += loss.item()

            # Calculate and store average loss for the epoch
            epoch_loss = running_loss / len(dataloader)
            epoch_losses.append(epoch_loss)

            # Update tqdm description
            tepoch.set_postfix(loss=epoch_loss)
            tepoch.update()

            # Check for early stopping
            if epoch > 0 and abs(epoch_losses[-2] - epoch_losses[-1]) < early_stopping_threshold:
                epoch_num = epoch
                break

    return model, epoch_num

In [23]:
def classify_emotions(model, X_tensor, Y_tensor, typeSet, isSimpleFC, i_dict):
    # Set the model to evaluation mode
    if X_tensor.dtype != torch.float32:
        X_tensor = X_tensor.float()
        
    model.eval()

    # Predict on the data
    with torch.no_grad():
        outputs = model(X_tensor)
        _, predicted = torch.max(outputs, 1)

    # Convert predicted tensor to numpy array
    predicted = predicted.cpu().numpy()
    Y_tensor = Y_tensor.cpu().numpy()

    # Calculate classification report
    report = classification_report(Y_tensor, predicted, target_names=label_decoder.values(), output_dict=True, zero_division=0)

    # Extract metrics
    accuracy = report['accuracy']
    recall = report['weighted avg']['recall']
    weighted_f1 = report['weighted avg']['f1-score']
    f1_micro = report.get('micro avg', {}).get('f1-score', accuracy)
    f1_macro = report.get('macro avg', {}).get('f1-score', 0.0) 
    
    if typeSet == "validation":
        print("Classified: ", dictKey[i_dict])
    
    return dictKey[i_dict], typeSet, isSimpleFC, accuracy, recall, weighted_f1, f1_micro, f1_macro


In [24]:
dataset = FeatureEngineeredDataset(trainList, testList, valList)
dataLoader = DataLoader(dataset, batch_size=1, shuffle=False)

In [25]:
i = 0
for trainSet, testSet, valSet in tqdm(dataLoader, desc="Encoding Progress", unit="batch"):
    print(i, type(trainSet))
    if isinstance(trainSet, list):
        print(type(trainSet[0]))
        sample = trainSet[0]
        print(sample.shape)
    else:
        print(trainSet.squeeze(0).shape)
    i = i+1

Encoding Progress: 100%|███████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 101.44batch/s]

0 <class 'torch.Tensor'>
torch.Size([12176, 768])
1 <class 'torch.Tensor'>
torch.Size([12176, 90])
2 <class 'torch.Tensor'>
torch.Size([12176, 171])
3 <class 'torch.Tensor'>
torch.Size([12176, 304])
4 <class 'torch.Tensor'>
torch.Size([12176, 64])
5 <class 'torch.Tensor'>
torch.Size([12176, 34])
6 <class 'torch.Tensor'>
torch.Size([12176, 64])
7 <class 'torch.Tensor'>
torch.Size([12176, 41])
8 <class 'torch.Tensor'>
torch.Size([12176, 64])
9 <class 'torch.Tensor'>
torch.Size([12176, 39])
10 <class 'torch.Tensor'>
torch.Size([12176, 64])
11 <class 'torch.Tensor'>
torch.Size([12176, 39])
12 <class 'torch.Tensor'>
torch.Size([12176, 64])
13 <class 'torch.Tensor'>
torch.Size([12176, 44])
14 <class 'list'>
<class 'torch.Tensor'>
torch.Size([1, 12176, 64])
15 <class 'torch.Tensor'>
torch.Size([12176, 38])
16 <class 'torch.Tensor'>
torch.Size([12176, 235])
17 <class 'torch.Tensor'>
torch.Size([12176, 235])
18 <class 'torch.Tensor'>
torch.Size([12176, 235])
19 <class 'torch.Tensor'>
torch.Size




<b> This is where value for isSimpleFC is decided

In [26]:
def to_tensor(data):
    if isinstance(data, torch.Tensor):
        return data
    elif isinstance(data, np.ndarray):
        return torch.tensor(data)
    else:
        raise TypeError(f"Unsupported data type: {type(data)}")

In [27]:
file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/classifier_test_no_tuning_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        df_results_sorted = pickle.load(file)
else:
    results = []
    num_epochs = 50
    batch_size = 8
    i = 0

    for trainSet, testSet, valSet in dataLoader:
        if isinstance(trainSet, list):
            trainSet = trainSet[0].squeeze(0)
            testSet = testSet[0].squeeze(0)
            valSet = valSet[0].squeeze(0)
        else:
            trainSet = trainSet.squeeze(0)
            testSet = testSet.squeeze(0)
            valSet = valSet.squeeze(0)

        X_tensor, Y_tensor = prep_data(trainSet.clone().detach(), y_train, False)
#         deepFC
        model, _ = model_train1(X_tensor, Y_tensor, num_epochs, batch_size)
        result = classify_emotions(model, X_tensor.clone().detach(), Y_tensor.clone().detach(), 'train', False, i)
#         results.append(result)

        X_tensor, Y_tensor = prep_data(testSet.clone().detach(), y_test, False)
        result = classify_emotions(model, X_tensor.clone().detach(), Y_tensor.clone().detach(), 'test', False, i)
        results.append(result)

        i += 1

    i = 0
    for trainSet, testSet, valSet in dataLoader:
        if isinstance(trainSet, list):
            trainSet = trainSet[0].squeeze(0)
            testSet = testSet[0].squeeze(0)
            valSet = valSet[0].squeeze(0)
        else:
            trainSet = trainSet.squeeze(0)
            testSet = testSet.squeeze(0)
            valSet = valSet.squeeze(0)

        X_tensor, Y_tensor = prep_data(trainSet, y_train, False)
#         simpleFC
        model, _ = model_train2(X_tensor, Y_tensor, num_epochs, batch_size)

        result = classify_emotions(model, X_tensor, Y_tensor, 'train', True, i)
#         results.append(result)

        X_tensor, Y_tensor = prep_data(testSet, y_test, False)
        result = classify_emotions(model, X_tensor, Y_tensor, 'test', True, i)
        results.append(result)
        i += 1

    columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', 'Weighted-F1', 'F1-micro', 'F1-macro']
    df = pd.DataFrame(results, columns=columns)
    df_results_sorted = df.sort_values(by='Weighted-F1', ascending=False)

    with open(file_path, 'wb') as file:
        pickle.dump(df_results_sorted, file)

In [28]:
# tmp
# columns = list(df_results_sorted.columns)

# # Modify the 2nd and 3rd column names
# columns[1] = 'typeSet'
# columns[2] = 'isSimpleFC'

# # Assign the new column names back to the DataFrame
# df_results_sorted.columns = columns
# df_results_sorted["typeSet"] = "test"
# with open(file_path, 'wb') as file:
#     pickle.dump(df_results_sorted, file)

In [29]:
df_results_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro
22,bert,test,True,0.510217,0.510217,0.407871,0.510217,0.20337
24,bert-select-mod,test,True,0.501858,0.501858,0.389807,0.501858,0.180678
0,bert,test,False,0.500619,0.500619,0.38253,0.500619,0.172942
23,bert-select-few,test,True,0.495666,0.495666,0.380247,0.495666,0.173107
25,bert-select-more,test,True,0.499071,0.499071,0.379425,0.499071,0.171555
3,bert-select-more,test,False,0.489474,0.489474,0.346339,0.489474,0.135263
39,bert-select-mod-gatv1,test,True,0.479876,0.479876,0.314911,0.479876,0.101491
2,bert-select-mod,test,False,0.477709,0.477709,0.311506,0.477709,0.095291
37,egat-select,test,True,0.477709,0.477709,0.310341,0.477709,0.094153
6,gatv1,test,False,0.47709,0.47709,0.308193,0.47709,0.092284


<h4> Select top 10 unique data combinations then tune

In [30]:
# top_10_combinations

In [31]:
max_iterations = 15
counter = 0
combination1 = []
combination2 = []
seen_combinations = set()

for idx, row in df_results_sorted.iterrows():
    if counter >= max_iterations:
        break

    if row['data_combination'] in seen_combinations:
        continue

    if row['isSimpleFC']:
        combination2.append(row['data_combination'])
    else:
        combination1.append(row['data_combination'])

    seen_combinations.add(row['data_combination'])
    counter += 1

# Display the results
print("Combination 1 (isSimpleFC=False):", combination1)
print("Combination 2 (isSimpleFC=True):", combination2)

Combination 1 (isSimpleFC=False): ['gatv1']
Combination 2 (isSimpleFC=True): ['bert', 'bert-select-mod', 'bert-select-few', 'bert-select-more', 'bert-select-mod-gatv1', 'egat-select', 'gatv2-edge-select', 'gatv1-select', 'gatv1-edge', 'gatv1-edge-select', 'gatv2-edge', 'rgat-select', 'rgat', 'egat']


In [32]:
indices1 = [key for key, value in dictKey.items() if value in combination1]
indices2 = [key for key, value in dictKey.items() if value in combination2]

print("Indices for isSimpleFC=False:", indices1)
print("Indices for isSimpleFC=True:", indices2)

Indices for isSimpleFC=False: [6]
Indices for isSimpleFC=True: [0, 1, 2, 3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17]


In [33]:
selectedTrainDeepList = [trainList[i] for i in indices1]
selectedTestDeepList = [testList[i] for i in indices1]
selectedValDeepList = [valList[i] for i in indices1]

len(selectedTrainDeepList)

1

In [34]:
indices1

[6]

In [35]:
# for trainSet in selectedTrainList:
#     print(type(trainSet))

In [36]:
selectedTrainList = [trainList[i] for i in indices2]
selectedTestList = [testList[i] for i in indices2]
selectedValList = [valList[i] for i in indices2]

len(selectedTrainList)

14

<h4> Tuning using random parameters

In [37]:
# it should call both model_train1 and 2

def objective_func(X_train, X_test, X_val, 
               y_train, y_test, y_val, hyperparams, i_dict, isSimpleFC):
    results = []
    hyperparams_string = (
        f'num_epochs={hyperparams["num_epochs"]} '
        f'batch_size={hyperparams["batch_size"]} '
        f'loss_difference_threshold={hyperparams["loss_difference_threshold"]} '
        f'hidden_dims={hyperparams["hidden_dims"]} '
        f'dropout_rate={hyperparams["dropout_rate"]} '
        f'learning_rate={hyperparams["learning_rate"]} '
        f'optimizers={hyperparams["optimizers"]} '
        f'criteria={hyperparams["criteria"]}'
    )    
    print(hyperparams_string)
            
#     X_tensor, Y_tensor = prep_data(X_train.clone().detach(), y_train, False)
    X_train_tensor = to_tensor(X_train)
    y_train_tensor = to_tensor(y_train).long()
    X_val_tensor = to_tensor(X_val)
    y_val_tensor = to_tensor(y_val).long()
    X_test_tensor = to_tensor(X_test)
    y_test_tensor = to_tensor(y_test).long()
# train
    start_time = time.time()
    if isSimpleFC:
        model, num_epoch = model_train2(X_train_tensor, y_train_tensor, hyperparams["num_epochs"],
                            hyperparams["batch_size"], hyperparams["loss_difference_threshold"], 
                            hyperparams["hidden_dims"], hyperparams["dropout_rate"],
                            hyperparams["learning_rate"], hyperparams["optimizers"], hyperparams["criteria"])        
    else:
        model, num_epoch = model_train1(X_train_tensor, y_train_tensor, hyperparams["num_epochs"],
                            hyperparams["batch_size"], hyperparams["loss_difference_threshold"], 
                            hyperparams["hidden_dims"], hyperparams["dropout_rate"],
                            hyperparams["learning_rate"], hyperparams["optimizers"], hyperparams["criteria"])
    end_time = time.time()
    elapsed_time = end_time - start_time
# val
#     X_tensor, Y_tensor = prep_data(X_val.clone().detach(), y_val, False)
    result = classify_emotions(model, X_val_tensor, y_val_tensor, \
                               'validation', isSimpleFC, i_dict)
    elapsed_time = time.time() - start_time
    
    result = list(result)
    hyperparams_string = f'num_epochs={hyperparams["num_epochs"]}-batch_size={hyperparams["batch_size"]}-loss_difference_threshold={hyperparams["loss_difference_threshold"]}-hidden_dims={hyperparams["hidden_dims"]}-dropout_rate={hyperparams["dropout_rate"]}-learning_rate={hyperparams["learning_rate"]}-optimizers={hyperparams["optimizers"]}-criteria={hyperparams["criteria"]}'
    result.append(elapsed_time)
    result.append(hyperparams_string)
    result.append(num_epoch)
    results.append(result)
    
# test
#     X_tensor, Y_tensor = prep_data(X_test.clone().detach(), y_test, False)
    result = classify_emotions(model, X_test_tensor, y_test_tensor, \
                               'test', isSimpleFC, i_dict)
    
    result = list(result)
    result.append(elapsed_time)
    result.append(hyperparams_string)
    result.append(num_epoch)
    results.append(result)
    
    columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch']
    df = pd.DataFrame(results, columns=columns)
    df_results_sorted = df.sort_values(by='data_combination', ascending=False)
    
    return df_results_sorted


# def objective_func(X_train, X_test, X_val, 
#                y_train, y_test, y_val, hyperparams, i_dict):

In [38]:
def random_search(X_train, X_test, X_val, \
                  y_train, y_test, y_val, \
                  param_grid, isSimpleFC, i_dict, MAX_EVALS = 15):
    
    sub_total_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)
    
    for i in range(MAX_EVALS):
        hyperparams = {k: random.sample(v, 1)[0] for k, v in param_grid.items()}

        try:
            new_results = objective_func(X_train, X_test, X_val,  y_train, y_test, y_val,
                                hyperparams, i_dict, isSimpleFC)
            sub_total_results = pd.concat([sub_total_results, new_results], ignore_index=True)
            
        except Exception as e:
            print(f"Error with hyperparams {hyperparams}: {e}")
            continue
    
    # Sort with best score on top
    return sub_total_results 

In [39]:
param_grid1 = {
    'num_epochs': [50, 80, 120],
    'batch_size': [1, 4, 32, 64],
    'loss_difference_threshold': [0.01, 0.001],
    'hidden_dims': [[256, 128], [128, 64], [64, 32]],
    'dropout_rate': [0.3, 0.5, 0.7],
    'learning_rate': [0.001, 0.0001, 0.00001],
    'optimizers': [optim.Adam, optim.SGD],
    'criteria': [nn.CrossEntropyLoss, nn.NLLLoss]
}
param_grid2 = {
    'num_epochs': [50, 80, 120],
    'batch_size': [1, 4, 32, 64],
    'loss_difference_threshold': [0.01, 0.001],
    'hidden_dims': [128, 256, 512],
    'dropout_rate': [0.3, 0.5, 0.7],
    'learning_rate': [0.001, 0.0001, 0.00001],
    'optimizers': [optim.Adam, optim.SGD],
    'criteria': [nn.CrossEntropyLoss, nn.NLLLoss]
}

<h5> First find the best hyperparameter combination for the DeepClassifier.

In [40]:
def hyperparamTuning(X_trainSet, X_testSet, X_valSet, y_train, y_test, y_val, isSimpleFC, param_grid, indices):
    total_results = pd.DataFrame(columns = ['data_combination', 'typeSet', 'isSimpleFC', 'Accuracy', 'Recall', \
               'Weighted-F1', 'F1-micro', 'F1-macro', 'train_time', 'hyperparams', 'num_epoch'],)
    for i in range(len(indices)):
        print("============ PART ", i, "============")
        X_train = X_trainSet[i]
        X_test = X_testSet[i]
        X_val = X_valSet[i]

        sub_total_results = random_search(X_train, X_test, X_val, y_train, y_test, y_val,
                     param_grid, isSimpleFC, indices[i])
        total_results = pd.concat([sub_total_results, total_results], ignore_index=True)

    return total_results


In [41]:
file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/deep_classifier_tuned_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        total_results1_sorted = pickle.load(file)
else:
    total_results1 = hyperparamTuning(selectedTrainDeepList, selectedTestDeepList, selectedValDeepList, \
                                 y_train, y_test, y_val, False, param_grid1, indices1)
    
    total_results1_sorted = total_results1.sort_values(by='Weighted-F1', ascending=False)
    with open(file_path, 'wb') as file:
        pickle.dump(total_results1_sorted, file)

In [42]:
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)  # Don't limit the width of the display
pd.set_option('display.max_colwidth', None)  # Don't truncate column content

total_results1_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro,train_time,hyperparams,num_epoch


In [43]:
file_path = "data/dump/" + dataset_path + "/BERT_data_for_classifier/results/simple_classifier_tuned_Df.pkl"
checkFile = os.path.isfile(file_path)

if checkFile: 
    with open(file_path, "rb") as file:
        total_results2_sorted = pickle.load(file)
else: 
    total_results2 = hyperparamTuning(selectedTrainList, selectedTestList, selectedValList, \
                                     y_train, y_test, y_val, True, param_grid2, indices2)
    
    total_results2_sorted = total_results2.sort_values(by='Weighted-F1', ascending=False)
    with open(file_path, 'wb') as file:
        pickle.dump(total_results2_sorted, file)

num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 80/80 [00:30<00:00,  2.64epoch/s, loss=-31.2]


Classified:  bert
num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 80/80 [01:02<00:00,  1.28epoch/s, loss=-1.11e+5]


Classified:  bert
num_epochs=80 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   9%|█████▏                                                     | 7/80 [00:44<07:42,  6.34s/epoch, loss=1.43]


Classified:  bert
num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 80/80 [00:26<00:00,  2.97epoch/s, loss=nan]


Classified:  bert
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 80/80 [07:57<00:00,  5.97s/epoch, loss=-2.26e+4]


Classified:  bert
num_epochs=120 batch_size=1 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  19%|██████████▉                                              | 23/120 [10:54<46:01, 28.47s/epoch, loss=1.07]


Classified:  bert
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [31:15<00:00, 15.63s/epoch, loss=-2.26e+5]


Classified:  bert
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████████| 80/80 [10:40<00:00,  8.01s/epoch, loss=-inf]


Classified:  bert
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▍                                                      | 7/120 [00:48<13:03,  6.93s/epoch, loss=1.38]


Classified:  bert
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  12%|███████                                                    | 6/50 [01:24<10:20, 14.10s/epoch, loss=1.48]


Classified:  bert
num_epochs=120 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████| 120/120 [04:32<00:00,  2.27s/epoch, loss=-1.68e+22]


Classified:  bert
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [26:29<00:00, 31.79s/epoch, loss=-1.06e+7]


Classified:  bert
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  34%|███████████████████▋                                      | 17/50 [02:07<04:07,  7.50s/epoch, loss=1.41]


Classified:  bert
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   7%|███▉                                                       | 8/120 [00:23<05:27,  2.92s/epoch, loss=1.4]


Classified:  bert
num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                        | 4/80 [00:02<00:54,  1.40epoch/s, loss=1.55]


Classified:  bert
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▍                                                       | 5/120 [00:04<01:41,  1.14epoch/s, loss=1.47]


Classified:  bert-select-few
num_epochs=80 batch_size=1 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▋                                                       | 5/80 [01:11<17:50, 14.28s/epoch, loss=1.58]


Classified:  bert-select-few
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▋                                                       | 5/80 [02:08<32:11, 25.75s/epoch, loss=1.46]


Classified:  bert-select-few
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  30%|█████████████████▍                                        | 15/50 [00:04<00:10,  3.41epoch/s, loss=1.58]


Classified:  bert-select-few
num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 80/80 [00:33<00:00,  2.38epoch/s, loss=nan]


Classified:  bert-select-few
num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 80/80 [00:50<00:00,  1.59epoch/s, loss=-2.4e+6]


Classified:  bert-select-few
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|█████▋                                                   | 12/120 [00:05<00:45,  2.40epoch/s, loss=1.55]


Classified:  bert-select-few
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                       | 6/120 [00:04<01:32,  1.23epoch/s, loss=1.54]


Classified:  bert-select-few
num_epochs=120 batch_size=64 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▍                                                       | 5/120 [00:01<00:43,  2.64epoch/s, loss=1.46]


Classified:  bert-select-few
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▋                                                       | 5/80 [00:21<05:19,  4.26s/epoch, loss=1.48]


Classified:  bert-select-few
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|██████                                                      | 5/50 [00:01<00:15,  2.95epoch/s, loss=1.6]


Classified:  bert-select-few
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  12%|███████                                                    | 6/50 [00:11<01:27,  1.99s/epoch, loss=1.58]


Classified:  bert-select-few
num_epochs=120 batch_size=64 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                       | 6/120 [00:02<00:45,  2.50epoch/s, loss=1.51]


Classified:  bert-select-few
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  40%|███████████████████████▏                                  | 20/50 [01:19<01:59,  3.97s/epoch, loss=1.48]


Classified:  bert-select-few
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  18%|██████████▌                                                | 9/50 [04:41<21:21, 31.26s/epoch, loss=1.43]


Classified:  bert-select-few
num_epochs=80 batch_size=4 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  24%|█████████████▊                                            | 19/80 [00:38<02:04,  2.05s/epoch, loss=1.56]


Classified:  bert-select-mod
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 120/120 [01:34<00:00,  1.27epoch/s, loss=-1.9e+5]


Classified:  bert-select-mod
num_epochs=80 batch_size=32 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  19%|██████████▉                                               | 15/80 [00:06<00:28,  2.30epoch/s, loss=1.75]


Classified:  bert-select-mod
num_epochs=120 batch_size=64 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                       | 6/120 [00:02<00:51,  2.22epoch/s, loss=1.46]


Classified:  bert-select-mod
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:23<00:00,  2.10epoch/s, loss=-94.2]


Classified:  bert-select-mod
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▋                                                       | 5/80 [00:19<04:55,  3.95s/epoch, loss=1.58]


Classified:  bert-select-mod
num_epochs=50 batch_size=32 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [00:20<00:00,  2.44epoch/s, loss=-1.2e+10]


Classified:  bert-select-mod
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [04:09<00:00,  5.00s/epoch, loss=-3.13e+5]


Classified:  bert-select-mod
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [16:39<00:00, 19.99s/epoch, loss=-3.89e+6]


Classified:  bert-select-mod
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▍                                                       | 7/120 [00:03<01:02,  1.81epoch/s, loss=1.6]


Classified:  bert-select-mod
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 120/120 [00:35<00:00,  3.35epoch/s, loss=nan]


Classified:  bert-select-mod
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  26%|███████████████                                           | 13/50 [00:04<00:11,  3.20epoch/s, loss=1.59]


Classified:  bert-select-mod
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [02:11<00:00,  1.09s/epoch, loss=-3.09e+7]


Classified:  bert-select-mod
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  32%|██████████████████▌                                       | 16/50 [00:07<00:14,  2.29epoch/s, loss=1.45]


Classified:  bert-select-mod
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▍                                                      | 7/120 [00:05<01:25,  1.33epoch/s, loss=1.44]


Classified:  bert-select-mod
num_epochs=80 batch_size=4 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  20%|███████████▌                                              | 16/80 [00:37<02:31,  2.37s/epoch, loss=1.46]


Classified:  bert-select-more
num_epochs=120 batch_size=4 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   9%|█████▏                                                   | 11/120 [00:54<09:01,  4.97s/epoch, loss=1.43]


Classified:  bert-select-more
num_epochs=120 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   3%|█▊                                                      | 4/120 [02:14<1:05:06, 33.67s/epoch, loss=1.48]


Classified:  bert-select-more
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 50/50 [01:11<00:00,  1.42s/epoch, loss=-7.4e+6]


Classified:  bert-select-more
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|██████                                                      | 5/50 [00:38<05:48,  7.75s/epoch, loss=1.5]


Classified:  bert-select-more
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  12%|███████                                                    | 6/50 [00:46<05:38,  7.68s/epoch, loss=1.48]


Classified:  bert-select-more
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 80/80 [03:02<00:00,  2.28s/epoch, loss=-2.36e+13]


Classified:  bert-select-more
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.31epoch/s, loss=-2.52]


Classified:  bert-select-more
num_epochs=120 batch_size=64 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▍                                                      | 7/120 [00:05<01:27,  1.30epoch/s, loss=1.36]


Classified:  bert-select-more
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   3%|█▉                                                        | 4/120 [00:01<00:38,  3.00epoch/s, loss=1.58]


Classified:  bert-select-more
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                       | 6/120 [00:06<02:01,  1.07s/epoch, loss=1.41]


Classified:  bert-select-more
num_epochs=120 batch_size=1 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [37:14<00:00, 18.62s/epoch, loss=-2.69e+5]


Classified:  bert-select-more
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  36%|████████████████████▉                                     | 18/50 [01:30<02:41,  5.04s/epoch, loss=1.44]


Classified:  bert-select-more
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|█████▉                                                     | 8/80 [01:06<09:55,  8.28s/epoch, loss=1.55]


Classified:  bert-select-more
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:44<00:00,  1.13epoch/s, loss=0.712]


Classified:  bert-select-more
num_epochs=120 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   3%|█▉                                                        | 4/120 [00:29<14:21,  7.43s/epoch, loss=1.56]


Classified:  gatv1-select
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  14%|████████▎                                                  | 7/50 [00:14<01:29,  2.08s/epoch, loss=1.59]


Classified:  gatv1-select
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▍                                                        | 3/120 [00:10<06:45,  3.47s/epoch, loss=1.54]


Classified:  gatv1-select
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:01<00:22,  2.07epoch/s, loss=1.54]


Classified:  gatv1-select
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 80/80 [09:35<00:00,  7.20s/epoch, loss=-1.3e+7]


Classified:  gatv1-select
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▍                                                        | 3/120 [00:01<01:15,  1.55epoch/s, loss=1.55]


Classified:  gatv1-select
num_epochs=80 batch_size=4 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 80/80 [02:44<00:00,  2.06s/epoch, loss=-7.69e+18]


Classified:  gatv1-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:22<05:49,  7.44s/epoch, loss=1.55]


Classified:  gatv1-select
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  26%|███████████████                                           | 13/50 [00:09<00:27,  1.33epoch/s, loss=1.55]


Classified:  gatv1-select
num_epochs=120 batch_size=1 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [23:42<00:00, 11.85s/epoch, loss=-1.96e+6]


Classified:  gatv1-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   9%|█████▏                                                   | 11/120 [00:03<00:31,  3.49epoch/s, loss=1.61]


Classified:  gatv1-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  12%|███████▏                                                 | 15/120 [00:04<00:33,  3.16epoch/s, loss=1.57]


Classified:  gatv1-select
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [00:57<00:00,  2.09epoch/s, loss=-2.09e+3]


Classified:  gatv1-select
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.30epoch/s, loss=-5.3e+3]


Classified:  gatv1-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:45<11:51, 15.13s/epoch, loss=1.55]


Classified:  gatv1-select
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▍                                                        | 3/120 [00:10<07:01,  3.61s/epoch, loss=1.55]


Classified:  gatv1-edge
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                        | 4/80 [00:41<13:02, 10.30s/epoch, loss=1.56]


Classified:  gatv1-edge
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 50/50 [06:04<00:00,  7.28s/epoch, loss=nan]


Classified:  gatv1-edge
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   8%|████▎                                                     | 9/120 [00:30<06:15,  3.39s/epoch, loss=1.59]


Classified:  gatv1-edge
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  14%|████████▎                                                  | 7/50 [02:14<13:47, 19.24s/epoch, loss=1.56]


Classified:  gatv1-edge
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [02:57<00:00,  3.55s/epoch, loss=-1.04e+6]


Classified:  gatv1-edge
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 80/80 [17:45<00:00, 13.32s/epoch, loss=nan]


Classified:  gatv1-edge
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   3%|█▉                                                        | 4/120 [00:15<07:17,  3.77s/epoch, loss=1.55]


Classified:  gatv1-edge
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▎                                                        | 2/50 [00:05<02:21,  2.94s/epoch, loss=1.93]


Classified:  gatv1-edge
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [08:29<00:00, 10.19s/epoch, loss=-4.15e+5]


Classified:  gatv1-edge
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [05:54<00:00,  7.09s/epoch, loss=-5.92]


Classified:  gatv1-edge
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  14%|████████▎                                                  | 7/50 [01:04<06:33,  9.16s/epoch, loss=1.54]


Classified:  gatv1-edge
num_epochs=80 batch_size=1 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 80/80 [18:20<00:00, 13.75s/epoch, loss=nan]


Classified:  gatv1-edge
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training:   8%|████▌                                                    | 4/50 [00:11<02:17,  3.00s/epoch, loss=-.0562]


Classified:  gatv1-edge
num_epochs=80 batch_size=4 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 80/80 [09:15<00:00,  6.94s/epoch, loss=-26.8]


Classified:  gatv1-edge
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▏                                                        | 3/80 [00:06<02:38,  2.06s/epoch, loss=1.55]


Classified:  gatv1-edge-select
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▏                                                        | 3/80 [00:05<02:33,  1.99s/epoch, loss=1.55]


Classified:  gatv1-edge-select
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  22%|████████████▊                                             | 11/50 [00:05<00:18,  2.14epoch/s, loss=1.55]


Classified:  gatv1-edge-select
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:12<03:09,  4.04s/epoch, loss=1.55]


Classified:  gatv1-edge-select
num_epochs=120 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                       | 6/120 [01:10<22:12, 11.69s/epoch, loss=1.54]


Classified:  gatv1-edge-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [12:27<00:00, 14.95s/epoch, loss=-1.45e+8]


Classified:  gatv1-edge-select
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [06:38<00:00,  3.32s/epoch, loss=-7.99e+7]


Classified:  gatv1-edge-select
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▋                                                       | 5/80 [00:58<14:31, 11.62s/epoch, loss=1.55]


Classified:  gatv1-edge-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training:  12%|██████▉                                                | 15/120 [00:04<00:28,  3.69epoch/s, loss=0.0296]


Classified:  gatv1-edge-select
num_epochs=80 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 80/80 [00:23<00:00,  3.43epoch/s, loss=-4.96e+11]


Classified:  gatv1-edge-select
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:22<00:00,  2.26epoch/s, loss=-40.3]


Classified:  gatv1-edge-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:45<11:54, 15.21s/epoch, loss=1.57]


Classified:  gatv1-edge-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 120/120 [00:43<00:00,  2.74epoch/s, loss=-.501]


Classified:  gatv1-edge-select
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  11%|██████▏                                                  | 13/120 [00:09<01:18,  1.36epoch/s, loss=1.52]


Classified:  gatv1-edge-select
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   3%|█▉                                                        | 4/120 [00:16<07:45,  4.01s/epoch, loss=1.56]


Classified:  gatv1-edge-select
num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 80/80 [04:36<00:00,  3.46s/epoch, loss=-1.48e+6]


Classified:  gatv2-edge
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|█████▉                                                     | 5/50 [00:15<02:15,  3.01s/epoch, loss=1.92]


Classified:  gatv2-edge
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████████| 50/50 [10:43<00:00, 12.87s/epoch, loss=-inf]


Classified:  gatv2-edge
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▍                                                      | 7/120 [00:24<06:42,  3.56s/epoch, loss=1.55]


Classified:  gatv2-edge
num_epochs=80 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 80/80 [04:05<00:00,  3.07s/epoch, loss=-85]


Classified:  gatv2-edge
num_epochs=120 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▍                                                       | 5/120 [02:24<55:25, 28.92s/epoch, loss=1.56]


Classified:  gatv2-edge
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [07:10<00:00,  8.61s/epoch, loss=-2.06e+5]


Classified:  gatv2-edge
num_epochs=120 batch_size=64 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   8%|████▊                                                    | 10/120 [00:30<05:31,  3.02s/epoch, loss=1.85]


Classified:  gatv2-edge
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▎                                                        | 2/50 [00:05<02:18,  2.89s/epoch, loss=1.91]


Classified:  gatv2-edge
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   8%|████▎                                                     | 9/120 [00:30<06:16,  3.39s/epoch, loss=1.56]


Classified:  gatv2-edge
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 50/50 [06:00<00:00,  7.22s/epoch, loss=-7.44e+15]


Classified:  gatv2-edge
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  15%|███████▊                                            | 18/120 [1:44:57<9:54:45, 349.86s/epoch, loss=1.51]


Classified:  gatv2-edge
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▍                                                      | 7/120 [00:58<15:38,  8.30s/epoch, loss=1.58]


Classified:  gatv2-edge
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  26%|██████████████▋                                          | 31/120 [01:47<05:09,  3.47s/epoch, loss=1.63]


Classified:  gatv2-edge
num_epochs=80 batch_size=4 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  61%|████████████████████████████████████▏                      | 49/80 [06:41<04:13,  8.19s/epoch, loss=1.6]


Classified:  gatv2-edge
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:38<10:06, 12.90s/epoch, loss=1.56]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 50/50 [01:39<00:00,  1.98s/epoch, loss=-3.18e+13]


Classified:  gatv2-edge-select
num_epochs=120 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [24:29<00:00, 12.24s/epoch, loss=-5.89e+8]


Classified:  gatv2-edge-select
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████████| 120/120 [01:12<00:00,  1.65epoch/s, loss=-403]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [00:30<00:00,  1.64epoch/s, loss=-9.49e+5]


Classified:  gatv2-edge-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training:  73%|█████████████████████████████████████████               | 88/120 [00:27<00:10,  3.16epoch/s, loss=-.248]


Classified:  gatv2-edge-select
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [07:57<00:00,  3.98s/epoch, loss=-2.74e+4]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|█████▉                                                     | 5/50 [00:03<00:31,  1.42epoch/s, loss=1.55]


Classified:  gatv2-edge-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 120/120 [00:39<00:00,  3.05epoch/s, loss=-49]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:24<00:00,  2.07epoch/s, loss=-.455]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|█████▉                                                     | 5/50 [00:37<05:36,  7.47s/epoch, loss=1.55]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  14%|████████▎                                                  | 7/50 [01:28<09:01, 12.60s/epoch, loss=1.53]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:20<00:00,  2.38epoch/s, loss=-17.3]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 50/50 [00:20<00:00,  2.50epoch/s, loss=-9.72e+16]


Classified:  gatv2-edge-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  18%|██████████▌                                                | 9/50 [01:06<05:01,  7.36s/epoch, loss=1.59]


Classified:  gatv2-edge-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  53%|██████████████████████████████▉                           | 64/120 [03:15<02:51,  3.06s/epoch, loss=1.7]


Classified:  rgat
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████| 120/120 [06:51<00:00,  3.43s/epoch, loss=-1.46e+23]


Classified:  rgat
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 50/50 [06:01<00:00,  7.22s/epoch, loss=nan]


Classified:  rgat
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 120/120 [06:47<00:00,  3.40s/epoch, loss=-34.3]


Classified:  rgat
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [02:48<00:00,  3.36s/epoch, loss=-9.34e+8]


Classified:  rgat
num_epochs=50 batch_size=32 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  20%|███████████▌                                              | 10/50 [00:33<02:15,  3.38s/epoch, loss=1.59]


Classified:  rgat
num_epochs=80 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   4%|██▏                                                        | 3/80 [01:03<27:15, 21.24s/epoch, loss=1.54]


Classified:  rgat
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:42<11:02, 14.09s/epoch, loss=1.55]


Classified:  rgat
num_epochs=80 batch_size=1 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▌                                                          | 2/80 [00:55<36:19, 27.94s/epoch, loss=1.6]


Classified:  rgat
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████████| 80/80 [17:47<00:00, 13.34s/epoch, loss=-inf]


Classified:  rgat
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▍                                                        | 3/120 [00:09<06:06,  3.14s/epoch, loss=1.93]


Classified:  rgat
num_epochs=120 batch_size=32 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|████████████████████████████████████████████████████| 120/120 [07:15<00:00,  3.63s/epoch, loss=-1.71e+4]


Classified:  rgat
num_epochs=80 batch_size=64 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▍                                                         | 2/80 [00:06<04:00,  3.08s/epoch, loss=1.91]


Classified:  rgat
num_epochs=50 batch_size=32 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [03:01<00:00,  3.63s/epoch, loss=-28.2]


Classified:  rgat
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  23%|█████████████▎                                           | 28/120 [01:29<04:54,  3.20s/epoch, loss=1.57]


Classified:  rgat
num_epochs=80 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  16%|█████████▍                                                | 13/80 [00:43<03:44,  3.35s/epoch, loss=1.55]


Classified:  rgat-select
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  18%|██████████▌                                                | 9/50 [00:18<01:25,  2.08s/epoch, loss=1.59]


Classified:  rgat-select
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|▉                                                         | 2/120 [00:00<00:39,  3.02epoch/s, loss=1.92]


Classified:  rgat-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  16%|█████████▍                                                 | 8/50 [01:51<09:46, 13.97s/epoch, loss=1.54]


Classified:  rgat-select
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 120/120 [03:47<00:00,  1.90s/epoch, loss=-7.84]


Classified:  rgat-select
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   6%|███▌                                                       | 3/50 [00:10<02:45,  3.53s/epoch, loss=1.55]


Classified:  rgat-select
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                        | 4/80 [00:49<15:49, 12.49s/epoch, loss=1.54]


Classified:  rgat-select
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  36%|████████████████████▉                                     | 18/50 [00:39<01:09,  2.19s/epoch, loss=1.55]


Classified:  rgat-select
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 80/80 [19:08<00:00, 14.36s/epoch, loss=-2.11e+8]


Classified:  rgat-select
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  24%|█████████████▉                                            | 12/50 [00:04<00:13,  2.90epoch/s, loss=1.62]


Classified:  rgat-select
num_epochs=50 batch_size=32 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:34<00:00,  1.44epoch/s, loss=-49.8]


Classified:  rgat-select
num_epochs=50 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|█████▉                                                     | 5/50 [00:36<05:28,  7.31s/epoch, loss=1.56]


Classified:  rgat-select
num_epochs=80 batch_size=4 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  15%|████████▋                                                 | 12/80 [00:36<03:25,  3.03s/epoch, loss=1.54]


Classified:  rgat-select
num_epochs=50 batch_size=64 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.31epoch/s, loss=-2.08e+5]


Classified:  rgat-select
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▍                                                         | 2/80 [00:04<02:37,  2.02s/epoch, loss=1.95]


Classified:  rgat-select
num_epochs=80 batch_size=64 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>
Error with hyperparams {'num_epochs': 80, 'batch_size': 64, 'loss_difference_threshold': 0.001, 'hidden_dims': 512, 'dropout_rate': 0.5, 'learning_rate': 0.001, 'optimizers': <class 'torch.optim.adam.Adam'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Unsupported data type: <class 'list'>
num_epochs=120 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>
Error with hyperparams {'num_epochs': 120, 'batch_size': 64, 'loss_difference_threshold': 0.001, 'hidden_dims': 256, 'dropout_rate': 0.3, 'learning_rate': 0.0001, 'optimizers': <class 'torch.optim.adam.Adam'>, 'criteria': <class 'torch.

Training:  16%|█████████▍                                                 | 8/50 [00:25<02:12,  3.15s/epoch, loss=1.55]


Error with hyperparams {'num_epochs': 50, 'batch_size': 4, 'loss_difference_threshold': 0.001, 'hidden_dims': 128, 'dropout_rate': 0.5, 'learning_rate': 1e-05, 'optimizers': <class 'torch.optim.adam.Adam'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  21%|████████████▎                                             | 17/80 [02:04<07:42,  7.33s/epoch, loss=1.57]


Error with hyperparams {'num_epochs': 80, 'batch_size': 1, 'loss_difference_threshold': 0.001, 'hidden_dims': 256, 'dropout_rate': 0.7, 'learning_rate': 1e-05, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=80 batch_size=32 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 80/80 [00:41<00:00,  1.92epoch/s, loss=-80.7]


Error with hyperparams {'num_epochs': 80, 'batch_size': 32, 'loss_difference_threshold': 0.01, 'hidden_dims': 128, 'dropout_rate': 0.7, 'learning_rate': 1e-05, 'optimizers': <class 'torch.optim.adam.Adam'>, 'criteria': <class 'torch.nn.modules.loss.NLLLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  26%|███████████████▎                                           | 13/50 [01:38<04:40,  7.58s/epoch, loss=1.5]


Error with hyperparams {'num_epochs': 50, 'batch_size': 1, 'loss_difference_threshold': 0.001, 'hidden_dims': 512, 'dropout_rate': 0.3, 'learning_rate': 0.001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 120/120 [03:58<00:00,  1.99s/epoch, loss=nan]


Error with hyperparams {'num_epochs': 120, 'batch_size': 4, 'loss_difference_threshold': 0.01, 'hidden_dims': 512, 'dropout_rate': 0.7, 'learning_rate': 0.001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.NLLLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=80 batch_size=64 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 80/80 [00:23<00:00,  3.40epoch/s, loss=-17.9]


Error with hyperparams {'num_epochs': 80, 'batch_size': 64, 'loss_difference_threshold': 0.01, 'hidden_dims': 256, 'dropout_rate': 0.7, 'learning_rate': 0.0001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.NLLLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=50 batch_size=4 loss_difference_threshold=0.01 hidden_dims=512 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  10%|█████▉                                                     | 5/50 [00:10<01:37,  2.18s/epoch, loss=1.55]


Error with hyperparams {'num_epochs': 50, 'batch_size': 4, 'loss_difference_threshold': 0.01, 'hidden_dims': 512, 'dropout_rate': 0.3, 'learning_rate': 0.0001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 50/50 [01:39<00:00,  1.98s/epoch, loss=-3.05e+16]


Error with hyperparams {'num_epochs': 50, 'batch_size': 4, 'loss_difference_threshold': 0.001, 'hidden_dims': 512, 'dropout_rate': 0.7, 'learning_rate': 0.0001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.NLLLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=50 batch_size=4 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  18%|██████████▊                                                 | 9/50 [00:29<02:14,  3.28s/epoch, loss=1.5]


Error with hyperparams {'num_epochs': 50, 'batch_size': 4, 'loss_difference_threshold': 0.001, 'hidden_dims': 128, 'dropout_rate': 0.5, 'learning_rate': 0.001, 'optimizers': <class 'torch.optim.adam.Adam'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=120 batch_size=4 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   2%|█▍                                                        | 3/120 [00:05<03:50,  1.97s/epoch, loss=1.54]


Error with hyperparams {'num_epochs': 120, 'batch_size': 4, 'loss_difference_threshold': 0.01, 'hidden_dims': 256, 'dropout_rate': 0.3, 'learning_rate': 0.001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:13<00:00,  3.71epoch/s, loss=-2.52]


Error with hyperparams {'num_epochs': 50, 'batch_size': 64, 'loss_difference_threshold': 0.001, 'hidden_dims': 128, 'dropout_rate': 0.5, 'learning_rate': 0.0001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.NLLLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=120 batch_size=4 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   8%|████▊                                                    | 10/120 [00:20<03:45,  2.05s/epoch, loss=1.56]


Error with hyperparams {'num_epochs': 120, 'batch_size': 4, 'loss_difference_threshold': 0.001, 'hidden_dims': 256, 'dropout_rate': 0.5, 'learning_rate': 0.0001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 50/50 [00:28<00:00,  1.76epoch/s, loss=-68]


Error with hyperparams {'num_epochs': 50, 'batch_size': 32, 'loss_difference_threshold': 0.001, 'hidden_dims': 256, 'dropout_rate': 0.7, 'learning_rate': 1e-05, 'optimizers': <class 'torch.optim.adam.Adam'>, 'criteria': <class 'torch.nn.modules.loss.NLLLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  12%|███████                                                    | 6/50 [00:43<05:19,  7.26s/epoch, loss=1.55]


Error with hyperparams {'num_epochs': 50, 'batch_size': 1, 'loss_difference_threshold': 0.001, 'hidden_dims': 128, 'dropout_rate': 0.3, 'learning_rate': 0.0001, 'optimizers': <class 'torch.optim.sgd.SGD'>, 'criteria': <class 'torch.nn.modules.loss.CrossEntropyLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=80 batch_size=64 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 80/80 [00:26<00:00,  3.06epoch/s, loss=-25.6]


Error with hyperparams {'num_epochs': 80, 'batch_size': 64, 'loss_difference_threshold': 0.01, 'hidden_dims': 128, 'dropout_rate': 0.7, 'learning_rate': 1e-05, 'optimizers': <class 'torch.optim.adam.Adam'>, 'criteria': <class 'torch.nn.modules.loss.NLLLoss'>}: Found input variables with inconsistent numbers of samples: [1373, 3230]
num_epochs=80 batch_size=32 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.3 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 80/80 [00:38<00:00,  2.09epoch/s, loss=nan]


Classified:  bert-select-mod-gatv1
num_epochs=80 batch_size=4 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  16%|█████████▍                                                | 13/80 [00:26<02:14,  2.01s/epoch, loss=1.61]


Classified:  bert-select-mod-gatv1
num_epochs=120 batch_size=1 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 120/120 [14:26<00:00,  7.22s/epoch, loss=nan]


Classified:  bert-select-mod-gatv1
num_epochs=80 batch_size=64 loss_difference_threshold=0.01 hidden_dims=128 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                        | 4/80 [00:01<00:32,  2.34epoch/s, loss=1.53]


Classified:  bert-select-mod-gatv1
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████████| 50/50 [00:24<00:00,  2.01epoch/s, loss=-209]


Classified:  bert-select-mod-gatv1
num_epochs=80 batch_size=32 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.5 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 80/80 [00:34<00:00,  2.34epoch/s, loss=nan]


Classified:  bert-select-mod-gatv1
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training: 100%|███████████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.37epoch/s, loss=1.7]


Classified:  bert-select-mod-gatv1
num_epochs=120 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|███████████████████████████████████████████████████████| 120/120 [00:46<00:00,  2.56epoch/s, loss=-12.3]


Classified:  bert-select-mod-gatv1
num_epochs=80 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  12%|███████▎                                                  | 10/80 [01:18<09:11,  7.88s/epoch, loss=1.49]


Classified:  bert-select-mod-gatv1
num_epochs=50 batch_size=32 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████████| 50/50 [00:20<00:00,  2.48epoch/s, loss=-.811]


Classified:  bert-select-mod-gatv1
num_epochs=50 batch_size=1 loss_difference_threshold=0.001 hidden_dims=512 dropout_rate=0.5 learning_rate=1e-05 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|█████████████████████████████████████████████████████| 50/50 [06:12<00:00,  7.46s/epoch, loss=-5.99e+20]


Classified:  bert-select-mod-gatv1
num_epochs=50 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.5 learning_rate=0.0001 optimizers=<class 'torch.optim.sgd.SGD'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████████| 50/50 [00:13<00:00,  3.59epoch/s, loss=-501]


Classified:  bert-select-mod-gatv1
num_epochs=80 batch_size=64 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.7 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.NLLLoss'>


Training: 100%|██████████████████████████████████████████████████████████| 80/80 [00:41<00:00,  1.92epoch/s, loss=-422]


Classified:  bert-select-mod-gatv1
num_epochs=80 batch_size=64 loss_difference_threshold=0.01 hidden_dims=256 dropout_rate=0.3 learning_rate=1e-05 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:   5%|██▉                                                        | 4/80 [00:01<00:36,  2.09epoch/s, loss=1.56]


Classified:  bert-select-mod-gatv1
num_epochs=80 batch_size=64 loss_difference_threshold=0.001 hidden_dims=128 dropout_rate=0.7 learning_rate=0.001 optimizers=<class 'torch.optim.adam.Adam'> criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>


Training:  20%|███████████▌                                              | 16/80 [00:08<00:34,  1.83epoch/s, loss=1.51]

Classified:  bert-select-mod-gatv1





In [44]:
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', None)  # Don't limit the width of the display
pd.set_option('display.max_colwidth', None)  # Don't truncate column content
    
total_results2_sorted

Unnamed: 0,data_combination,typeSet,isSimpleFC,Accuracy,Recall,Weighted-F1,F1-micro,F1-macro,train_time,hyperparams,num_epoch
341,bert,test,True,0.48452,0.48452,0.420014,0.48452,0.239557,654.921181,num_epochs=120-batch_size=1-loss_difference_threshold=0.01-hidden_dims=128-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,22
257,bert-select-more,test,True,0.497214,0.497214,0.415728,0.497214,0.217695,5.421106,num_epochs=120-batch_size=64-loss_difference_threshold=0.01-hidden_dims=512-dropout_rate=0.5-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,6
269,bert-select-more,test,True,0.448916,0.448916,0.410792,0.448916,0.244696,44.204244,num_epochs=50-batch_size=64-loss_difference_threshold=0.001-hidden_dims=512-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,50
335,bert,test,True,0.487307,0.487307,0.398079,0.487307,0.195706,44.373164,num_epochs=80-batch_size=1-loss_difference_threshold=0.01-hidden_dims=256-dropout_rate=0.3-learning_rate=0.001-optimizers=<class 'torch.optim.sgd.SGD'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,6
261,bert-select-more,test,True,0.498142,0.498142,0.384236,0.498142,0.17861,6.43791,num_epochs=120-batch_size=32-loss_difference_threshold=0.01-hidden_dims=512-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,5
347,bert,test,True,0.501858,0.501858,0.381919,0.501858,0.175449,48.527019,num_epochs=120-batch_size=4-loss_difference_threshold=0.01-hidden_dims=128-dropout_rate=0.5-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,6
243,bert-select-more,test,True,0.497833,0.497833,0.38122,0.497833,0.175786,54.693893,num_epochs=120-batch_size=4-loss_difference_threshold=0.001-hidden_dims=128-dropout_rate=0.7-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,10
355,bert,test,True,0.497523,0.497523,0.379254,0.497523,0.170724,127.475587,num_epochs=50-batch_size=4-loss_difference_threshold=0.001-hidden_dims=256-dropout_rate=0.5-learning_rate=1e-05-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,16
340,bert,validation,True,0.441369,0.441369,0.375797,0.441369,0.229673,654.921181,num_epochs=120-batch_size=1-loss_difference_threshold=0.01-hidden_dims=128-dropout_rate=0.3-learning_rate=0.0001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,22
357,bert,test,True,0.504644,0.504644,0.374891,0.504644,0.168534,23.422105,num_epochs=120-batch_size=32-loss_difference_threshold=0.01-hidden_dims=512-dropout_rate=0.7-learning_rate=0.001-optimizers=<class 'torch.optim.adam.Adam'>-criteria=<class 'torch.nn.modules.loss.CrossEntropyLoss'>,7
