# PyTorch Training UI

### Import Libraries

In [3]:
# Detect whether this notebook is running on Google Colab.
# `google.colab` is only importable inside Colab, so an ImportError means
# "local run". Only that error is swallowed: a failing `drive.mount` (or a
# keyboard interrupt) now surfaces here instead of being silently treated as a
# local environment and failing later with a confusing path/import error.

COLAB_ENVIRONMENT = False

try:
    from google.colab import drive
except ImportError:
    pass
else:
    drive.mount('/content/drive')
    COLAB_ENVIRONMENT = True

In [4]:
import sys
import os
import pickle

# Choose where the private packages and the project root live for this run.
# `home_directory` is later used as a string prefix for data and result paths,
# and `py_file_location` is put on sys.path so the project modules can be imported.
if COLAB_ENVIRONMENT:
    # NOTE(review): these paths are relative while the drive is mounted at
    # /content/drive — this presumably relies on the working directory being /content; confirm.
    py_file_location = "./drive/MyDrive/LAB/COMP90051-A1__Groupwork__Py/PrivatePackages/pytorch" # my private packages are stored here
    home_directory = './drive/MyDrive/LAB/COMP90051-A1__Groupwork__Py/' # my home directory is stored in ./LAB of google drive
    !pip install einops
else:
    py_file_location = './PrivatePackages/pytorch'
    home_directory = './'

# make the private packages importable (absolute path, so later cwd changes do not matter)
sys.path.append(os.path.abspath(py_file_location))

from environment import *
from utils import *

from sklearn.model_selection import train_test_split

In [5]:
from model.model_class import LSTM, BERT, LSTM_DANN, BERT_DANN

### Set Seed and Load Data

In [6]:
# Global random seed: reused by the train/val/test splits, the pre-training
# data generation call and the `random_state` field of every model config below.
SEED = 2608

In [7]:
def _read_json_lines(path):
    """Return one parsed JSON object per non-blank line of the file at `path`."""
    with open(path, 'r') as f:
        # skip blank lines (e.g. a trailing newline) instead of crashing in json.loads
        return [json.loads(line) for line in f if line.strip()]


# Build every path with os.path.join so the result does not depend on whether
# `home_directory` ends with a slash (the original mixed '/data' and './data').
RAW_DATA_DIR = os.path.join(home_directory, 'data', 'raw', 'comp90051-2024s1-project-1')

data1 = _read_json_lines(os.path.join(RAW_DATA_DIR, 'domain1_train_data.json'))
data2 = _read_json_lines(os.path.join(RAW_DATA_DIR, 'domain2_train_data.json'))
data_test = _read_json_lines(os.path.join(RAW_DATA_DIR, 'test_data.json'))

# create domain labels for data (0 = domain 1 file, 1 = domain 2 file)
for instance in data1:
    instance['domain'] = 0
for instance in data2:
    instance['domain'] = 1

In [8]:
# Train / validation / test split: 70% / 15% / 15%, stratified by label and
# performed separately for each domain before the domains are concatenated.

def _labels_at(records, indices):
    """Labels of `records` at the given positions, in the given order."""
    return [records[ix]['label'] for ix in indices]


def _take(records, indices):
    """Records at the given positions, in the given order."""
    return [records[ix] for ix in indices]


# labels used for stratifying the first-stage split
label1 = _labels_at(data1, range(len(data1)))
label2 = _labels_at(data2, range(len(data2)))

# stage 1: hold out 30% of each domain
train_ix_1, val_test_ix_1 = train_test_split(
    range(len(data1)), test_size=0.3, random_state=SEED, stratify=label1
)
train_ix_2, val_test_ix_2 = train_test_split(
    range(len(data2)), test_size=0.3, random_state=SEED, stratify=label2
)

# stage 2: halve the held-out part into validation and test
val_ix_1, test_ix_1 = train_test_split(
    val_test_ix_1, test_size=0.5, random_state=SEED, stratify=_labels_at(data1, val_test_ix_1)
)
val_ix_2, test_ix_2 = train_test_split(
    val_test_ix_2, test_size=0.5, random_state=SEED, stratify=_labels_at(data2, val_test_ix_2)
)

# materialise the per-domain splits from the index lists
train_data_1, val_data_1, test_data_1 = (
    _take(data1, ixs) for ixs in (train_ix_1, val_ix_1, test_ix_1)
)
train_data_2, val_data_2, test_data_2 = (
    _take(data2, ixs) for ixs in (train_ix_2, val_ix_2, test_ix_2)
)

# combine the domains: domain-1 instances first, then domain-2
train_data = train_data_1 + train_data_2
val_data = val_data_1 + val_data_2
test_data = test_data_1 + test_data_2

---
Preprocess data

In [9]:
def W2V_DataFactory(data: list, context_window: int, seed: int, raw_token_pytorch_map: dict, k=None) -> list:
    """Build (token, context, label) pairs for word2vec-style training.

    For every position of every instance's `text`, one positive pair
    (label 1) is emitted per distinct token id found within
    `context_window // 2` positions on either side, followed by the same
    number of negative pairs (label 0) whose context id is drawn from the
    vocabulary and is not one of that position's context ids.

    Sentences are padded on the left with the 'CLS' id and on the right with
    the 'PAD' id so that the first and last tokens get a full window.

    Args:
        data: list of dicts, each with a 'text' list of raw tokens.
        context_window: odd total window width (focus token included).
        seed: seed for numpy's global RNG (negative sampling).
        raw_token_pytorch_map: raw token -> index map; must contain 'UNK' and
            'PAD'. Tokens missing from the map (and 'CLS', if absent) fall
            back to the 'UNK' index.
        k: unused; accepted only for backward compatibility.

    Returns:
        list of dicts {'token': int, 'context': int, 'label': 0 or 1}, all
        expressed as mapped indices.
    """

    assert context_window % 2 == 1, 'context window must be odd'

    np.random.seed(seed)

    MAX_SAMPLED_NEGATIVE_TOKENS = 10000

    half_window = context_window // 2
    unk_id = raw_token_pytorch_map['UNK']
    pad_id = raw_token_pytorch_map['PAD']
    cls_id = raw_token_pytorch_map.get('CLS', unk_id)

    # Pre-sample *positions* into the id list rather than the keys themselves:
    # np.random.choice over mixed int/str keys would coerce everything to str,
    # and negatives must be mapped ids to be comparable with the positives.
    vocabulary_ids = list(raw_token_pytorch_map.values())
    negative_pool = np.random.choice(len(vocabulary_ids), MAX_SAMPLED_NEGATIVE_TOKENS)
    negative_up_to = 0

    w2v_data = []

    for instance in tqdm(data):
        # repeat the padding ids half_window times on each side
        token_ids = (
            [cls_id] * half_window
            + [raw_token_pytorch_map.get(token, unk_id) for token in instance['text']]
            + [pad_id] * half_window
        )

        for i in range(half_window, len(token_ids) - half_window):

            focus_id = token_ids[i]
            context_ids = set()

            # offsets are relative to the focus position i
            for offset in range(-half_window, half_window + 1):
                if offset == 0: # don't want to make positive sample with self
                    continue
                context_id = token_ids[i + offset]
                if context_id in context_ids: # repeated context (e.g. CLS / PAD at the edges)
                    continue
                w2v_data.append({'token': focus_id, 'context': context_id, 'label': 1})
                context_ids.add(context_id)

            for _ in range(len(context_ids)): # sample the same number of negatives
                # Bounded rejection sampling: give up on this negative after one
                # full pass over the pool instead of looping forever when every
                # pooled id happens to be a context id (tiny vocabularies).
                for _attempt in range(MAX_SAMPLED_NEGATIVE_TOKENS):
                    if negative_up_to == MAX_SAMPLED_NEGATIVE_TOKENS:
                        negative_up_to = 0
                        #TODO: shuffle

                    negative_id = vocabulary_ids[negative_pool[negative_up_to]]
                    negative_up_to += 1
                    if negative_id not in context_ids: # didn't sample a positive case
                        w2v_data.append({'token': focus_id, 'context': negative_id, 'label': 0})
                        break

    return w2v_data

In [10]:
def BERT_pretrain_Generation(data: list, seed: int, raw_token_pytorch_map: dict, MAX_SENTENCE_LENGTH):
    """Generate masked-token prediction examples for BERT-style pre-training.

    Each instance's text is mapped to indices, prefixed with the 'CLS' index
    and truncated to MAX_SENTENCE_LENGTH. 15% of its positions (rounded down)
    are selected; each selected position is independently replaced by the
    'MASK' index (80%), replaced by a random non-special vocabulary index
    (10%) or left unchanged (10%). The corrupted sequence is then padded with
    the 'PAD' index to MAX_SENTENCE_LENGTH.

    One example is emitted per selected position. All examples of a sentence
    share the same corrupted `input` list object.

    Args:
        data: list of dicts with a 'text' list of raw tokens and a 'domain'.
        seed: seed for numpy's global RNG.
        raw_token_pytorch_map: raw token -> index map containing 'CLS',
            'UNK', 'MASK' and 'PAD'.
        MAX_SENTENCE_LENGTH: length of every returned `input` sequence.

    Returns:
        list of dicts with keys 'input' (corrupted, padded index list),
        'label' (the ORIGINAL index at the selected position), 'mask' (the
        selected position) and 'domain' (copied from the instance).
    """

    np.random.seed(seed)

    cls_id = raw_token_pytorch_map['CLS']
    unk_id = raw_token_pytorch_map['UNK']
    mask_id = raw_token_pytorch_map['MASK']
    pad_id = raw_token_pytorch_map['PAD']

    # candidates for the "random token" corruption: real vocabulary entries only,
    # falling back to the whole vocabulary if it contains nothing but specials
    special_ids = {cls_id, unk_id, mask_id, pad_id}
    replacement_ids = [index for index in raw_token_pytorch_map.values() if index not in special_ids]
    if not replacement_ids:
        replacement_ids = list(raw_token_pytorch_map.values())

    bert_data = []

    for instance in tqdm(data):

        original_tokens = [cls_id] + [raw_token_pytorch_map.get(token, unk_id) for token in instance['text']]
        original_tokens = original_tokens[:MAX_SENTENCE_LENGTH]

        # 15% of positions are selected for prediction
        # NOTE(review): position 0 (CLS) can be selected too, as in the original — confirm this is intended
        n_selected = int(0.15 * len(original_tokens))
        masked_token_positions = np.random.choice(len(original_tokens), n_selected, replace=False)

        # one independent draw per selected position (the original reused a single draw for all of them)
        corruption_draws = np.random.uniform(0, 1, size=n_selected)

        # corrupt a copy so the original ids stay available as labels
        tokens = list(original_tokens)

        # 80% become [MASK], 10% become a random token, 10% stay unchanged
        for masked_token_position, draw in zip(masked_token_positions, corruption_draws):
            if draw < 0.8:
                tokens[masked_token_position] = mask_id
            elif draw > 0.9:
                tokens[masked_token_position] = replacement_ids[np.random.randint(len(replacement_ids))]

        tokens = tokens + [pad_id] * (MAX_SENTENCE_LENGTH - len(tokens))

        for masked_token_position in masked_token_positions:
            new_instance = {}
            new_instance['input'] = tokens
            # the target is the token that was there BEFORE corruption
            new_instance['label'] = original_tokens[masked_token_position]
            new_instance['mask'] = int(masked_token_position)
            new_instance['domain'] = instance['domain']
            bert_data.append(new_instance)

    return bert_data


In [11]:
def BERT_pretrain_DataFactory(train_data, val_data, seed, raw_token_pytorch_map, MAX_SENTENCE_LENGTH):
    """Generate pre-training examples for both splits and return them column-wise.

    Runs BERT_pretrain_Generation on the training split and then on the
    validation split (same seed for both), and splits each result into four
    parallel lists.

    Returns:
        (train_x, train_y, train_mask, train_domain,
         val_x, val_y, val_mask, val_domain), taken from the 'input', 'label',
        'mask' and 'domain' fields of the generated examples respectively.
    """
    fields = ('input', 'label', 'mask', 'domain')

    def _columns(split):
        # one list per field, in the order given by `fields`
        examples = BERT_pretrain_Generation(split, seed, raw_token_pytorch_map, MAX_SENTENCE_LENGTH)
        return [[example[field] for example in examples] for field in fields]

    train_x, train_y, train_mask, train_domain = _columns(train_data)
    val_x, val_y, val_mask, val_domain = _columns(val_data)

    return train_x, train_y, train_mask, train_domain, val_x, val_y, val_mask, val_domain

In [16]:
# Build the masked-token pre-training lists from the (uncropped) train and validation splits.
# NOTE(review): this cell reads MAX_SENTENCE_LENGTH and raw_token_pytorch_map, which are
# assigned in cells that appear further down the notebook — on a fresh
# "Restart Kernel -> Run All" it raises NameError unless it is moved below them.
pretrain_x, pretrain_y, pretrain_mask, pretrain_dom, preval_x, preval_y, preval_mask, preval_dom = BERT_pretrain_DataFactory(train_data, val_data, SEED, raw_token_pytorch_map, MAX_SENTENCE_LENGTH)

100%|██████████| 12600/12600 [00:02<00:00, 5352.26it/s]
100%|██████████| 2700/2700 [00:00<00:00, 4138.13it/s]


In [12]:
# Preprocessing configuration shared by the cells below.
# passed to crop_sentence_length / Data_Factory and used as `seq_len` in every model config
MAX_SENTENCE_LENGTH = 512
# passed to get_raw_token_pytorch_map as `min_frequency`
MIN_FREQUENCY = 40 # because 40 is statistical sample requirement
# only applied to the training split when cropping; val / test / future always use False
MAKE_CROPPED_REMAINS_INTO_NEW_INSTANCE = False
# forwarded to Data_Factory as `low_freq_special_token`
LOW_FREQ_TOKEN = False
# forwarded to Data_Factory as `pad_front`
PAD_FRONT = False
# not referenced by any cell visible in this part of the notebook
W2V_CONTEXT_WINDOW = 5 # 2 to left, 2 to right

In [14]:
# Crop every split with crop_sentence_length (presumably truncating each text to
# MAX_SENTENCE_LENGTH tokens — see utils). Only the training split uses the
# MAKE_CROPPED_REMAINS_INTO_NEW_INSTANCE setting; the evaluation splits and the
# unlabelled future data always pass False.
cropped_train_data = crop_sentence_length(
    train_data,
    max_sentence_length=MAX_SENTENCE_LENGTH,
    make_cropped_remains_into_new_instance=MAKE_CROPPED_REMAINS_INTO_NEW_INSTANCE,
)
cropped_val_data, cropped_test_data, cropped_future_data = [
    crop_sentence_length(
        split,
        max_sentence_length=MAX_SENTENCE_LENGTH,
        make_cropped_remains_into_new_instance=False,
    )
    for split in (val_data, test_data, data_test)
]

100%|██████████| 12600/12600 [00:00<00:00, 63954.51it/s]
100%|██████████| 2700/2700 [00:00<00:00, 140970.97it/s]
100%|██████████| 2700/2700 [00:00<00:00, 97740.63it/s]
100%|██████████| 4000/4000 [00:00<00:00, 102430.01it/s]


In [15]:
# Build the raw-token -> index vocabulary from the cropped TRAINING split only,
# passing MIN_FREQUENCY as the `min_frequency` threshold.
raw_token_pytorch_map = get_raw_token_pytorch_map(data = cropped_train_data, min_frequency = MIN_FREQUENCY) 

100%|██████████| 12600/12600 [00:00<00:00, 31785.90it/s]


In [19]:
# Convert the cropped splits into model inputs, labels and domain labels;
# `future_x` comes from the unlabelled test file and therefore has no targets.
(
    train_x, train_y,
    val_x, val_y,
    test_x, test_y,
    train_dom, val_dom, test_dom,
    future_x,
) = Data_Factory(
    cropped_train_data,
    cropped_val_data,
    cropped_test_data,
    cropped_future_data,
    MAX_SENTENCE_LENGTH,
    raw_token_pytorch_map,
    CLS=True,
    low_freq_special_token=LOW_FREQ_TOKEN,
    pad_front=PAD_FRONT,
)

100%|██████████| 12600/12600 [00:00<00:00, 71599.40it/s]
100%|██████████| 2700/2700 [00:00<00:00, 72250.99it/s]
100%|██████████| 2700/2700 [00:00<00:00, 72182.38it/s]
100%|██████████| 4000/4000 [00:00<00:00, 72797.24it/s]


In [12]:
# Debug subset: keep only the first DEBUG_SUBSET_SIZE examples of the train and
# validation splits so the models below run quickly.
# NOTE(review): train_data / val_data are built as domain-1 instances followed by
# domain-2 instances, so if Data_Factory preserves that order this slice contains
# a single domain only — confirm before reading anything into the domain metrics.
# The cell overwrites its own inputs; re-run Data_Factory to get the full data back.
DEBUG_SUBSET_SIZE = 200
train_x, train_y, train_dom = (
    split[:DEBUG_SUBSET_SIZE] for split in (train_x, train_y, train_dom)
)
val_x, val_y, val_dom = (
    split[:DEBUG_SUBSET_SIZE] for split in (val_x, val_y, val_dom)
)

In [13]:
# Label distribution of the (possibly subset) training labels; the two values are
# used below as the `weight` of the classification BCELoss in every model config.
pos_prior, neg_prior = get_distribution(train_y)
pos_prior, neg_prior

(0.53, 0.47)

In [14]:
# Same distribution helper applied to the training domain labels.
# These two values are not used by any cell visible in this part of the notebook.
pos_dom_prior, neg_dom_prior = get_distribution(train_dom)
pos_dom_prior, neg_dom_prior

(0.0, 1.0)

In [None]:
# class Dataset():
#     """ Pytorch style dataset """

#     def __init__(self, data, maxlen):
#         self.data = data
#         self.maxlen = maxlen
    
#     def __len__(self):
#         return len(self.data)
    
#     def __getitem__(self, index):
#         return self.data[index]['text'], self.data[index]['label']
#         # return self.data[index]['text'], self.data[index]['label'], self.data[index]['domain']

---
### Models

#### 1. Prediction only

In [None]:
# LSTM

class LSTM_config:
    """Hyperparameter container handed to the `LSTM` model constructor.

    The class is used as a plain namespace: it is passed uninstantiated to
    `LSTM(...)`. How each field is consumed is defined in
    `model.model_class`, which is not part of this notebook, so the notes
    below are limited to what the assignments themselves show.
    """
    # ----------------- architectural hyperparameters ----------------- #
    d_model = 256
    n_recurrent_layers = 1
    bidirectional = True
    n_heads = 8
    dropout = 0.1
    n_mlp_layers = 0
    flatten = False
    activation = nn.ReLU()
    res_learning = False
    mask_flag = False # TODO
    train_embedding = False
    # ----------------- optimisation hyperparameters ----------------- #
    random_state = SEED
    batch_size = 128
    epochs = 32
    lr = 1e-3
    patience = 5 # presumably early-stopping patience in epochs — confirm in model_class
    # unweighted alternative: loss = nn.BCELoss()
    # BCELoss `weight` is an element-wise rescaling factor; with d_output = 2 it
    # presumably broadcasts over the two output columns — TODO confirm that the
    # column order matches (pos_prior, neg_prior)
    loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    # unweighted alternative: validation_loss = nn.BCELoss()
    validation_loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    regularisation_loss = None
    scheduler = True
    grad_clip = True
    # ----------------- operation hyperparameters ----------------- #
    d_output = 2
    seq_len = MAX_SENTENCE_LENGTH
    n_unique_tokens = len(raw_token_pytorch_map) # vocabulary size, special tokens included
    # ----------------- saving hyperparameters ----------------- #
    rootpath = home_directory + './'
    saving_address = home_directory +  f'./results/'
    name = f'LSTM_Classifier'
    


model = LSTM(LSTM_config) # initialise the model

# train on the training split while monitoring the validation split;
# fit() returns the epoch that is handed to eval() below as the best epoch
best_epoch = model.fit(train_x, train_y, train_dom, val_x, val_y, val_dom)
print()

# per the original note the model saves its best epoch during fit(); reload it and evaluate
# NOTE(review): the original comment said "test set", but the VALIDATION split is what is
# passed here — test_x / test_y / test_dom are never used; confirm which was intended
model.load()
model.eval(val_x, val_y, val_dom, best_epoch, evaluation_mode = True)

  from .autonotebook import tqdm as notebook_tqdm
  0%|          | 0/2 [00:06<?, ?it/s]


KeyboardInterrupt: 

In [16]:
# BERT

class BERT_config:
    """Hyperparameter container handed to the `BERT` model constructor.

    The class is used as a plain namespace: it is passed uninstantiated to
    `BERT(...)`. How each field is consumed is defined in
    `model.model_class`, which is not part of this notebook, so the notes
    below are limited to what the assignments themselves show.
    """
    # ----------------- architectural hyperparameters ----------------- #
    d_model = 128
    d_ff = 512 # = 4* d_model
    n_heads = 8
    dropout = 0.1
    e_layers = 3 
    embedding_aggregation = 'cls' # TODO
    n_mlp_layers = 0
    res_learning = False
    activation = nn.ReLU()
    mask_flag = False # causal mask
    train_embedding = False
    # ----------------- optimisation hyperparameters ----------------- #
    random_state = SEED
    batch_size = 128
    epochs = 2 # short run; the other configs in this notebook use 32
    lr = 1e-3
    patience = 2 # presumably early-stopping patience in epochs — confirm in model_class
    # unweighted alternative: loss = nn.BCELoss()
    # BCELoss `weight` is an element-wise rescaling factor; with d_output = 2 it
    # presumably broadcasts over the two output columns — TODO confirm that the
    # column order matches (pos_prior, neg_prior)
    loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    # unweighted alternative: validation_loss = nn.BCELoss()
    validation_loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    regularisation_loss = None
    scheduler = False
    grad_clip = False
    # ----------------- operation hyperparameters ----------------- #
    d_output = 2
    seq_len = MAX_SENTENCE_LENGTH
    n_unique_tokens = len(raw_token_pytorch_map) # vocabulary size, special tokens included
    # ----------------- saving hyperparameters ----------------- #
    rootpath = home_directory + './'
    saving_address = home_directory + f'./results/'
    name = f'BERT_Classifier'
    


model = BERT(BERT_config) # initialise the model

# train on the training split while monitoring the validation split;
# fit() returns the epoch that is handed to eval() below as the best epoch
best_epoch = model.fit(train_x, train_y, train_dom, val_x, val_y, val_dom)
print()

# per the original note the model saves its best epoch during fit(); reload it and evaluate
# NOTE(review): the original comment said "test set", but the VALIDATION split is what is
# passed here — test_x / test_y / test_dom are never used; confirm which was intended
model.load()
model.eval(val_x, val_y, val_dom, best_epoch, evaluation_mode = True)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 2/2 [00:03<00:00,  1.81s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 1 Train | Loss:  0.7894 | Accuracy:  0.4900| F1:  0.5641 | Balanced Accuracy:  0.4815 |  
                    Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.5641 | Domain 1 Balanced Accuracy:  0.4815 | 
                    Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 1 Val   | Loss:  0.5774 | Accuracy:  0.5100| F1:  0.0000 | Balanced Accuracy:  0.5000 | 
                Domain 1 Accuracy:  0.5100| Domain 1 F1:  0.0000 | Domain 1 Balanced Accuracy:  0.5000 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.60s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 2 Train | Loss:  0.4621 | Accuracy:  0.5200| F1:  0.4000 | Balanced Accuracy:  0.5339 |  
                    Domain 1 Accuracy:  0.5200| Domain 1 F1:  0.4000 | Domain 1 Balanced Accuracy:  0.5339 | 
                    Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 2 Val   | Loss:  0.5292 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 | 
                Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan

Epoch 2 Val   | Loss:  0.5292 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 | 
                Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


---
# DANN

In [None]:
# LSTM_DANN

class LSTM_DANN_config:
    """Hyperparameter container handed to the `LSTM_DANN` model constructor.

    Compared with `LSTM_config` it has separate MLP depths for the
    classifier and domain heads, plus `domain_loss` and `alpha`. The class is
    passed uninstantiated to `LSTM_DANN(...)`; how each field is consumed is
    defined in `model.model_class`, which is not part of this notebook.
    """
    # ----------------- architectural hyperparameters ----------------- #
    d_model = 256
    n_recurrent_layers = 1
    bidirectional = True
    n_heads = 0 # NOTE(review): LSTM_config uses 8 here — confirm the difference is intended
    dropout = 0.1
    n_mlp_clf_layers = 0
    n_mlp_dom_layers = 0
    flatten = False
    activation = nn.ReLU()
    res_learning = False
    mask_flag = False # TODO
    train_embedding = False
    # ----------------- optimisation hyperparameters ----------------- #
    random_state = SEED
    batch_size = 128
    epochs = 32
    lr = 1e-3
    patience = 5 # presumably early-stopping patience in epochs — confirm in model_class
    # unweighted alternative: loss = nn.BCELoss()
    # BCELoss `weight` is an element-wise rescaling factor; with d_output = 2 it
    # presumably broadcasts over the two output columns — TODO confirm that the
    # column order matches (pos_prior, neg_prior)
    loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    # unweighted alternative: validation_loss = nn.BCELoss()
    validation_loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    domain_loss = nn.BCELoss() # unweighted loss for the domain head
    alpha = 0.5 # presumably the weight of the adversarial domain term — confirm in model_class
    regularisation_loss = None
    scheduler = True
    grad_clip = True
    # ----------------- operation hyperparameters ----------------- #
    d_output = 2
    seq_len = MAX_SENTENCE_LENGTH
    n_unique_tokens = len(raw_token_pytorch_map) # vocabulary size, special tokens included
    # ----------------- saving hyperparameters ----------------- #
    rootpath = home_directory + './'
    saving_address = home_directory +  f'./results/'
    name = f'LSTM_DANN_Classifier'
    


model = LSTM_DANN(LSTM_DANN_config) # initialise the model

# train on the training split while monitoring the validation split;
# fit() returns the epoch that is handed to eval() below as the best epoch
best_epoch = model.fit(train_x, train_y, train_dom, val_x, val_y, val_dom)
print()

# per the original note the model saves its best epoch during fit(); reload it and evaluate
# NOTE(review): the original comment said "test set", but the VALIDATION split is what is
# passed here — test_x / test_y / test_dom are never used; confirm which was intended
model.load()
model.eval(val_x, val_y, val_dom, best_epoch, evaluation_mode = True)

  from .autonotebook import tqdm as notebook_tqdm
  nn.utils.clip_grad_norm(self.model.parameters(), 2)
100%|██████████| 2/2 [00:03<00:00,  1.70s/it]


Epoch 1 Train | Classification Loss:  0.3456 | Accuracy:  0.4900| F1:  0.6047 | Balanced Accuracy:  0.4743 | Domain Loss:  0.3872 | Domain Accuracy:  0.4900




Epoch 1 Val   | Classification Loss:  0.3059 | Accuracy:  0.9400| F1:  0.9691 | Balanced Accuracy:  0.9400 | Domain Loss:  1.0035 | Domain Accuracy:  0.0600


  nn.utils.clip_grad_norm(self.model.parameters(), 2)
100%|██████████| 2/2 [00:03<00:00,  1.66s/it]


Epoch 2 Train | Classification Loss:  0.3411 | Accuracy:  0.6050| F1:  0.7189 | Balanced Accuracy:  0.5828 | Domain Loss:  0.5406 | Domain Accuracy:  0.6050




Epoch 2 Val   | Classification Loss:  0.3100 | Accuracy:  0.9350| F1:  0.9664 | Balanced Accuracy:  0.9350 | Domain Loss:  1.2413 | Domain Accuracy:  0.0600


  nn.utils.clip_grad_norm(self.model.parameters(), 2)
100%|██████████| 2/2 [00:03<00:00,  1.57s/it]


Epoch 3 Train | Classification Loss:  0.3373 | Accuracy:  0.6200| F1:  0.7305 | Balanced Accuracy:  0.5976 | Domain Loss:  0.6193 | Domain Accuracy:  0.6200




Epoch 3 Val   | Classification Loss:  0.3428 | Accuracy:  0.3550| F1:  0.5240 | Balanced Accuracy:  0.3550 | Domain Loss:  1.1867 | Domain Accuracy:  0.0400


  nn.utils.clip_grad_norm(self.model.parameters(), 2)
100%|██████████| 2/2 [00:03<00:00,  1.69s/it]


Epoch 4 Train | Classification Loss:  0.3349 | Accuracy:  0.5900| F1:  0.6095 | Balanced Accuracy:  0.5891 | Domain Loss:  0.5586 | Domain Accuracy:  0.5900




Epoch 4 Val   | Classification Loss:  0.3087 | Accuracy:  0.9300| F1:  0.9637 | Balanced Accuracy:  0.9300 | Domain Loss:  0.9228 | Domain Accuracy:  0.0250


  nn.utils.clip_grad_norm(self.model.parameters(), 2)
100%|██████████| 2/2 [00:03<00:00,  1.61s/it]


Epoch 5 Train | Classification Loss:  0.3307 | Accuracy:  0.6200| F1:  0.7305 | Balanced Accuracy:  0.5976 | Domain Loss:  0.4404 | Domain Accuracy:  0.6200




Epoch 5 Val   | Classification Loss:  0.3042 | Accuracy:  0.9450| F1:  0.9717 | Balanced Accuracy:  0.9450 | Domain Loss:  0.7634 | Domain Accuracy:  0.0250


  nn.utils.clip_grad_norm(self.model.parameters(), 2)
100%|██████████| 2/2 [00:03<00:00,  1.63s/it]


Epoch 6 Train | Classification Loss:  0.3298 | Accuracy:  0.6350| F1:  0.7420 | Balanced Accuracy:  0.6123 | Domain Loss:  0.3594 | Domain Accuracy:  0.6350




Epoch 6 Val   | Classification Loss:  0.3162 | Accuracy:  0.8600| F1:  0.9247 | Balanced Accuracy:  0.8600 | Domain Loss:  0.5915 | Domain Accuracy:  1.0000


  nn.utils.clip_grad_norm(self.model.parameters(), 2)
100%|██████████| 2/2 [00:03<00:00,  1.58s/it]


Epoch 7 Train | Classification Loss:  0.3306 | Accuracy:  0.6600| F1:  0.7500 | Balanced Accuracy:  0.6407 | Domain Loss:  0.2772 | Domain Accuracy:  0.6600




Epoch 7 Val   | Classification Loss:  0.3336 | Accuracy:  0.7350| F1:  0.8473 | Balanced Accuracy:  0.7350 | Domain Loss:  0.4540 | Domain Accuracy:  1.0000

Epoch 5 Val   | Classification Loss:  0.3042 | Accuracy:  0.9450| F1:  0.9717 | Balanced Accuracy:  0.9450 | Domain Loss:  0.7634 | Domain Accuracy:  0.0250




In [17]:
# BERT_DANN

class BERT_DANN_config:
    """Hyperparameter container handed to the `BERT_DANN` model constructor.

    Compared with `BERT_config` it has separate MLP depths for the classifier
    and domain heads, plus `domain_loss` and `alpha`. The class is passed
    uninstantiated to `BERT_DANN(...)`; how each field is consumed is defined
    in `model.model_class`, which is not part of this notebook.
    """
    # ----------------- architectural hyperparameters ----------------- #
    d_model = 128
    d_ff = 512 # = 4* d_model
    n_heads = 8
    dropout = 0.1
    e_layers = 3
    embedding_aggregation = 'cls' # TODO
    n_mlp_clf_layers = 0
    n_mlp_dom_layers = 0
    res_learning = False
    activation = nn.ReLU()
    mask_flag = False # causal mask
    train_embedding = False
    # ----------------- optimisation hyperparameters ----------------- #
    random_state = SEED
    batch_size = 128
    epochs = 32
    lr = 1e-3
    patience = 32 # equal to `epochs`, so presumably early stopping never triggers — confirm in model_class
    # unweighted alternative: loss = nn.BCELoss()
    # BCELoss `weight` is an element-wise rescaling factor; with d_output = 2 it
    # presumably broadcasts over the two output columns — TODO confirm that the
    # column order matches (pos_prior, neg_prior)
    loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    # unweighted alternative: validation_loss = nn.BCELoss()
    validation_loss = nn.BCELoss(weight=torch.FloatTensor([pos_prior, neg_prior]))
    domain_loss = nn.BCELoss() # unweighted loss for the domain head
    alpha = 0.5 # presumably the weight of the adversarial domain term — confirm in model_class
    regularisation_loss = None
    scheduler = False
    grad_clip = False
    # ----------------- operation hyperparameters ----------------- #
    d_output = 2
    seq_len = MAX_SENTENCE_LENGTH
    n_unique_tokens = len(raw_token_pytorch_map) # vocabulary size, special tokens included
    # ----------------- saving hyperparameters ----------------- #
    rootpath = home_directory + './'
    saving_address = home_directory + f'./results/'
    # Was 'BERT_Classifier' (copy-paste from BERT_config): with the same
    # saving_address and name, this run presumably shared its saved artefacts with
    # the plain BERT classifier. Named after the model, like 'LSTM_DANN_Classifier'.
    name = f'BERT_DANN_Classifier'
    


model = BERT_DANN(BERT_DANN_config) # initialise the model

# train on the training split while monitoring the validation split;
# fit() returns the epoch that is handed to eval() below as the best epoch
best_epoch = model.fit(train_x, train_y, train_dom, val_x, val_y, val_dom)
print()

# per the original note the model saves its best epoch during fit(); reload it and evaluate
# NOTE(review): the original comment said "test set", but the VALIDATION split is what is
# passed here — test_x / test_y / test_dom are never used; confirm which was intended
model.load()
model.eval(val_x, val_y, val_dom, best_epoch, evaluation_mode = True)

100%|██████████| 2/2 [00:02<00:00,  1.37s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 1 Train | Classification Loss:  0.3791 | Accuracy:  0.5100| F1:  0.4494 | Balanced Accuracy:  0.5185 | 
                Domain Loss:  1.3177 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5100| Domain 1 F1:  0.4494 | Domain 1 Balanced Accuracy:  0.5185 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 1 Val   | Classification Loss:  0.3434 | Accuracy:  0.5550| F1:  0.5616 | Balanced Accuracy:  0.5555 |  
                            Domain Loss:  6.8106 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.5616 | Domain 1 Balanced Accuracy:  0.5555 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.62s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 2 Train | Classification Loss:  0.3695 | Accuracy:  0.5000| F1:  0.4048 | Balanced Accuracy:  0.5114 | 
                Domain Loss:  3.6131 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5000| Domain 1 F1:  0.4048 | Domain 1 Balanced Accuracy:  0.5114 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 2 Val   | Classification Loss:  0.3552 | Accuracy:  0.5100| F1:  0.0000 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  8.4002 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5100| Domain 1 F1:  0.0000 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:02<00:00,  1.33s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 3 Train | Classification Loss:  0.3515 | Accuracy:  0.4900| F1:  0.2917 | Balanced Accuracy:  0.5086 | 
                Domain Loss:  4.1389 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.2917 | Domain 1 Balanced Accuracy:  0.5086 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 3 Val   | Classification Loss:  0.3472 | Accuracy:  0.4600| F1:  0.6250 | Balanced Accuracy:  0.4690 |  
                            Domain Loss:  8.4252 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4600| Domain 1 F1:  0.6250 | Domain 1 Balanced Accuracy:  0.4690 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.62s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 4 Train | Classification Loss:  0.3429 | Accuracy:  0.5400| F1:  0.6515 | Balanced Accuracy:  0.5227 | 
                Domain Loss:  4.0955 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5400| Domain 1 F1:  0.6515 | Domain 1 Balanced Accuracy:  0.5227 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 4 Val   | Classification Loss:  0.3450 | Accuracy:  0.5100| F1:  0.0392 | Balanced Accuracy:  0.5004 |  
                            Domain Loss:  8.1359 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5100| Domain 1 F1:  0.0392 | Domain 1 Balanced Accuracy:  0.5004 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:04<00:00,  2.08s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 5 Train | Classification Loss:  0.3488 | Accuracy:  0.5250| F1:  0.3165 | Balanced Accuracy:  0.5453 | 
                Domain Loss:  3.9569 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5250| Domain 1 F1:  0.3165 | Domain 1 Balanced Accuracy:  0.5453 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 5 Val   | Classification Loss:  0.3463 | Accuracy:  0.5100| F1:  0.0000 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  7.8314 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5100| Domain 1 F1:  0.0000 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.88s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 6 Train | Classification Loss:  0.3477 | Accuracy:  0.5400| F1:  0.3867 | Balanced Accuracy:  0.5570 | 
                Domain Loss:  3.8133 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5400| Domain 1 F1:  0.3867 | Domain 1 Balanced Accuracy:  0.5570 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 6 Val   | Classification Loss:  0.3460 | Accuracy:  0.4600| F1:  0.6197 | Balanced Accuracy:  0.4686 |  
                            Domain Loss:  7.5245 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4600| Domain 1 F1:  0.6197 | Domain 1 Balanced Accuracy:  0.4686 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.95s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 7 Train | Classification Loss:  0.3453 | Accuracy:  0.5150| F1:  0.6312 | Balanced Accuracy:  0.4979 | 
                Domain Loss:  3.6628 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5150| Domain 1 F1:  0.6312 | Domain 1 Balanced Accuracy:  0.4979 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 7 Val   | Classification Loss:  0.3492 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  7.2081 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:04<00:00,  2.04s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 8 Train | Classification Loss:  0.3415 | Accuracy:  0.5350| F1:  0.6826 | Balanced Accuracy:  0.5089 | 
                Domain Loss:  3.5102 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5350| Domain 1 F1:  0.6826 | Domain 1 Balanced Accuracy:  0.5089 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 8 Val   | Classification Loss:  0.3449 | Accuracy:  0.5550| F1:  0.5528 | Balanced Accuracy:  0.5551 |  
                            Domain Loss:  6.8872 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.5528 | Domain 1 Balanced Accuracy:  0.5551 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.64s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 9 Train | Classification Loss:  0.3513 | Accuracy:  0.5000| F1:  0.4186 | Balanced Accuracy:  0.5102 | 
                Domain Loss:  3.3524 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5000| Domain 1 F1:  0.4186 | Domain 1 Balanced Accuracy:  0.5102 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 9 Val   | Classification Loss:  0.3457 | Accuracy:  0.5100| F1:  0.0000 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  6.5507 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5100| Domain 1 F1:  0.0000 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.72s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 10 Train | Classification Loss:  0.3506 | Accuracy:  0.4700| F1:  0.3614 | Balanced Accuracy:  0.4819 | 
                Domain Loss:  3.1837 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.4700| Domain 1 F1:  0.3614 | Domain 1 Balanced Accuracy:  0.4819 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 10 Val   | Classification Loss:  0.3455 | Accuracy:  0.5550| F1:  0.6616 | Balanced Accuracy:  0.5615 |  
                            Domain Loss:  6.2120 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.6616 | Domain 1 Balanced Accuracy:  0.5615 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:02<00:00,  1.48s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 11 Train | Classification Loss:  0.3492 | Accuracy:  0.5250| F1:  0.6494 | Balanced Accuracy:  0.5055 | 
                Domain Loss:  3.0194 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5250| Domain 1 F1:  0.6494 | Domain 1 Balanced Accuracy:  0.5055 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 11 Val   | Classification Loss:  0.3477 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  5.8782 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:02<00:00,  1.41s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 12 Train | Classification Loss:  0.3432 | Accuracy:  0.5350| F1:  0.6714 | Balanced Accuracy:  0.5119 | 
                Domain Loss:  2.8586 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5350| Domain 1 F1:  0.6714 | Domain 1 Balanced Accuracy:  0.5119 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 12 Val   | Classification Loss:  0.3457 | Accuracy:  0.4750| F1:  0.6263 | Balanced Accuracy:  0.4833 |  
                            Domain Loss:  5.5524 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4750| Domain 1 F1:  0.6263 | Domain 1 Balanced Accuracy:  0.4833 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:02<00:00,  1.40s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 13 Train | Classification Loss:  0.3508 | Accuracy:  0.4850| F1:  0.5830 | Balanced Accuracy:  0.4726 | 
                Domain Loss:  2.6977 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.4850| Domain 1 F1:  0.5830 | Domain 1 Balanced Accuracy:  0.4726 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 13 Val   | Classification Loss:  0.3452 | Accuracy:  0.5450| F1:  0.6513 | Balanced Accuracy:  0.5513 |  
                            Domain Loss:  5.2349 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5450| Domain 1 F1:  0.6513 | Domain 1 Balanced Accuracy:  0.5513 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.91s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 14 Train | Classification Loss:  0.3439 | Accuracy:  0.5250| F1:  0.5992 | Balanced Accuracy:  0.5158 | 
                Domain Loss:  2.5428 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5250| Domain 1 F1:  0.5992 | Domain 1 Balanced Accuracy:  0.5158 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 14 Val   | Classification Loss:  0.3476 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  4.9279 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.88s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 15 Train | Classification Loss:  0.3424 | Accuracy:  0.5400| F1:  0.6593 | Balanced Accuracy:  0.5209 | 
                Domain Loss:  2.3933 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5400| Domain 1 F1:  0.6593 | Domain 1 Balanced Accuracy:  0.5209 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 15 Val   | Classification Loss:  0.3483 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  4.6290 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.89s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 16 Train | Classification Loss:  0.3455 | Accuracy:  0.5250| F1:  0.6735 | Balanced Accuracy:  0.4995 | 
                Domain Loss:  2.2474 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5250| Domain 1 F1:  0.6735 | Domain 1 Balanced Accuracy:  0.4995 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 16 Val   | Classification Loss:  0.3503 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  4.3383 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.71s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 17 Train | Classification Loss:  0.3422 | Accuracy:  0.5250| F1:  0.6758 | Balanced Accuracy:  0.4989 | 
                Domain Loss:  2.1047 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5250| Domain 1 F1:  0.6758 | Domain 1 Balanced Accuracy:  0.4989 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 17 Val   | Classification Loss:  0.3474 | Accuracy:  0.4750| F1:  0.6441 | Balanced Accuracy:  0.4847 |  
                            Domain Loss:  4.0546 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4750| Domain 1 F1:  0.6441 | Domain 1 Balanced Accuracy:  0.4847 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:02<00:00,  1.48s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 18 Train | Classification Loss:  0.3373 | Accuracy:  0.5550| F1:  0.6833 | Balanced Accuracy:  0.5326 | 
                Domain Loss:  1.9664 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.6833 | Domain 1 Balanced Accuracy:  0.5326 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 18 Val   | Classification Loss:  0.3476 | Accuracy:  0.4750| F1:  0.6441 | Balanced Accuracy:  0.4847 |  
                            Domain Loss:  3.7808 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4750| Domain 1 F1:  0.6441 | Domain 1 Balanced Accuracy:  0.4847 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.63s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 19 Train | Classification Loss:  0.3457 | Accuracy:  0.5550| F1:  0.6942 | Balanced Accuracy:  0.5296 | 
                Domain Loss:  1.8322 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.6942 | Domain 1 Balanced Accuracy:  0.5296 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 19 Val   | Classification Loss:  0.3477 | Accuracy:  0.4750| F1:  0.6441 | Balanced Accuracy:  0.4847 |  
                            Domain Loss:  3.5164 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4750| Domain 1 F1:  0.6441 | Domain 1 Balanced Accuracy:  0.4847 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.97s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 20 Train | Classification Loss:  0.3370 | Accuracy:  0.5800| F1:  0.6866 | Balanced Accuracy:  0.5616 | 
                Domain Loss:  1.7028 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5800| Domain 1 F1:  0.6866 | Domain 1 Balanced Accuracy:  0.5616 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 20 Val   | Classification Loss:  0.3437 | Accuracy:  0.5900| F1:  0.6639 | Balanced Accuracy:  0.5946 |  
                            Domain Loss:  3.2588 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5900| Domain 1 F1:  0.6639 | Domain 1 Balanced Accuracy:  0.5946 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.83s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 21 Train | Classification Loss:  0.3404 | Accuracy:  0.5550| F1:  0.6307 | Balanced Accuracy:  0.5447 | 
                Domain Loss:  1.5760 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.6307 | Domain 1 Balanced Accuracy:  0.5447 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 21 Val   | Classification Loss:  0.3488 | Accuracy:  0.4750| F1:  0.6416 | Balanced Accuracy:  0.4845 |  
                            Domain Loss:  3.0173 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4750| Domain 1 F1:  0.6416 | Domain 1 Balanced Accuracy:  0.4845 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:02<00:00,  1.42s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 22 Train | Classification Loss:  0.3385 | Accuracy:  0.5550| F1:  0.6877 | Balanced Accuracy:  0.5314 | 
                Domain Loss:  1.4590 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.6877 | Domain 1 Balanced Accuracy:  0.5314 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 22 Val   | Classification Loss:  0.3564 | Accuracy:  0.4900| F1:  0.6577 | Balanced Accuracy:  0.5000 |  
                            Domain Loss:  2.7839 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4900| Domain 1 F1:  0.6577 | Domain 1 Balanced Accuracy:  0.5000 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.78s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 23 Train | Classification Loss:  0.3489 | Accuracy:  0.5200| F1:  0.6800 | Balanced Accuracy:  0.4918 | 
                Domain Loss:  1.3457 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5200| Domain 1 F1:  0.6800 | Domain 1 Balanced Accuracy:  0.4918 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 23 Val   | Classification Loss:  0.3429 | Accuracy:  0.5800| F1:  0.6410 | Balanced Accuracy:  0.5836 |  
                            Domain Loss:  2.5513 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5800| Domain 1 F1:  0.6410 | Domain 1 Balanced Accuracy:  0.5836 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.86s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 24 Train | Classification Loss:  0.3429 | Accuracy:  0.5300| F1:  0.5300 | Balanced Accuracy:  0.5319 | 
                Domain Loss:  1.2282 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5300| Domain 1 F1:  0.5300 | Domain 1 Balanced Accuracy:  0.5319 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 24 Val   | Classification Loss:  0.3417 | Accuracy:  0.5550| F1:  0.4331 | Balanced Accuracy:  0.5509 |  
                            Domain Loss:  2.3307 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.4331 | Domain 1 Balanced Accuracy:  0.5509 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.96s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 25 Train | Classification Loss:  0.3428 | Accuracy:  0.5950| F1:  0.6124 | Balanced Accuracy:  0.5944 | 
                Domain Loss:  1.1276 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5950| Domain 1 F1:  0.6124 | Domain 1 Balanced Accuracy:  0.5944 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 25 Val   | Classification Loss:  0.3607 | Accuracy:  0.4750| F1:  0.6441 | Balanced Accuracy:  0.4847 |  
                            Domain Loss:  2.1449 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.4750| Domain 1 F1:  0.6441 | Domain 1 Balanced Accuracy:  0.4847 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.72s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 26 Train | Classification Loss:  0.3351 | Accuracy:  0.5550| F1:  0.6983 | Balanced Accuracy:  0.5284 | 
                Domain Loss:  1.0362 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.6983 | Domain 1 Balanced Accuracy:  0.5284 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 26 Val   | Classification Loss:  0.3482 | Accuracy:  0.5350| F1:  0.6568 | Balanced Accuracy:  0.5423 |  
                            Domain Loss:  1.9612 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5350| Domain 1 F1:  0.6568 | Domain 1 Balanced Accuracy:  0.5423 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.83s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 27 Train | Classification Loss:  0.3311 | Accuracy:  0.5950| F1:  0.6773 | Balanced Accuracy:  0.5818 | 
                Domain Loss:  0.9472 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5950| Domain 1 F1:  0.6773 | Domain 1 Balanced Accuracy:  0.5818 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 27 Val   | Classification Loss:  0.3409 | Accuracy:  0.5700| F1:  0.5376 | Balanced Accuracy:  0.5688 |  
                            Domain Loss:  1.7936 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5700| Domain 1 F1:  0.5376 | Domain 1 Balanced Accuracy:  0.5688 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.77s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 28 Train | Classification Loss:  0.3289 | Accuracy:  0.6100| F1:  0.6139 | Balanced Accuracy:  0.6116 | 
                Domain Loss:  0.8681 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.6100| Domain 1 F1:  0.6139 | Domain 1 Balanced Accuracy:  0.6116 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 28 Val   | Classification Loss:  0.3456 | Accuracy:  0.5600| F1:  0.6480 | Balanced Accuracy:  0.5652 |  
                            Domain Loss:  1.6533 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5600| Domain 1 F1:  0.6480 | Domain 1 Balanced Accuracy:  0.5652 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.74s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 29 Train | Classification Loss:  0.3306 | Accuracy:  0.5950| F1:  0.6989 | Balanced Accuracy:  0.5764 | 
                Domain Loss:  0.7999 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.5950| Domain 1 F1:  0.6989 | Domain 1 Balanced Accuracy:  0.5764 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 29 Val   | Classification Loss:  0.3506 | Accuracy:  0.5400| F1:  0.6541 | Balanced Accuracy:  0.5468 |  
                            Domain Loss:  1.5201 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5400| Domain 1 F1:  0.6541 | Domain 1 Balanced Accuracy:  0.5468 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:02<00:00,  1.40s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 30 Train | Classification Loss:  0.3139 | Accuracy:  0.6500| F1:  0.7266 | Balanced Accuracy:  0.6355 | 
                Domain Loss:  0.7353 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.6500| Domain 1 F1:  0.7266 | Domain 1 Balanced Accuracy:  0.6355 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 30 Val   | Classification Loss:  0.3445 | Accuracy:  0.5550| F1:  0.5083 | Balanced Accuracy:  0.5533 |  
                            Domain Loss:  1.3859 | Domain Accuracy:  0.0000 |  
                            Domain 1 Accuracy:  0.5550| Domain 1 F1:  0.5083 | Domain 1 Balanced Accuracy:  0.5533 |  
                            Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


100%|██████████| 2/2 [00:03<00:00,  1.54s/it]
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 31 Train | Classification Loss:  0.3133 | Accuracy:  0.6750| F1:  0.6701 | Balanced Accuracy:  0.6783 | 
                Domain Loss:  0.6697 | Domain Accuracy:  0.0000 | 
                Domain 1 Accuracy:  0.6750| Domain 1 F1:  0.6701 | Domain 1 Balanced Accuracy:  0.6783 | 
                Domain 2 Accuracy:     nan| Domain 2 F1:  0.0000 | Domain 2 Balanced Accuracy:     nan


KeyboardInterrupt: 

---
# Evaluation and Inference

In [None]:
# Evaluate the model on (val_x, val_y) and then on (test_x, test_y), passing best_epoch to both calls.
# NOTE(review): load() is called with no arguments; presumably it restores a checkpoint saved during training — confirm in model.model_class.
model.load()
# NOTE(review): best_epoch is not assigned in this cell, so it must already exist in the kernel — confirm it is set even when training is interrupted.
model.eval(val_x, val_y, best_epoch, evaluation_mode = True)
model.eval(test_x, test_y, best_epoch, evaluation_mode = True)

In [None]:
# Identifier for this run; it becomes the filename prefix of the predictions CSV written by the export cell.
# NOTE(review): the string looks like an encoding of the model configuration — confirm it matches the model that was actually trained.
EXPERIMENT_NAME = '2lstm_unidir_512d_8_512t40_wbce'

In [None]:
# Score the unlabeled inputs and export hard class labels as a submission-style CSV.
# NOTE(review): assumes model.predict returns one indexable pair of class scores per instance — confirm.
future_scores = model.predict(future_x)

# Predict class 1 only when its score is strictly greater than class 0's; ties fall to class 0.
# The raw scores keep their own name so re-reading this cell never confuses scores with labels.
future_pred_y = [1 if scores[1] > scores[0] else 0 for scores in future_scores]

# 'id' is simply the row position of each instance in future_x.
predictions = pd.DataFrame({'id': range(len(future_pred_y)), 'class': future_pred_y})

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists before writing (no-op when it is already there).
predictions_dir = os.path.join(home_directory, 'predictions')
os.makedirs(predictions_dir, exist_ok=True)
predictions.to_csv(os.path.join(predictions_dir, f'{EXPERIMENT_NAME}_classification.csv'), index=False)