In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import CLIPTokenizer, CLIPTextModel, CLIPVisionModel
from torch.utils.data import DataLoader, Dataset
import PIL
import os
from tqdm import tqdm
import pandas as pd
import torchvision.transforms as transforms
import regex
import numpy as np
from sklearn.model_selection import train_test_split
import random
from collections import Counter
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [30]:
def comp_type_map(comp_type):
    type_to_label = {'Direct': 0, 'Metaphorical': 1, 'Semantic list': 2, 'Reduplication': 3, 'Single': 4}
    return type_to_label[comp_type]

def label_to_comp_type(label):
    label_to_type = {0: 'Direct', 1: 'Metaphorical', 2: 'Semantic list', 3: 'Reduplication', 4: 'Single'}
    return label_to_type[label]

In [62]:
class EmojiCLIP(nn.Module):
    def __init__(self, vit_model=CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")):
        super().__init__()
        self.vit = vit_model

        # freeze ViT in early training
        for param in self.vit.parameters():
            param.requires_grad = False
    def extract_embedding(self, image):
        with torch.no_grad():
            return self.vit(pixel_values=image).pooler_output
    
    def forward(self, image):
        image_embedding = self.extract_embedding(image)
        return image_embedding
    
class EmojiImageDataset(Dataset):
    def __init__(self, image_dir, transform=transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])):
        super().__init__()
        self.image_dir = image_dir
        self.images = os.listdir(image_dir)
        self.transform = transform
    def __len__(self):
        return len(self.images)
    def __getitem__(self, idx):
        image_path = os.path.join(self.image_dir, self.images[idx])
        image = PIL.Image.open(image_path)
        if self.transform:
            image = self.transform(image)
        return image

In [58]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
emoji_image_dataset = EmojiImageDataset('combine_em_images')
model = EmojiCLIP().to(device)
all_embeddings = []
for i, image_embed in tqdm(enumerate(emoji_image_dataset), desc='Extracting embeddings', total=len(emoji_image_dataset)):
        batch_embeddings = model(image_embed.unsqueeze(0).to(device))
        all_embeddings.extend(batch_embeddings)

Extracting embeddings: 100%|██████████| 1655/1655 [04:18<00:00,  6.40it/s]


In [73]:
class EmojisDataset(Dataset):
    def __init__(self, elco_df, image_embeds, em_max_len=3, text_max_len=6, tokenizer='openai/clip-vit-base-patch32'):
        super().__init__()
        self.transform = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor()])
        self.elco_df = elco_df
        self.image_embeds = image_embeds
        self.emoji_descriptions = [self.preprocess_emoji_description(desc) for desc in elco_df["Description"]]
        self.raw_emoji_descriptions = elco_df["Description"].values
        self.em_max_len = em_max_len
        self.text_max_len = text_max_len
        self.clip_tokenizer = CLIPTokenizer.from_pretrained(tokenizer)
        self.composition_type = [comp_type_map(t) for t in elco_df['Composition strategy'].values]
    
    def preprocess_emoji_description(self, text):
        text = text.replace('\'\'', '').lower()
        split_text = regex.findall(r'\':?(.*?):?\'', text)
        return split_text
    def preprocess_en(self, text):
        return "This is" + text.lower().strip()
    
    def __len__(self):
        return len(self.emoji_descriptions)
    
    def __getitem__(self, index):
        emoji_descs = self.emoji_descriptions[index]
        image_embed = self.image_embeds[index]
        en_text = self.preprocess_en(self.elco_df['EN'].values[index])
        composition_type = self.composition_type[index]
        
        
        emoji_desc = " ".join(emoji_descs)
        desc_tokens = self.clip_tokenizer(emoji_desc, 
                                  truncation=True, 
                                  padding='max_length',
                                  return_tensors='pt',
                                  max_length=64)
        
        en_tokens = self.clip_tokenizer(en_text, 
                                  truncation=True, 
                                  padding='max_length',
                                  return_tensors='pt',
                                  max_length=self.text_max_len)
        en_tokens = {k: v.squeeze(0) for k, v in en_tokens.items()}
        desc_tokens = {k: v.squeeze(0) for k, v in desc_tokens.items()}
        return (image_embed, desc_tokens, en_tokens), composition_type
        

In [32]:
class TypeClassifier(nn.Module):
    def __init__(self, input_dim=1024, hidden_dim=512, num_classes=5, dropout=0.1):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, num_classes),
        )
        
    def forward(self, x):
        return self.model(x)

In [78]:
class EmojiCompositionModel(nn.Module):
    def __init__(self, text_encoder='openai/clip-vit-base-patch32',
                 image_embed_dim=768, projection_dim=512, freeze_clip=False, text_ratio=0.5, visual_ratio=0.5):
        super().__init__()


        # self.emoji_transformer = ImageEmbedTransformer(embedding_dim=image_embed_dim, num_layers=num_layers)

        self.text_encoder = CLIPTextModel.from_pretrained(text_encoder).text_model
        self.text_hidden_dim = self.text_encoder.config.hidden_size
        self.desc_embed_size = int(projection_dim * text_ratio)
        self.image_embed_size = int(projection_dim * visual_ratio)
        self.text_proj_dim = self.desc_embed_size + self.image_embed_size
        
        if freeze_clip:
            for name, param in self.text_encoder.named_parameters():
                    # if "encoder.layers.10" in name or "encoder.layers.11" in name:
                    #     param.requires_grad = True
                    # else:
                    param.requires_grad = False
                    
        
        self.text_proj = nn.Sequential(
            nn.Linear(self.text_hidden_dim, self.text_proj_dim),
            nn.ReLU(),
            nn.LayerNorm(self.text_proj_dim),
            nn.Dropout(0.1),
            )
        
        self.desc_proj = nn.Sequential(
            nn.Linear(self.text_hidden_dim, self.desc_embed_size),
            nn.ReLU(),
            nn.LayerNorm(self.desc_embed_size),
            nn.Dropout(0.1),
            )
        
        self.image_proj = nn.Sequential(
            nn.Linear(image_embed_dim, self.image_embed_size),
            nn.ReLU(),
            nn.LayerNorm(self.image_embed_size),
            nn.Dropout(0.1)
            )
        
        input_dim = self.text_proj_dim * 3
        self.classifier = TypeClassifier(input_dim=input_dim, hidden_dim=projection_dim * 2, num_classes=5)
        
    def forward(self, image_embed, desc_input, text_input):
        desc_out = self.text_encoder(**desc_input).pooler_output
        z_desc = self.desc_proj(desc_out)
        z_image = self.image_proj(image_embed)
        
        z_emoji = torch.concatenate([z_desc, z_image], dim=-1)
        
        text_out = self.text_encoder(**text_input).pooler_output
        z_text = self.text_proj(text_out)

        z_emoji = F.normalize(z_emoji, dim=-1)
        z_text = F.normalize(z_text, dim=-1)
        z_concat = torch.cat([z_emoji, z_text, torch.abs(z_emoji - z_text)], dim=-1)
        output = self.classifier(z_concat)

        return (z_emoji, z_text), output

In [34]:
elco_df = pd.read_csv('../../data/ELCo.csv')

In [74]:
emojis_train_dataset = EmojisDataset(elco_df, all_embeddings)
train_dataset, validate_dataset = train_test_split(emojis_train_dataset, test_size=0.08, shuffle=True, random_state=42)
emojis_train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True)
emojis_validate_dataloader = DataLoader(validate_dataset, batch_size=32, shuffle=False, drop_last=True)
# emojis_test_dataset = EmojisDataset(emoji_embed_dict, test_df)
# emojis_test_dataloader = DataLoader(emojis_test_dataset, batch_size=32, shuffle=False, drop_last=True)

In [36]:
class EarlyStopping:
    def __init__(self, patience=2, delta=0.0):
        self.patience = patience
        self.delta = delta
        self.counter = 0
        self.best_loss = float('inf')
        self.early_stop = False

    def __call__(self, val_loss, accuracy):
        if val_loss < self.best_loss - self.delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [81]:
def train(text_ratio, visual_ratio, verbose=False):
    # reconstruct text dataloader -> each bacth is a list of all possible en for the emojis in given batch
    labels = [data[1] for data in emojis_train_dataset]
    class_counts = Counter(labels)
    class_weights = torch.tensor([len(labels) / count for label, count in class_counts.items()])

    en_em_model = EmojiCompositionModel(freeze_clip=True, text_ratio=text_ratio, visual_ratio=visual_ratio).to(device)
    optimizer = torch.optim.AdamW(en_em_model.parameters(), lr=2e-5)
    early_stopping = EarlyStopping(patience=3)
    # criterion = nn.CrossEntropyLoss(weight=class_weights)
    num_epochs = 45
    results = []

    for epoch in tqdm(range(num_epochs), total=num_epochs, desc=f'Training at p_text={text_ratio}'):
        total_loss = 0.0
        en_em_model.train()
        incorrect_labels = {'Direct': 0, 'Metaphorical': 0, 'Semantic list': 0, 'Reduplication': 0, 'Single': 0}
        correct_count_train = 0
        
        for (image, desc_input, en_input), ctype in emojis_train_dataloader:
            (z_emojis, z_text), output = en_em_model(image, desc_input, en_input)
            
            classifier_loss = F.cross_entropy(output, ctype, weight=class_weights, reduction='none')
            
            classifier_pred = torch.argmax(output, dim=-1)
            correct_count_train += torch.sum(classifier_pred == ctype).item()
            # for i, pred in enumerate(clssifier_pred):
            #     if pred != ctype[i]:
            #         incorrect_labels[label_to_comp_type(ctype[i].item())] += 1
            
            
            target_class = torch.tensor([0, 1])
            is_target_class = torch.isin(ctype, target_class)
            is_incorrect = (classifier_pred != ctype)
            should_double = is_target_class & is_incorrect
            # print(classifier_loss,"classifier_loss")
            weights = torch.ones_like(classifier_loss)
            weights[should_double] = 2
            adjusted_loss = classifier_loss * weights
            classifier_loss = adjusted_loss.mean()
                    
            match_labels = torch.arange(emojis_train_dataloader.batch_size).to(device)
            loss_per_emojis = z_emojis @ z_text.T
            loss_per_text = loss_per_emojis.T
            loss_em = F.cross_entropy(loss_per_emojis, match_labels)
            loss_text = F.cross_entropy(loss_per_text, match_labels)

            loss = classifier_loss + (loss_em + loss_text) / 2
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        total_loss /= len(emojis_train_dataloader)
        train_accuracy = correct_count_train / len(emojis_train_dataset)
            
        en_em_model.eval()
        correct_count = 0
        precision_scores = []
        recall_scores = []
        f1_scores = []
        with torch.no_grad():
            for (image, desc_input, en_input), ctype in emojis_validate_dataloader:
                _, output = en_em_model(image, desc_input, en_input)
                prediction = torch.argmax(output, dim=-1)
                correct_count += torch.sum(prediction == ctype).item()
                precision_scores.append(precision_score(ctype.cpu(), prediction.cpu(), average='macro', zero_division=0))
                recall_scores.append(recall_score(ctype.cpu(), prediction.cpu(), average='macro', zero_division=0))
                f1_scores.append(f1_score(ctype.cpu(), prediction.cpu(), average='macro', zero_division=0))
                for i, pred in enumerate(prediction):
                    # print(f"Pred: {pred}, True: {ctype[i]}")
                    if pred != ctype[i]:
                        incorrect_labels[label_to_comp_type(ctype[i].item())] += 1
            validation_accuracy = correct_count / len(emojis_validate_dataset)
            precision = np.mean(precision_scores)
            recall = np.mean(recall_scores)
            f1 = np.mean(f1_scores)
            
        if verbose:    
            print(f'Incorrect labels: {incorrect_labels}')
            print(f'Epoch {epoch} - Train Loss: {total_loss}, Train accuracy: {train_accuracy}')
            print(f'Validation Accuracy: {validation_accuracy}, recall: {recall}, precision: {precision}, f1: {f1}')
            print("=============================")
        result = {
                "train_accuracy": train_accuracy,
                "validation_accuracy": validation_accuracy,
                "recall": recall,
                "precision": precision,
                "f1": f1,
                "incorrect_labels": incorrect_labels,
            }
        results.append(result)
        
        early_stopping(total_loss, validation_accuracy)
        if early_stopping.early_stop:
            if verbose:
                print(f'Early stopping at epoch {epoch}')
            break
    return results, en_em_model

In [82]:
train(0.5, 0.5, verbose=True)

Training at p_text=0.5:   2%|▏         | 1/45 [00:51<37:38, 51.33s/it]

Incorrect labels: {'Direct': 7, 'Metaphorical': 27, 'Semantic list': 7, 'Reduplication': 6, 'Single': 7}
Epoch 0 - Train Loss: 13.37894078518482, Train accuracy: 0.3861027190332326
Validation Accuracy: 0.556390977443609, recall: 0.3145299145299145, precision: 0.2671776288726753, f1: 0.27859013184652787


Training at p_text=0.5:   4%|▍         | 2/45 [01:28<30:53, 43.11s/it]

Incorrect labels: {'Direct': 8, 'Metaphorical': 31, 'Semantic list': 7, 'Reduplication': 6, 'Single': 7}
Epoch 1 - Train Loss: 13.142476852903975, Train accuracy: 0.44410876132930516
Validation Accuracy: 0.518796992481203, recall: 0.28976266788766786, precision: 0.24555735930735928, f1: 0.25463244568192095


Training at p_text=0.5:   7%|▋         | 3/45 [02:05<28:15, 40.37s/it]

Incorrect labels: {'Direct': 6, 'Metaphorical': 35, 'Semantic list': 7, 'Reduplication': 6, 'Single': 7}
Epoch 2 - Train Loss: 13.123263663433967, Train accuracy: 0.43564954682779455
Validation Accuracy: 0.5037593984962406, recall: 0.2846535409035409, precision: 0.2437878787878788, f1: 0.24449836497799146


Training at p_text=0.5:   9%|▉         | 4/45 [02:43<26:47, 39.20s/it]

Incorrect labels: {'Direct': 6, 'Metaphorical': 41, 'Semantic list': 6, 'Reduplication': 6, 'Single': 7}
Epoch 3 - Train Loss: 12.998592052053898, Train accuracy: 0.4453172205438066
Validation Accuracy: 0.46616541353383456, recall: 0.2671573565323565, precision: 0.28305924510272334, f1: 0.2342469218785008


Training at p_text=0.5:  11%|█         | 5/45 [03:20<25:39, 38.49s/it]

Incorrect labels: {'Direct': 8, 'Metaphorical': 56, 'Semantic list': 2, 'Reduplication': 6, 'Single': 7}
Epoch 4 - Train Loss: 12.877846636670702, Train accuracy: 0.4700906344410876
Validation Accuracy: 0.3684210526315789, recall: 0.30737408424908425, precision: 0.18192640692640694, f1: 0.1827280322017164


Training at p_text=0.5:  13%|█▎        | 6/45 [03:57<24:38, 37.92s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 54, 'Semantic list': 1, 'Reduplication': 6, 'Single': 7}
Epoch 5 - Train Loss: 12.788621029955275, Train accuracy: 0.4719033232628399
Validation Accuracy: 0.3609022556390977, recall: 0.322615231990232, precision: 0.225481214691741, f1: 0.19575581830200825


Training at p_text=0.5:  16%|█▌        | 7/45 [04:34<23:53, 37.72s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 52, 'Semantic list': 1, 'Reduplication': 6, 'Single': 7}
Epoch 6 - Train Loss: 12.897010356821912, Train accuracy: 0.4368580060422961
Validation Accuracy: 0.3609022556390977, recall: 0.3206707875457876, precision: 0.2680301928463693, f1: 0.206953971895677


Training at p_text=0.5:  18%|█▊        | 8/45 [05:11<23:06, 37.48s/it]

Incorrect labels: {'Direct': 10, 'Metaphorical': 43, 'Semantic list': 1, 'Reduplication': 6, 'Single': 7}
Epoch 7 - Train Loss: 12.624122538465135, Train accuracy: 0.4501510574018127
Validation Accuracy: 0.45864661654135336, recall: 0.3714476495726496, precision: 0.31923701298701296, f1: 0.2716568046676406


Training at p_text=0.5:  20%|██        | 9/45 [05:48<22:25, 37.38s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 47, 'Semantic list': 1, 'Reduplication': 6, 'Single': 7}
Epoch 8 - Train Loss: 12.340368352037794, Train accuracy: 0.4652567975830816
Validation Accuracy: 0.39849624060150374, recall: 0.33678800366300365, precision: 0.2714155982905983, f1: 0.2334872592685093


Training at p_text=0.5:  22%|██▏       | 10/45 [06:25<21:44, 37.28s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 41, 'Semantic list': 1, 'Reduplication': 6, 'Single': 6}
Epoch 9 - Train Loss: 12.027336709042812, Train accuracy: 0.4809667673716012
Validation Accuracy: 0.46616541353383456, recall: 0.43091804029304026, precision: 0.3396546513342179, f1: 0.321980547388517


Training at p_text=0.5:  24%|██▍       | 11/45 [07:02<21:03, 37.17s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 49, 'Semantic list': 1, 'Reduplication': 6, 'Single': 6}
Epoch 10 - Train Loss: 11.61053355196689, Train accuracy: 0.5027190332326285
Validation Accuracy: 0.41353383458646614, recall: 0.4098634004884005, precision: 0.34640664160401, f1: 0.2881497157812947


Training at p_text=0.5:  27%|██▋       | 12/45 [07:39<20:27, 37.18s/it]

Incorrect labels: {'Direct': 10, 'Metaphorical': 45, 'Semantic list': 1, 'Reduplication': 6, 'Single': 6}
Epoch 11 - Train Loss: 11.048124110445062, Train accuracy: 0.5262839879154079
Validation Accuracy: 0.45112781954887216, recall: 0.42700320512820517, precision: 0.3559683299156983, f1: 0.3122659907061435


Training at p_text=0.5:  29%|██▉       | 13/45 [08:16<19:47, 37.12s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 42, 'Semantic list': 1, 'Reduplication': 6, 'Single': 6}
Epoch 12 - Train Loss: 10.510837849150313, Train accuracy: 0.5389728096676737
Validation Accuracy: 0.46616541353383456, recall: 0.43352029914529916, precision: 0.3657926065162907, f1: 0.3305803295478471


Training at p_text=0.5:  31%|███       | 14/45 [08:53<19:08, 37.05s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 46, 'Semantic list': 2, 'Reduplication': 6, 'Single': 4}
Epoch 13 - Train Loss: 9.999664468968168, Train accuracy: 0.5341389728096677
Validation Accuracy: 0.43609022556390975, recall: 0.44073946886446885, precision: 0.36015184019832186, f1: 0.3411936387458209


Training at p_text=0.5:  33%|███▎      | 15/45 [09:30<18:29, 36.97s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 47, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 14 - Train Loss: 9.46601452725999, Train accuracy: 0.5770392749244713
Validation Accuracy: 0.43609022556390975, recall: 0.45323946886446886, precision: 0.37569444444444444, f1: 0.33225924395526224


Training at p_text=0.5:  36%|███▌      | 16/45 [10:07<17:51, 36.94s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 46, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 15 - Train Loss: 9.074388118500405, Train accuracy: 0.5673716012084592
Validation Accuracy: 0.44360902255639095, recall: 0.4574061355311355, precision: 0.3483312447786132, f1: 0.33728376797933246


Training at p_text=0.5:  38%|███▊      | 17/45 [10:44<17:16, 37.02s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 46, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 16 - Train Loss: 8.619037110754784, Train accuracy: 0.5788519637462236
Validation Accuracy: 0.44360902255639095, recall: 0.4574061355311355, precision: 0.36069055213792056, f1: 0.3456297515340446


Training at p_text=0.5:  40%|████      | 18/45 [11:21<16:41, 37.08s/it]

Incorrect labels: {'Direct': 13, 'Metaphorical': 39, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 17 - Train Loss: 8.245330678655746, Train accuracy: 0.5975830815709969
Validation Accuracy: 0.48872180451127817, recall: 0.47926968864468866, precision: 0.3879570863433092, f1: 0.3854976489028213


Training at p_text=0.5:  42%|████▏     | 19/45 [11:58<16:04, 37.10s/it]

Incorrect labels: {'Direct': 17, 'Metaphorical': 31, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 18 - Train Loss: 7.967877743091989, Train accuracy: 0.6054380664652568
Validation Accuracy: 0.518796992481203, recall: 0.49416895604395605, precision: 0.4254844501903326, f1: 0.42936072263845876


Training at p_text=0.5:  44%|████▍     | 20/45 [12:35<15:26, 37.07s/it]

Incorrect labels: {'Direct': 13, 'Metaphorical': 37, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 19 - Train Loss: 7.633478174818323, Train accuracy: 0.6187311178247734
Validation Accuracy: 0.5037593984962406, recall: 0.4845047313797314, precision: 0.42456349206349203, f1: 0.4227615663120756


Training at p_text=0.5:  47%|████▋     | 21/45 [13:12<14:48, 37.03s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 41, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 20 - Train Loss: 7.391809646119463, Train accuracy: 0.6356495468277945
Validation Accuracy: 0.48120300751879697, recall: 0.47738476800976803, precision: 0.42444444444444446, f1: 0.4074166099657214


Training at p_text=0.5:  49%|████▉     | 22/45 [13:50<14:13, 37.12s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 41, 'Semantic list': 2, 'Reduplication': 6, 'Single': 3}
Epoch 21 - Train Loss: 7.1139248989998025, Train accuracy: 0.6549848942598188
Validation Accuracy: 0.48872180451127817, recall: 0.48123092185592187, precision: 0.4212092731829574, f1: 0.41489341250854406


Training at p_text=0.5:  51%|█████     | 23/45 [14:27<13:37, 37.15s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 42, 'Semantic list': 2, 'Reduplication': 5, 'Single': 3}
Epoch 22 - Train Loss: 6.920947429981638, Train accuracy: 0.6549848942598188
Validation Accuracy: 0.48872180451127817, recall: 0.5026594932844932, precision: 0.47452380952380957, f1: 0.444043257229041


Training at p_text=0.5:  53%|█████▎    | 24/45 [15:05<13:03, 37.31s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 35, 'Semantic list': 2, 'Reduplication': 5, 'Single': 3}
Epoch 23 - Train Loss: 6.718915452348425, Train accuracy: 0.6634441087613293
Validation Accuracy: 0.518796992481203, recall: 0.5159394078144078, precision: 0.48798146561256084, f1: 0.47114756258234514


Training at p_text=0.5:  56%|█████▌    | 25/45 [15:41<12:23, 37.16s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 39, 'Semantic list': 2, 'Reduplication': 5, 'Single': 3}
Epoch 24 - Train Loss: 6.523873258144297, Train accuracy: 0.6652567975830815
Validation Accuracy: 0.5112781954887218, recall: 0.513816391941392, precision: 0.4940068922305764, f1: 0.4658621057934805


Training at p_text=0.5:  58%|█████▊    | 26/45 [16:19<11:45, 37.13s/it]

Incorrect labels: {'Direct': 16, 'Metaphorical': 33, 'Semantic list': 2, 'Reduplication': 5, 'Single': 3}
Epoch 25 - Train Loss: 6.314811300724111, Train accuracy: 0.6827794561933535
Validation Accuracy: 0.518796992481203, recall: 0.5133997252747253, precision: 0.48353624214837454, f1: 0.4723672794250005


Training at p_text=0.5:  60%|██████    | 27/45 [16:56<11:11, 37.29s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 44, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 26 - Train Loss: 6.17153955013194, Train accuracy: 0.6858006042296072
Validation Accuracy: 0.48120300751879697, recall: 0.519921398046398, precision: 0.49557957393483715, f1: 0.4643358405701924


Training at p_text=0.5:  62%|██████▏   | 28/45 [17:33<10:31, 37.17s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 38, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 27 - Train Loss: 6.023402437250665, Train accuracy: 0.6870090634441087
Validation Accuracy: 0.5037593984962406, recall: 0.5308661477411478, precision: 0.5113035652857635, f1: 0.48957420402178464


Training at p_text=0.5:  64%|██████▍   | 29/45 [18:10<09:54, 37.13s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 39, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 28 - Train Loss: 5.879421386312931, Train accuracy: 0.7075528700906344
Validation Accuracy: 0.49624060150375937, recall: 0.5255456349206349, precision: 0.4980380639097744, f1: 0.4777903225806452


Training at p_text=0.5:  67%|██████▋   | 30/45 [19:40<13:12, 52.86s/it]

Incorrect labels: {'Direct': 10, 'Metaphorical': 42, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 29 - Train Loss: 5.736694579428815, Train accuracy: 0.7087613293051359
Validation Accuracy: 0.5037593984962406, recall: 0.5327648046398047, precision: 0.5303038847117795, f1: 0.5016615160556976


Training at p_text=0.5:  69%|██████▉   | 31/45 [20:58<14:07, 60.54s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 39, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 30 - Train Loss: 5.664535370278866, Train accuracy: 0.7148036253776435
Validation Accuracy: 0.49624060150375937, recall: 0.5266765873015874, precision: 0.5268575083542189, f1: 0.4982257104454685


Training at p_text=0.5:  71%|███████   | 32/45 [21:35<11:35, 53.48s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 38, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 31 - Train Loss: 5.517680868189386, Train accuracy: 0.7274924471299093
Validation Accuracy: 0.5037593984962406, recall: 0.5321253052503052, precision: 0.5198534018870706, f1: 0.5054071844471564


Training at p_text=0.5:  73%|███████▎  | 33/45 [22:12<09:42, 48.51s/it]

Incorrect labels: {'Direct': 16, 'Metaphorical': 29, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 32 - Train Loss: 5.435856859734717, Train accuracy: 0.7280966767371602
Validation Accuracy: 0.556390977443609, recall: 0.55560057997558, precision: 0.5528787878787879, f1: 0.5378397051474263


Training at p_text=0.5:  76%|███████▌  | 34/45 [22:49<08:15, 45.04s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 33, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 33 - Train Loss: 5.32913728470498, Train accuracy: 0.743202416918429
Validation Accuracy: 0.5413533834586466, recall: 0.5511958180708181, precision: 0.541687552213868, f1: 0.5303902366213207


Training at p_text=0.5:  78%|███████▊  | 35/45 [23:26<07:07, 42.72s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 39, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 34 - Train Loss: 5.237348434772898, Train accuracy: 0.7486404833836858
Validation Accuracy: 0.5112781954887218, recall: 0.5365270146520146, precision: 0.5269531979458449, f1: 0.5096060442996273


Training at p_text=0.5:  80%|████████  | 36/45 [24:03<06:08, 40.98s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 39, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 35 - Train Loss: 5.160521010135082, Train accuracy: 0.7498489425981874
Validation Accuracy: 0.5112781954887218, recall: 0.5365270146520146, precision: 0.5269531979458449, f1: 0.5096060442996273


Training at p_text=0.5:  82%|████████▏ | 37/45 [24:41<05:19, 39.92s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 40, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 36 - Train Loss: 5.100844697749361, Train accuracy: 0.7546827794561933
Validation Accuracy: 0.5112781954887218, recall: 0.5356936813186812, precision: 0.5219135154061624, f1: 0.5060532228520691


Training at p_text=0.5:  84%|████████▍ | 38/45 [25:19<04:35, 39.30s/it]

Incorrect labels: {'Direct': 13, 'Metaphorical': 37, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 37 - Train Loss: 5.022783796837989, Train accuracy: 0.7655589123867069
Validation Accuracy: 0.518796992481203, recall: 0.539823717948718, precision: 0.5279452614379084, f1: 0.5131156333490147


Training at p_text=0.5:  87%|████████▋ | 39/45 [25:57<03:53, 38.92s/it]

Incorrect labels: {'Direct': 12, 'Metaphorical': 35, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 38 - Train Loss: 4.9756677911636675, Train accuracy: 0.7673716012084593
Validation Accuracy: 0.5413533834586466, recall: 0.5520489926739927, precision: 0.5409753787878788, f1: 0.5280017313557298


Training at p_text=0.5:  89%|████████▉ | 40/45 [26:34<03:12, 38.47s/it]

Incorrect labels: {'Direct': 15, 'Metaphorical': 28, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 39 - Train Loss: 4.909768459644724, Train accuracy: 0.7746223564954683
Validation Accuracy: 0.5714285714285714, recall: 0.5645352564102565, precision: 0.5507658354717179, f1: 0.5473419699643464


Training at p_text=0.5:  91%|█████████ | 41/45 [27:11<02:32, 38.13s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 37, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 40 - Train Loss: 4.8507456170751695, Train accuracy: 0.7830815709969788
Validation Accuracy: 0.5338345864661654, recall: 0.5470032051282051, precision: 0.5371296808196963, f1: 0.5214052867238698


Training at p_text=0.5:  93%|█████████▎| 42/45 [27:49<01:53, 37.90s/it]

Incorrect labels: {'Direct': 14, 'Metaphorical': 34, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 41 - Train Loss: 4.790947792377878, Train accuracy: 0.7854984894259819
Validation Accuracy: 0.5338345864661654, recall: 0.5448069291819292, precision: 0.5309027777777777, f1: 0.5240448102436362


Training at p_text=0.5:  96%|█████████▌| 43/45 [28:26<01:15, 37.62s/it]

Incorrect labels: {'Direct': 11, 'Metaphorical': 37, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 42 - Train Loss: 4.715734136865494, Train accuracy: 0.7963746223564955
Validation Accuracy: 0.5338345864661654, recall: 0.546407967032967, precision: 0.5346119281045751, f1: 0.5202711091650796


Training at p_text=0.5:  98%|█████████▊| 44/45 [29:33<00:46, 46.64s/it]

Incorrect labels: {'Direct': 15, 'Metaphorical': 30, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 43 - Train Loss: 4.62785823294457, Train accuracy: 0.8078549848942598
Validation Accuracy: 0.556390977443609, recall: 0.5539797008547008, precision: 0.5388392163392164, f1: 0.5370568669835911


Training at p_text=0.5: 100%|██████████| 45/45 [31:16<00:00, 41.70s/it]

Incorrect labels: {'Direct': 15, 'Metaphorical': 30, 'Semantic list': 2, 'Reduplication': 4, 'Single': 3}
Epoch 44 - Train Loss: 4.608934229992806, Train accuracy: 0.8096676737160121
Validation Accuracy: 0.556390977443609, recall: 0.5539797008547008, precision: 0.5388392163392164, f1: 0.5370568669835911





([{'train_accuracy': 0.3861027190332326,
   'validation_accuracy': 0.556390977443609,
   'recall': 0.3145299145299145,
   'precision': 0.2671776288726753,
   'f1': 0.27859013184652787,
   'incorrect_labels': {'Direct': 7,
    'Metaphorical': 27,
    'Semantic list': 7,
    'Reduplication': 6,
    'Single': 7}},
  {'train_accuracy': 0.44410876132930516,
   'validation_accuracy': 0.518796992481203,
   'recall': 0.28976266788766786,
   'precision': 0.24555735930735928,
   'f1': 0.25463244568192095,
   'incorrect_labels': {'Direct': 8,
    'Metaphorical': 31,
    'Semantic list': 7,
    'Reduplication': 6,
    'Single': 7}},
  {'train_accuracy': 0.43564954682779455,
   'validation_accuracy': 0.5037593984962406,
   'recall': 0.2846535409035409,
   'precision': 0.2437878787878788,
   'f1': 0.24449836497799146,
   'incorrect_labels': {'Direct': 6,
    'Metaphorical': 35,
    'Semantic list': 7,
    'Reduplication': 6,
    'Single': 7}},
  {'train_accuracy': 0.4453172205438066,
   'validation_