# install and load dependancy

In [1]:
!pip install transformers --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m59.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m75.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m83.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import os
import sys
import torch
import tqdm
import yaml
import json
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms as T

import logging
from torch.utils.tensorboard import SummaryWriter
from shutil import copyfile

## Link with Google Drive

In [3]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

path = '/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/'
sys.path.append(path)

from dataset import SemEvalDataset, Collate
from models import MemeMultiLabelClassifier
from sampler import MultilabelBalancedRandomSampler

from scorer import evaluate
from format_checker import read_classes

Mounted at /content/drive


# Load data from drive

In [4]:
def load_data(config, path, workers, val_fold):

    # Load data loaders
    test_transforms = T.Compose([T.Resize(256),
                    T.CenterCrop(224),
                    T.ToTensor(),
                    T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])])
    train_transforms = T.Compose([T.Resize(256),
                    T.RandomCrop(224),
                    T.ToTensor(),
                    T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])])

    train_dataset = SemEvalDataset(config, path, split='train', transforms=train_transforms, val_fold=val_fold)
    val_dataset = SemEvalDataset(config, path, split='val', transforms=test_transforms, val_fold=val_fold)

    id_intersection = set([x['id'] for x in train_dataset.targets]).intersection([x['id'] for x in val_dataset.targets])
    assert len(id_intersection) == 0

    if config['dataset']['task'] == 3:
        classes = read_classes(path + 'techniques_list_task3.txt')

    collate_fn = Collate(path, config, classes)
    if 'balanced-sampling' in config['training'] and config['training']['balanced-sampling']:
        classes_ids = [[train_dataset.class_list.index(x) for x in info['labels']] for info in train_dataset.targets]
        labels = np.zeros((len(classes_ids), len(train_dataset.class_list)))
        for l, c in zip(labels, classes_ids):
            l[c] = 1
        sampler = MultilabelBalancedRandomSampler(labels)
    else:
        sampler = None

    train_dataloader = DataLoader(train_dataset, batch_size=config['training']['bs'], shuffle=True if sampler is None else False, num_workers=workers, collate_fn=collate_fn, sampler=sampler)
    val_dataloader = DataLoader(val_dataset, batch_size=config['training']['bs'], shuffle=False,
                                  num_workers=workers, collate_fn=collate_fn)

    return train_dataloader, val_dataloader, classes

# Train the model and validate

In [5]:
def train_model(config, path, model, optimizer, scheduler, train_dataloader, val_dataloader, classes, num_epochs, log_step, val_step, val_fold):
    model.train()

    start_epoch = 0
    mean_loss = 0
    progress_bar = tqdm.trange(start_epoch, num_epochs)
    progress_bar.set_description('Train')
    best_f1 = 0.0

    for epoch in progress_bar:
        for it, (image, text, text_len, labels, ids) in enumerate(train_dataloader):
            global_iteration = epoch * len(train_dataloader) + it

            if torch.cuda.is_available():
                image = image.cuda() if image is not None else None
                text = text.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()

            loss = model(image, text, text_len, labels)
            loss.backward()
            optimizer.step()
            mean_loss += loss.item()

            if global_iteration % log_step == 0:
                mean_loss /= log_step
                progress_bar.set_postfix(dict(loss='{:.2}'.format(mean_loss)))
                mean_loss = 0

            if global_iteration % val_step == 0:
                # validate (using different thresholds)
                metrics = validate(val_dataloader, model, classes, thresholds=[0.3, 0.5, 0.8])

                # save best model
                if metrics['macroF1_thr=0.3'] + metrics['microF1_thr=0.3'] > best_f1:
                    print('Saving best model...')
                    checkpoint = {
                        'cfg': config,
                        'epoch': epoch,
                        'model': model.joint_processing_module.state_dict() if not config['text-model']['fine-tune'] and not config['image-model']['fine-tune'] else model.state_dict()}
                    latest = os.path.join(path, 'model_best_fold{}.pt'.format(val_fold))
                    torch.save(checkpoint, latest)
                    best_f1 = metrics['macroF1_thr=0.3'] + metrics['microF1_thr=0.3']

        print('Saving best model...')
        checkpoint = {
          'cfg': config,
          'epoch': epoch,
          'model': model.joint_processing_module.state_dict() if not config['text-model']['fine-tune'] and not config['image-model']['fine-tune'] else model.state_dict()}
        latest = os.path.join(path, 'model_best_fold{}.pt'.format(val_fold))
        torch.save(checkpoint, latest)
        best_f1 = metrics['macroF1_thr=0.3'] + metrics['microF1_thr=0.3']
        scheduler.step()

In [6]:
def validate(val_dataloader, model, classes_list, thresholds=[0.3, 0.5, 0.8]):
    model.eval()
    predictions = []
    metrics = {}
    progress_bar = tqdm.tqdm(thresholds)
    progress_bar.set_description('Validation')
    for thr in progress_bar:
        for it, (image, text, text_len, labels, ids) in enumerate(val_dataloader):
            if torch.cuda.is_available():
                image = image.cuda() if image is not None else None
                text = text.cuda()
                labels = labels.cuda()
            with torch.no_grad():
                pred_classes = model(image, text, text_len, inference_threshold=thr)

            for id, labels in zip(ids, pred_classes):    # loop over every element of the batch
                predictions.append({'id': id, 'labels': labels})

        macro_f1, micro_f1 = evaluate(predictions, val_dataloader.dataset.targets, classes_list)
        metrics['macroF1_thr={}'.format(thr)] = macro_f1
        metrics['microF1_thr={}'.format(thr)] = micro_f1

    model.train()
    return metrics

# Training parameters

In [7]:
def start_training(config, path, val_fold=0):
    num_epochs = 80
    workers = 2
    log_step = 10
    val_step = 200
    test_step = 100000000
    logger_name = './runs/test'

    if 'task' not in config['dataset']:
        config['dataset']['task'] = 3

    train_dataloader, val_dataloader, classes = load_data(config, path, workers, val_fold)

    # Construct the model
    model = MemeMultiLabelClassifier(config, labels=classes)
    if torch.cuda.is_available():
        model.cuda()

    # Construct the optimizer
    if not config['text-model']['fine-tune'] and not config['image-model']['fine-tune']:
        optimizer = torch.optim.Adam([p for n, p in model.named_parameters() if 'textual_module' not in n and 'visual_module' not in n], lr=config['training']['lr'])
    else:
        if config['dataset']['task'] == 3:
            optimizer = torch.optim.Adam([
                {'params': [p for n, p in model.named_parameters() if 'textual_module' not in n and 'visual_module' not in n]},
                {'params': model.textual_module.parameters(), 'lr': config['training']['pretrained-modules-lr']},
                {'params': model.visual_module.parameters(), 'lr': config['training']['pretrained-modules-lr']}]
                , lr=config['training']['lr'])

    # LR scheduler
    scheduler_name = config['training']['scheduler']
    if scheduler_name == 'steplr':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, gamma=config['training']['gamma'], milestones=config['training']['milestones'])
    elif scheduler_name is None:
        scheduler = None
    else:
        raise ValueError('{} scheduler is not available'.format(scheduler_name))

    # Train the model
    train_model(config, path, model, optimizer, scheduler, train_dataloader, val_dataloader, classes, num_epochs, log_step, val_step, val_fold)


# Main function

In [8]:
def main():

    config = path + 'cfg/config_task3.yaml'
    cross_validation = False

    with open(config, 'r') as ymlfile:
        config = yaml.safe_load(ymlfile)

    if cross_validation:
        # read splits from file
        with open(path + 'data/folds.json', 'r') as f:
            folds = json.load(f)
            num_folds = len(folds)
        for fold in tqdm.trange(num_folds):
            start_training(config, path, val_fold=fold)
    else:
        # train using fold 0 as validation fold
        start_training(config, path, val_fold=0)

if __name__ == '__main__':
    main()

Downloading (…)solve/main/vocab.txt: 0.00B [00:00, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading: "https://download.pytorch.org/models/vit_b_32-d86f8d99.pth" to /root/.cache/torch/hub/checkpoints/vit_b_32-d86f8d99.pth
100%|██████████| 337M/337M [00:06<00:00, 55.8MB/s]


Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['bert.encoder.layer.10.attention.self.key.bias', 'bert.encoder.layer.11.attention.output.LayerNorm.weight', 'bert.encoder.layer.11.attention.self.query.bias', 'cls.predictions.transform.dense.bias', 'bert.encoder.layer.10.output.dense.bias', 'bert.encoder.layer.10.attention.output.dense.weight', 'bert.encoder.layer.11.attention.self.key.bias', 'bert.encoder.layer.11.output.dense.weight', 'bert.encoder.layer.10.attention.self.query.bias', 'cls.predictions.transform.dense.weight', 'bert.encoder.layer.10.attention.output.LayerNorm.bias', 'bert.encoder.layer.10.attention.self.value.weight', 'bert.encoder.layer.11.output.LayerNorm.bias', 'bert.encoder.layer.11.output.dense.bias', 'bert.encoder.layer.10.output.LayerNorm.weight', 'bert.encoder.layer.10.intermediate.dense.bias', 'bert.encoder.layer.11.attention.self.value.weight', 'bert.encoder.layer.11.attention.self.value.bias', 'bert.encode

Saving best model...


Train:   0%|          | 0/80 [02:42<?, ?it/s, loss=0.25]

Saving best model...


Train:   1%|▏         | 1/80 [03:08<3:54:32, 178.13s/it, loss=0.26]

Saving best model...


Train:   2%|▎         | 2/80 [03:18<1:44:07, 80.10s/it, loss=0.21]

Saving best model...


Train:   4%|▍         | 3/80 [03:25<1:01:21, 47.82s/it, loss=0.22]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.45s/it][A
Validation:  67%|██████▋   | 2/3 [00:04<00:02,  2.06s/it][A
Validation: 100%|██████████| 3/3 [00:06<00:00,  2.07s/it]


Saving best model...


Train:   4%|▍         | 3/80 [03:37<1:01:21, 47.82s/it, loss=0.22]

Saving best model...


Train:   5%|▌         | 4/80 [03:45<46:19, 36.58s/it, loss=0.18]

Saving best model...


Train:   6%|▋         | 5/80 [03:57<33:21, 26.69s/it, loss=0.16]

Saving best model...


Train:   8%|▊         | 6/80 [04:09<26:32, 21.52s/it, loss=0.12]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.33s/it][A
Validation:  67%|██████▋   | 2/3 [00:04<00:02,  2.02s/it][A
Validation: 100%|██████████| 3/3 [00:06<00:00,  2.04s/it]


Saving best model...
Saving best model...


Train:   9%|▉         | 7/80 [04:27<24:41, 20.29s/it, loss=0.1] 

Saving best model...


Train:  10%|█         | 8/80 [04:38<21:05, 17.58s/it, loss=0.086]

Saving best model...


Train:  11%|█▏        | 9/80 [04:47<18:22, 15.53s/it, loss=0.063]

Saving best model...


Train:  12%|█▎        | 10/80 [04:53<15:53, 13.63s/it, loss=0.052]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:03<00:06,  3.46s/it][A
Validation:  67%|██████▋   | 2/3 [00:05<00:02,  2.54s/it][A
Validation: 100%|██████████| 3/3 [00:07<00:00,  2.39s/it]


Saving best model...


Train:  12%|█▎        | 10/80 [05:05<15:53, 13.63s/it, loss=0.056]

Saving best model...


Train:  14%|█▍        | 11/80 [05:16<17:07, 14.89s/it, loss=0.044]

Saving best model...


Train:  15%|█▌        | 12/80 [05:28<15:35, 13.76s/it, loss=0.033]

Saving best model...


Train:  16%|█▋        | 13/80 [05:36<14:40, 13.14s/it, loss=0.03] 
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:03<00:07,  3.77s/it][A
Validation:  67%|██████▋   | 2/3 [00:06<00:03,  3.08s/it][A
Validation: 100%|██████████| 3/3 [00:08<00:00,  2.82s/it]
Train:  16%|█▋        | 13/80 [05:46<14:40, 13.14s/it, loss=0.031]

Saving best model...


Train:  18%|█▊        | 14/80 [05:56<15:55, 14.48s/it, loss=0.025]

Saving best model...


Train:  19%|█▉        | 15/80 [06:06<14:46, 13.63s/it, loss=0.02] 

Saving best model...


Train:  20%|██        | 16/80 [06:18<13:08, 12.33s/it, loss=0.02] 

Saving best model...


Train:  21%|██▏       | 17/80 [06:22<12:45, 12.15s/it, loss=0.019]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:03<00:06,  3.28s/it][A
Validation:  67%|██████▋   | 2/3 [00:06<00:03,  3.13s/it][A
Validation: 100%|██████████| 3/3 [00:08<00:00,  2.79s/it]


Saving best model...


Train:  21%|██▏       | 17/80 [06:36<12:45, 12.15s/it, loss=0.016]

Saving best model...


Train:  22%|██▎       | 18/80 [06:50<14:16, 13.81s/it, loss=0.018]

Saving best model...


Train:  24%|██▍       | 19/80 [07:01<14:01, 13.80s/it, loss=0.016]

Saving best model...


Train:  25%|██▌       | 20/80 [07:09<13:12, 13.21s/it, loss=0.013]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:03<00:06,  3.40s/it][A
Validation:  67%|██████▋   | 2/3 [00:06<00:03,  3.22s/it][A
Validation: 100%|██████████| 3/3 [00:08<00:00,  2.78s/it]


Saving best model...


Train:  25%|██▌       | 20/80 [07:19<13:12, 13.21s/it, loss=0.014]

Saving best model...


Train:  26%|██▋       | 21/80 [07:31<14:19, 14.57s/it, loss=0.015]

Saving best model...


Train:  28%|██▊       | 22/80 [07:41<13:16, 13.73s/it, loss=0.011]

Saving best model...


Train:  29%|██▉       | 23/80 [07:52<11:44, 12.37s/it, loss=0.013]

Saving best model...


Train:  30%|███       | 24/80 [07:54<11:17, 12.10s/it, loss=0.0088]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.43s/it][A
Validation:  67%|██████▋   | 2/3 [00:05<00:02,  2.99s/it][A
Validation: 100%|██████████| 3/3 [00:08<00:00,  2.74s/it]
Train:  30%|███       | 24/80 [08:09<11:17, 12.10s/it, loss=0.01]

Saving best model...


Train:  31%|███▏      | 25/80 [08:21<12:31, 13.67s/it, loss=0.011]

Saving best model...


Train:  32%|███▎      | 26/80 [08:32<11:48, 13.12s/it, loss=0.0091]

Saving best model...


Train:  34%|███▍      | 27/80 [08:39<11:11, 12.67s/it, loss=0.0097]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.13s/it][A
Validation:  67%|██████▋   | 2/3 [00:04<00:02,  2.04s/it][A
Validation: 100%|██████████| 3/3 [00:07<00:00,  2.59s/it]
Train:  34%|███▍      | 27/80 [08:50<11:11, 12.67s/it, loss=0.01] 

Saving best model...


Train:  35%|███▌      | 28/80 [09:01<12:07, 13.99s/it, loss=0.008] 

Saving best model...


Train:  36%|███▋      | 29/80 [09:10<11:00, 12.94s/it, loss=0.0075]

Saving best model...


Train:  38%|███▊      | 30/80 [09:22<10:04, 12.10s/it, loss=0.006] 

Saving best model...


Train:  39%|███▉      | 31/80 [09:24<09:53, 12.12s/it, loss=0.0054]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.13s/it][A
Validation:  67%|██████▋   | 2/3 [00:03<00:01,  1.97s/it][A
Validation: 100%|██████████| 3/3 [00:06<00:00,  2.18s/it]


Saving best model...


Train:  39%|███▉      | 31/80 [09:40<09:53, 12.12s/it, loss=0.0071]

Saving best model...


Train:  40%|████      | 32/80 [09:52<10:58, 13.72s/it, loss=0.0059]

Saving best model...


Train:  41%|████▏     | 33/80 [10:02<10:17, 13.13s/it, loss=0.0065]

Saving best model...


Train:  42%|████▎     | 34/80 [10:09<09:21, 12.20s/it, loss=0.0047]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.07s/it][A
Validation:  67%|██████▋   | 2/3 [00:03<00:01,  1.94s/it][A
Validation: 100%|██████████| 3/3 [00:05<00:00,  1.92s/it]


Saving best model...


Train:  42%|████▎     | 34/80 [10:19<09:21, 12.20s/it, loss=0.0045]

Saving best model...


Train:  44%|████▍     | 35/80 [10:29<10:33, 14.09s/it, loss=0.0044]

Saving best model...


Train:  45%|████▌     | 36/80 [10:41<09:14, 12.61s/it, loss=0.004] 

Saving best model...


Train:  46%|████▋     | 37/80 [10:55<08:48, 12.28s/it, loss=0.0041]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.28s/it][A
Validation:  67%|██████▋   | 2/3 [00:04<00:02,  2.01s/it][A
Validation: 100%|██████████| 3/3 [00:05<00:00,  1.99s/it]


Saving best model...


Train:  48%|████▊     | 38/80 [11:13<10:10, 14.54s/it, loss=0.0033]

Saving best model...


Train:  49%|████▉     | 39/80 [11:21<09:19, 13.66s/it, loss=0.0045]

Saving best model...


Train:  50%|█████     | 40/80 [11:33<08:17, 12.45s/it, loss=0.0034]

Saving best model...


Train:  51%|█████▏    | 41/80 [11:38<07:49, 12.03s/it, loss=0.0029]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:03<00:07,  3.97s/it][A
Validation:  67%|██████▋   | 2/3 [00:05<00:02,  2.82s/it][A
Validation: 100%|██████████| 3/3 [00:07<00:00,  2.61s/it]


Saving best model...


Train:  51%|█████▏    | 41/80 [11:50<07:49, 12.03s/it, loss=0.0028]

Saving best model...


Train:  52%|█████▎    | 42/80 [12:02<08:38, 13.64s/it, loss=0.0028]

Saving best model...


Train:  54%|█████▍    | 43/80 [12:14<08:00, 13.00s/it, loss=0.003] 

Saving best model...


Train:  55%|█████▌    | 44/80 [12:22<07:32, 12.58s/it, loss=0.0034]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:05,  2.65s/it][A
Validation:  67%|██████▋   | 2/3 [00:05<00:03,  3.01s/it][A
Validation: 100%|██████████| 3/3 [00:07<00:00,  2.67s/it]


Saving best model...
Saving best model...


Train:  56%|█████▋    | 45/80 [12:41<08:13, 14.11s/it, loss=0.0033]

Saving best model...


Train:  57%|█████▊    | 46/80 [12:52<07:31, 13.27s/it, loss=0.0027]

Saving best model...


Train:  59%|█████▉    | 47/80 [13:04<06:43, 12.22s/it, loss=0.0029]

Saving best model...


Train:  60%|██████    | 48/80 [13:08<06:27, 12.10s/it, loss=0.0029]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.16s/it][A
Validation:  67%|██████▋   | 2/3 [00:05<00:02,  2.88s/it][A
Validation: 100%|██████████| 3/3 [00:08<00:00,  2.73s/it]
Train:  60%|██████    | 48/80 [13:22<06:27, 12.10s/it, loss=0.003] 

Saving best model...


Train:  61%|██████▏   | 49/80 [13:32<07:02, 13.64s/it, loss=0.0027]

Saving best model...


Train:  62%|██████▎   | 50/80 [13:43<06:32, 13.07s/it, loss=0.0027]

Saving best model...


Train:  64%|██████▍   | 51/80 [13:52<05:59, 12.40s/it, loss=0.0024]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.08s/it][A
Validation:  67%|██████▋   | 2/3 [00:04<00:02,  2.00s/it][A
Validation: 100%|██████████| 3/3 [00:07<00:00,  2.51s/it]


Saving best model...


Train:  64%|██████▍   | 51/80 [14:02<05:59, 12.40s/it, loss=0.0034]

Saving best model...


Train:  65%|██████▌   | 52/80 [14:12<06:36, 14.14s/it, loss=0.0029]

Saving best model...


Train:  66%|██████▋   | 53/80 [14:23<05:42, 12.69s/it, loss=0.0027]

Saving best model...


Train:  68%|██████▊   | 54/80 [14:34<05:18, 12.25s/it, loss=0.0026]

Saving best model...


Train:  69%|██████▉   | 55/80 [14:37<05:00, 12.04s/it, loss=0.0025]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.16s/it][A
Validation:  67%|██████▋   | 2/3 [00:04<00:01,  1.98s/it][A
Validation: 100%|██████████| 3/3 [00:05<00:00,  1.95s/it]
Train:  69%|██████▉   | 55/80 [14:53<05:00, 12.04s/it, loss=0.0032]

Saving best model...


Train:  70%|███████   | 56/80 [15:05<05:43, 14.31s/it, loss=0.0026]

Saving best model...


Train:  71%|███████▏  | 57/80 [15:15<05:10, 13.49s/it, loss=0.0027]

Saving best model...


Train:  72%|███████▎  | 58/80 [15:24<04:28, 12.21s/it, loss=0.0027]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:01<00:03,  1.93s/it][A
Validation:  67%|██████▋   | 2/3 [00:03<00:01,  1.92s/it][A
Validation: 100%|██████████| 3/3 [00:05<00:00,  1.90s/it]
Train:  72%|███████▎  | 58/80 [15:33<04:28, 12.21s/it, loss=0.0029]

Saving best model...


Train:  74%|███████▍  | 59/80 [15:42<04:53, 13.96s/it, loss=0.0024]

Saving best model...


Train:  75%|███████▌  | 60/80 [15:54<04:17, 12.85s/it, loss=0.003] 

Saving best model...


Train:  76%|███████▋  | 61/80 [16:05<03:56, 12.44s/it, loss=0.0026]

Saving best model...


Train:  78%|███████▊  | 62/80 [16:08<03:38, 12.15s/it, loss=0.0022]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.16s/it][A
Validation:  67%|██████▋   | 2/3 [00:03<00:01,  1.97s/it][A
Validation: 100%|██████████| 3/3 [00:05<00:00,  1.92s/it]


Saving best model...


Train:  78%|███████▊  | 62/80 [16:24<03:38, 12.15s/it, loss=0.0027]

Saving best model...


Train:  79%|███████▉  | 63/80 [16:34<03:52, 13.70s/it, loss=0.003] 

Saving best model...


Train:  80%|████████  | 64/80 [16:43<03:21, 12.62s/it, loss=0.0026]

Saving best model...


Train:  81%|████████▏ | 65/80 [16:52<03:00, 12.01s/it, loss=0.0027]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:05,  2.57s/it][A
Validation:  67%|██████▋   | 2/3 [00:04<00:02,  2.17s/it][A
Validation: 100%|██████████| 3/3 [00:06<00:00,  2.13s/it]
Train:  81%|████████▏ | 65/80 [17:01<03:00, 12.01s/it, loss=0.0027]

Saving best model...


Train:  82%|████████▎ | 66/80 [17:13<03:11, 13.67s/it, loss=0.0025]

Saving best model...


Train:  84%|████████▍ | 67/80 [17:25<02:49, 13.03s/it, loss=0.0023]

Saving best model...


Train:  85%|████████▌ | 68/80 [17:34<02:30, 12.55s/it, loss=0.0023]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:03<00:06,  3.26s/it][A
Validation:  67%|██████▋   | 2/3 [00:05<00:02,  2.87s/it][A
Validation: 100%|██████████| 3/3 [00:07<00:00,  2.58s/it]


Saving best model...
Saving best model...


Train:  86%|████████▋ | 69/80 [17:52<02:34, 14.04s/it, loss=0.0027]

Saving best model...


Train:  88%|████████▊ | 70/80 [18:03<02:12, 13.24s/it, loss=0.0021]

Saving best model...


Train:  89%|████████▉ | 71/80 [18:14<01:49, 12.13s/it, loss=0.0025]

Saving best model...


Train:  90%|█████████ | 72/80 [18:19<01:35, 11.95s/it, loss=0.0025]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:05,  2.89s/it][A
Validation:  67%|██████▋   | 2/3 [00:05<00:02,  2.99s/it][A
Validation: 100%|██████████| 3/3 [00:07<00:00,  2.65s/it]
Train:  90%|█████████ | 72/80 [18:32<01:35, 11.95s/it, loss=0.0023]

Saving best model...


Train:  91%|█████████▏| 73/80 [18:43<01:34, 13.43s/it, loss=0.0025]

Saving best model...


Train:  92%|█████████▎| 74/80 [18:52<01:17, 12.84s/it, loss=0.0022]

Saving best model...


Train:  94%|█████████▍| 75/80 [19:06<01:01, 12.30s/it, loss=0.0022]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:01<00:03,  1.98s/it][A
Validation:  67%|██████▋   | 2/3 [00:03<00:01,  1.91s/it][A
Validation: 100%|██████████| 3/3 [00:06<00:00,  2.30s/it]


Saving best model...
Saving best model...


Train:  95%|█████████▌| 76/80 [19:23<00:58, 14.55s/it, loss=0.0023]

Saving best model...


Train:  96%|█████████▋| 77/80 [19:35<00:39, 13.04s/it, loss=0.0022]

Saving best model...


Train:  98%|█████████▊| 78/80 [19:46<00:25, 12.59s/it, loss=0.0018]

Saving best model...


Train:  99%|█████████▉| 79/80 [19:50<00:12, 12.23s/it, loss=0.0019]
  0%|          | 0/3 [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation:  33%|███▎      | 1/3 [00:02<00:04,  2.04s/it][A
Validation:  67%|██████▋   | 2/3 [00:03<00:01,  1.92s/it][A
Validation: 100%|██████████| 3/3 [00:05<00:00,  1.91s/it]
Train:  99%|█████████▉| 79/80 [20:02<00:12, 12.23s/it, loss=0.0024]

Saving best model...


Train: 100%|██████████| 80/80 [20:04<00:00, 15.06s/it, loss=0.0024]


# Testing the trained model

In [9]:
%cd /content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection

/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection


In [29]:
!python inference.py --checkpoint model_best_fold0.pt --threshold 0.4 --validate

Namespace(threshold=0.4, checkpoint='model_best_fold0.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'bert.encoder.layer.11.intermediate.dense.bias', 'bert.encoder.layer.11.intermediate.dense.weight', 'bert.encoder.layer.10.attention.output.dense.weight', 'bert.encoder.layer.11.attention.output.LayerNorm.bias', 'bert.encoder.layer.10.attention.self.key.weight', 'bert.encoder.layer.10.attention.output.LayerNorm.weight', 'bert.encoder.layer.10.attention.output.dense.bias', 'bert.encoder.layer.11.attention.output.dense.weight', 'bert.encoder.layer.11.attention.self.value.weight', 'bert.encoder.layer.11.attention.output.LayerNorm.weight', 'bert.encoder.layer.11.attention.self.value.bias', 'cls.predictions.transform.LayerNorm.weight', 'bert.encoder.layer.11.attention.self.key.bias', 'bert.encoder.layer.10.output.La

In [None]:
!python inference.py --checkpoint model_best_fold0.pt --threshold 0.5 --validate

Namespace(threshold=0.5, checkpoint='model_best_fold0.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['bert.encoder.layer.11.intermediate.dense.weight', 'bert.encoder.layer.11.attention.self.query.bias', 'bert.encoder.layer.10.attention.output.dense.weight', 'bert.encoder.layer.11.output.LayerNorm.weight', 'bert.encoder.layer.11.attention.self.query.weight', 'bert.encoder.layer.11.output.LayerNorm.bias', 'bert.encoder.layer.10.output.dense.bias', 'bert.encoder.layer.11.attention.self.key.weight', 'bert.encoder.layer.10.intermediate.dense.weight', 'cls.seq_relationship.bias', 'bert.encoder.layer.10.attention.self.query.bias', 'bert.encoder.layer.11.attention.self.key.bias', 'bert.encoder.layer.10.attention.self.value.weight', 'bert.encoder.layer.11.output.dense.weight', 'bert.encoder.layer.10.output.LayerNorm.bias', 'cls.predictions.transform.dense.bi

In [None]:
!python inference.py --checkpoint model_best_fold0.pt --threshold 0.8 --validate

Namespace(threshold=0.8, checkpoint='model_best_fold0.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['bert.encoder.layer.11.attention.self.query.bias', 'bert.encoder.layer.10.output.dense.weight', 'bert.encoder.layer.11.output.dense.weight', 'cls.predictions.transform.dense.bias', 'bert.encoder.layer.10.attention.self.query.bias', 'bert.encoder.layer.10.attention.output.LayerNorm.weight', 'bert.encoder.layer.11.attention.self.query.weight', 'bert.encoder.layer.11.attention.output.LayerNorm.bias', 'bert.encoder.layer.11.attention.output.dense.bias', 'bert.encoder.layer.10.intermediate.dense.weight', 'bert.encoder.layer.10.output.dense.bias', 'bert.encoder.layer.11.attention.self.value.bias', 'bert.encoder.layer.11.attention.output.LayerNorm.weight', 'bert.encoder.layer.10.attention.output.dense.bias', 'bert.encoder.layer.11.attention.self.key.weight'

In [None]:
!python inference.py --checkpoint model_best_fold0.pt --threshold 0.9 --validate

Namespace(threshold=0.9, checkpoint='model_best_fold0.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['bert.encoder.layer.11.attention.self.key.bias', 'bert.encoder.layer.11.output.dense.bias', 'bert.encoder.layer.10.intermediate.dense.weight', 'bert.encoder.layer.10.output.dense.weight', 'bert.encoder.layer.10.intermediate.dense.bias', 'cls.predictions.bias', 'bert.encoder.layer.11.attention.output.LayerNorm.weight', 'bert.encoder.layer.10.attention.self.value.bias', 'cls.predictions.transform.dense.bias', 'bert.encoder.layer.10.attention.output.dense.weight', 'bert.encoder.layer.11.attention.self.key.weight', 'bert.encoder.layer.11.attention.output.dense.bias', 'bert.encoder.layer.11.attention.output.LayerNorm.bias', 'bert.encoder.layer.10.attention.self.query.bias', 'bert.encoder.layer.10.output.LayerNorm.bias', 'bert.encoder.layer.10.attention.se

## In case there is other saved models

In [None]:
!python inference.py --checkpoint model_best_fold1.pt --validate

Namespace(threshold=0.3, checkpoint='model_best_fold1.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Traceback (most recent call last):
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 178, in <module>
    main(opt)
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 30, in main
    checkpoint = torch.load(opt.checkpoint, map_location='cpu')
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 791, in load
    with _open_file_like(f, 'rb') as opened_file:
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 271, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 252, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: 'model_best_fold1.pt'


In [None]:
!python inference.py --checkpoint model_best_fold2.pt --validate

Namespace(threshold=0.3, checkpoint='model_best_fold2.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Traceback (most recent call last):
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 178, in <module>
    main(opt)
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 30, in main
    checkpoint = torch.load(opt.checkpoint, map_location='cpu')
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 791, in load
    with _open_file_like(f, 'rb') as opened_file:
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 271, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 252, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: 'model_best_fold2.pt'


In [None]:
!python inference.py --checkpoint model_best_fold3.pt --validate

Namespace(threshold=0.3, checkpoint='model_best_fold3.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Traceback (most recent call last):
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 178, in <module>
    main(opt)
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 30, in main
    checkpoint = torch.load(opt.checkpoint, map_location='cpu')
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 791, in load
    with _open_file_like(f, 'rb') as opened_file:
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 271, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 252, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: 'model_best_fold3.pt'


In [None]:
!python inference.py --checkpoint model_best_fold4.pt --validate

Namespace(threshold=0.3, checkpoint='model_best_fold4.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Traceback (most recent call last):
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 178, in <module>
    main(opt)
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 30, in main
    checkpoint = torch.load(opt.checkpoint, map_location='cpu')
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 791, in load
    with _open_file_like(f, 'rb') as opened_file:
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 271, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 252, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: 'model_best_fold4.pt'


In [None]:
!python inference.py --checkpoint model_best_fold5.pt --validate

Namespace(threshold=0.3, checkpoint='model_best_fold5.pt', validate=True, test=False, val_fold=0, ensemble=False, cross_validation=False)
Traceback (most recent call last):
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 178, in <module>
    main(opt)
  File "/content/drive/MyDrive/Telecom/Airbus/Persusasion/MemePersuasionDetection/inference.py", line 30, in main
    checkpoint = torch.load(opt.checkpoint, map_location='cpu')
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 791, in load
    with _open_file_like(f, 'rb') as opened_file:
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 271, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/usr/local/lib/python3.10/dist-packages/torch/serialization.py", line 252, in __init__
    super().__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: 'model_best_fold5.pt'
