In [1]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from datasets import Dataset, DatasetDict, ClassLabel
from transformers import BertModel, ViTModel, BertTokenizer, ViTImageProcessor, Trainer, TrainingArguments, EvalPrediction
from PIL import Image, UnidentifiedImageError, ImageFile

from sklearn.metrics import classification_report, accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from imblearn.over_sampling import RandomOverSampler

ImageFile.LOAD_TRUNCATED_IMAGES = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


DATASET_PATH = Path('data/MEC/')
MODEL_OUTPUT_DIR = Path('data/results')
DATASET_CACHE = Path('data/cache')
TEXT_COLUMN = 'text'
IMAGE_PATH_COLUMN = 'image_path'
TARGET_COLUMN = 'formal_register'
SEED = 42
N_EPOCHS = 30
BATCH_SIZE = 32


DATASET = DATASET_PATH / 'mec-dataset.csv'

def dataset_cache(stage: str):
    return str(DATASET_CACHE / stage)

Path(DATASET_CACHE).mkdir(parents=True, exist_ok=True)
Path(DATASET_CACHE).mkdir(parents=True, exist_ok=True)
print(sys.version)

3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]


## Prepare Dataset

In [2]:
df = pd.read_csv(DATASET, index_col=0)
df[['cohesion', 'thematic_coherence', 'formal_register', 'text_typology']] = 'Nível ' + df[['cohesion', 'thematic_coherence', 'formal_register', 'text_typology']].astype(str)
df[IMAGE_PATH_COLUMN] =  df[IMAGE_PATH_COLUMN].apply(DATASET_PATH.joinpath).astype(str)
df = df.convert_dtypes()
df.info()
df.head(2)

<class 'pandas.core.frame.DataFrame'>
Index: 1188 entries, 0 to 1187
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   text                  1188 non-null   string
 1   motivating_situation  1188 non-null   string
 2   image_url             1188 non-null   string
 3   image_path            1188 non-null   string
 4   cohesion              1188 non-null   string
 5   thematic_coherence    1188 non-null   string
 6   formal_register       1188 non-null   string
 7   text_typology         1188 non-null   string
dtypes: string(8)
memory usage: 83.5 KB


Unnamed: 0,text,motivating_situation,image_url,image_path,cohesion,thematic_coherence,formal_register,text_typology
0,( O chorrinho nino ) - Eu est...,Eu encontrei em cima do armário alguns potes c...,https://storage.googleapis.com/ciclos-10698-bu...,data/MEC/MEC/Rc7dMxTP7ZdLNEvmF0jo/iet1QFw2ARNk...,Nível 3,Nível 3,Nível 3,Nível 4
1,.As meninas do potes de Tintas [T] Uma vez eu ...,Eu encontrei em cima do armário alguns potes c...,https://storage.googleapis.com/ciclos-10698-bu...,data/MEC/MEC/Rc7dMxTP7ZdLNEvmF0jo/F80gTOBoh2Lk...,Nível 3,Nível 3,Nível 3,Nível 4


In [3]:
class_names = sorted(df[TARGET_COLUMN].unique().tolist())

class_label = ClassLabel(num_classes=len(class_names), names=class_names)
class_label

ClassLabel(names=['Nível 1', 'Nível 2', 'Nível 3', 'Nível 4', 'Nível 5'], id=None)

## Training Functions

In [4]:
df[IMAGE_PATH_COLUMN].apply(Path).apply(Path.exists).all()

np.True_

In [5]:
class ClassfierHead(nn.Module):
    def __init__(self, input_size: int, hidden_size = 256, num_classes = 5):
        super().__init__()
        self.fn = nn.Linear(input_size, hidden_size)
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, combined_last_hidden_state, labels=None):
        result = self.fn(combined_last_hidden_state)
        result = torch.relu_(result)
        logits = self.classifier(result)

        if labels is not None:
            loss = nn.functional.cross_entropy(logits, labels)
            return {"loss": loss, "logits": logits}
        
        return {'logits': logits}


BERT_MODEL_NAME = 'neuralmind/bert-base-portuguese-cased'
VIT_MODEL_NAME = 'google/vit-base-patch16-224'

bert_model: BertModel = BertModel.from_pretrained(BERT_MODEL_NAME).requires_grad_(False)
vit_model: ViTModel = ViTModel.from_pretrained(VIT_MODEL_NAME).requires_grad_(False)

tokenizer: BertTokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
processor: ViTImageProcessor = ViTImageProcessor.from_pretrained(VIT_MODEL_NAME)

# Instanciar o cabeça de classificação
model = ClassfierHead(bert_model.config.hidden_size + vit_model.config.hidden_size)

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
def is_image_valid(image_path):
    try:
        with Image.open(image_path) as img:
            img.verify()
        return True
    except (IOError, UnidentifiedImageError):
        return False

def preprocess_text_and_image(input_ids: torch.Tensor, attention_mask: torch.Tensor, pixel_values: torch.Tensor):
    # Processar o texto usando o BERT

    bert_model.eval().cuda()
    vit_model.eval().cuda()

    with torch.no_grad():
        bert_outputs = bert_model(input_ids=input_ids.cuda(), attention_mask=attention_mask.cuda())
        bert_pooled_output = bert_outputs.last_hidden_state[:, 0, :].detach().cpu()
    
        # Processar a imagem usando o ViT
        vit_outputs = vit_model(pixel_values=pixel_values.cuda())
        vit_pooled_output = vit_outputs.last_hidden_state[:, 0, :].detach().cpu()
    
        # Concatenar as saídas
        combined = torch.cat((bert_pooled_output, vit_pooled_output), dim=1)

    return {
        'combined_last_hidden_state': combined
    }


def prepare_dataset(
    df: pd.DataFrame,
    train_indexes: list[int],
    test_indexes: list[int],
    processor: ViTImageProcessor,
    tokenizer: BertTokenizer,
    text_column: str = 'text',
    image_path_column: str = 'image_path',
    target: str = 'formal_register',
    balanced: bool = True,
) -> DatasetDict:
    # Processing dataset e and storing in file cache
    COLUMNS = 'cohesion', 'thematic_coherence', 'formal_register', 'text_typology'
    original_dataset = (Dataset
        .from_pandas(df)
        .select_columns([text_column, image_path_column, *COLUMNS])
        .filter(is_image_valid, input_columns=[image_path_column])
        .map(lambda path: {'pixel_values': Image.open(path)}, input_columns=[image_path_column])
        .map(
            lambda pixel_values: processor(pixel_values, return_tensors='pt'),
            input_columns=['pixel_values'],
            batched=True,
            batch_size=8,
            num_proc=4,
            cache_file_name=dataset_cache('process-image.arrow'),
        )
        .map(
            lambda text: tokenizer(
                text,
                padding='max_length',
                truncation=True,
                max_length=512,
            ),
            batched=True,
            input_columns=[text_column],
            cache_file_name=dataset_cache('process-text.arrow'),
        )
        .with_format('pt')
        .map(
            preprocess_text_and_image,
            batched=True,
            batch_size=64,
            input_columns=['input_ids', 'attention_mask', 'pixel_values'],
            cache_file_name=dataset_cache('process-model-input.arrow'),
        )
        .remove_columns([image_path_column])
        .cast_column(target, class_label)
        .rename_column(target, 'labels')
    )

    # Balaced indexes
    if balanced:
        train_indexes = RandomOverSampler(random_state=SEED)\
            .fit_resample(train_indexes[None].T, df.loc[train_indexes, target])[0].flatten()

    # Creating dataset split
    dataset = DatasetDict(
        train=original_dataset.select(train_indexes),
        test=original_dataset.select(test_indexes)
    )
    return dataset


def compute_metrics(eval_preds: EvalPrediction, compute_result=False, *, PREDS: list = [], LABELS: list = []):
    labels = eval_preds.label_ids
    preds = eval_preds.predictions.argmax(-1)

    PREDS.append(preds)
    LABELS.append(labels)

    if compute_result:
        preds = torch.concat(PREDS).numpy(force=True)
        labels = torch.concat(LABELS).numpy(force=True)

        f1 = f1_score(labels, preds, average="weighted")
        acc = accuracy_score(labels, preds)

        PREDS, LABELS = [], []

        return {"accuracy": acc, "f1": f1}


def evaluate_model(model, dataset: DatasetDict, device):
    # Get model predictions and ground truth
    model.eval()
    model.to(device)
    torch.cuda.empty_cache()

    with torch.no_grad():
      batches = (batch['combined_last_hidden_state'] for batch in dataset['test'].iter(BATCH_SIZE))
      logits = [model(batch.to(device))['logits'] for batch in batches]
      logits = torch.concat(logits, 0)

    y_pred = torch.argmax(logits, dim=-1).cpu().numpy()
    y_true = dataset["test"]["labels"].cpu().numpy()

    torch.cuda.empty_cache()

    # Evaluate model
    return dict(
        report = classification_report(y_true, y_pred),
        accuracy = accuracy_score(y_true, y_pred),
        weighted_precision = precision_score(y_true, y_pred, average="weighted"),
        weighted_recall = recall_score(y_true, y_pred, average="weighted"),
        weighted_f1 = f1_score(y_true, y_pred, average="weighted"),
        macro_precision = precision_score(y_true, y_pred, average="macro"),
        macro_recall = recall_score(y_true, y_pred, average="macro"),
        macro_f1 = f1_score(y_true, y_pred, average="macro"),
    )

def mean_evaluation(results):
    from collections import defaultdict
    data = defaultdict(list)
    for result in results:
        for key, value in result.items():
            if key == 'report': continue
            data[key].append(value)
    return {
        key: np.mean(value)
        for key, value in data.items()
    }


In [7]:
# train_indexes, test_indexes = next(StratifiedShuffleSplit(random_state=SEED, test_size=0.2).split(df, df[TARGET_COLUMN]))
# dataset = prepare_dataset(df, train_indexes, test_indexes, processor, tokenizer)]
# dataset

In [8]:
def train_model(target: str, balanced: bool = True, test_size: float = 0.2):
    train_indexes, test_indexes = next(StratifiedShuffleSplit(random_state=SEED, test_size=test_size).split(df, df[TARGET_COLUMN]))

    dataset = prepare_dataset(df, train_indexes, test_indexes, processor, tokenizer, target=target, balanced=balanced)
    OUTPUT_DIR = f'results/training/{target}/'

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size=32,
        num_train_epochs=20,
        batch_eval_metrics=True,
        dataloader_num_workers=8,
        data_seed=SEED,
        eval_strategy="steps",
        eval_steps=100,
        save_steps=500,
        logging_dir=OUTPUT_DIR + "logging",
        logging_steps=25,
        fp16=True,
        learning_rate=2e-4,
        save_total_limit=2,
        use_cpu=False,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["test"],
        compute_metrics=compute_metrics,
    )

    results = trainer.train()

    evaluation = evaluate_model(model, dataset, device)

    return dict(
        model = model,
        dataset = dataset,
        training = results,
        evaluation = evaluation,
    )


def cross_validate(target: str, balanced: bool = True):
    training_results = []
    evaluation_results = []

    for fold, (train_indexes, test_indexes) in enumerate(StratifiedKFold(random_state=SEED, n_splits=5, shuffle=True).split(df, df[TARGET_COLUMN]), start=1):
    
        dataset = prepare_dataset(df, train_indexes, test_indexes, processor, tokenizer, target=target, balanced=balanced)
        input_size = dataset['test'][0]['combined_last_hidden_state'].size()[0]

        model = ClassfierHead(input_size)

        OUTPUT_DIR = f'results/cross_validation/{target}/fold_{fold}/'

        training_args = TrainingArguments(
            output_dir=OUTPUT_DIR,
            per_device_train_batch_size=32,
            num_train_epochs=20,
            batch_eval_metrics=True,
            dataloader_num_workers=8,
            data_seed=SEED,
            eval_strategy="steps",
            eval_steps=100,
            save_steps=500,
            logging_dir=OUTPUT_DIR + "logging",
            logging_steps=25,
            fp16=True,
            learning_rate=2e-4,
            save_total_limit=2,
            use_cpu=False,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            compute_metrics=compute_metrics,
        )

        results = trainer.train()

        evaluation = evaluate_model(model, dataset, device)
        evaluation_results.append(evaluation)

        training_results.append(dict(
            model = model,
            dataset = dataset,
            training = results,
            evaluation = evaluation,
        ))

    return training_results, mean_evaluation(evaluation_results)

In [9]:
formal_register  = cross_validate('formal_register')
thematic_coherence = cross_validate('thematic_coherence')
text_typology = cross_validate('text_typology')
cohesion = cross_validate('cohesion')
results = pd.DataFrame([formal_register[1], thematic_coherence[1], text_typology[1], cohesion[1]], index=['Registro formal', 'Coerência Temática', 'Tipologia Textual', 'Coesão'])

Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.7076,1.09821,0.445378,0.469912
200,0.4834,1.03903,0.44958,0.464979
300,0.3754,0.775935,0.516807,0.543003
400,0.324,0.751126,0.552521,0.578631
500,0.2709,0.843985,0.559664,0.584952
600,0.2298,0.820156,0.568627,0.592901
700,0.2138,0.814309,0.581032,0.603483
800,0.2017,0.838298,0.590336,0.611156
900,0.2032,0.856526,0.595705,0.615256
1000,0.1603,0.825372,0.601261,0.61885


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.722,0.980846,0.609454,0.621348
200,0.4527,1.046891,0.602041,0.616313
300,0.3713,0.896143,0.601795,0.616147
400,0.3338,0.761442,0.604859,0.618505
500,0.2985,0.80661,0.606268,0.619294
600,0.2261,0.845895,0.607731,0.620604
700,0.2004,0.936155,0.607951,0.620882
800,0.207,0.881798,0.608932,0.621479
900,0.1748,0.958233,0.609844,0.622129
1000,0.166,1.001141,0.610113,0.622315


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.6917,1.04272,0.612691,0.623325
200,0.4725,1.215573,0.608613,0.619946
300,0.3624,0.983305,0.607502,0.618922
400,0.334,0.886401,0.608443,0.619447
500,0.2802,0.943799,0.608853,0.619578
600,0.2129,0.969404,0.609053,0.619629
700,0.1998,1.156647,0.60831,0.619063
800,0.1959,1.045847,0.608513,0.619068
900,0.1852,1.057285,0.609154,0.61935
1000,0.1565,1.095473,0.609331,0.619343


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.725,1.193591,0.607404,0.616764
200,0.4721,0.983425,0.606125,0.616023
300,0.3594,0.861678,0.60622,0.61626
400,0.3038,0.819169,0.607276,0.617212
500,0.2964,0.863311,0.607959,0.618061
600,0.2444,0.877198,0.60862,0.618832
700,0.1931,0.872678,0.609261,0.619486
800,0.2055,0.894969,0.610335,0.620527
900,0.209,0.944979,0.610676,0.621001
1000,0.1612,0.917389,0.61176,0.622084


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.6845,1.075817,0.618977,0.628638
200,0.4922,0.86476,0.619155,0.62897
300,0.3859,1.026818,0.617785,0.628037
400,0.3126,0.850179,0.618026,0.628268
500,0.3019,0.918732,0.617742,0.628195
600,0.2503,0.846352,0.61808,0.628377
700,0.2149,0.938515,0.617701,0.628124
800,0.2055,0.898691,0.617881,0.628273
900,0.1937,0.910238,0.617909,0.628289
1000,0.1516,0.952291,0.617692,0.628127


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8678,1.133062,0.617492,0.627733
200,0.6667,1.055423,0.616583,0.626901
300,0.5051,0.958437,0.616164,0.626572
400,0.3928,0.918134,0.616604,0.626898
500,0.3513,0.941311,0.616699,0.626964
600,0.3333,0.966071,0.617083,0.627258
700,0.2984,0.955587,0.617419,0.627498
800,0.2664,0.968663,0.617789,0.627812
900,0.2617,0.97695,0.618354,0.628231
1000,0.2528,0.987846,0.618668,0.628435


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8954,1.2343,0.617432,0.627376
200,0.637,1.004056,0.617009,0.62715
300,0.5153,0.942758,0.617512,0.627688
400,0.419,1.01003,0.61702,0.627375
500,0.3762,1.031,0.616688,0.627255
600,0.3386,0.966911,0.616473,0.627155
700,0.2871,0.963427,0.616668,0.627401
800,0.2612,0.970464,0.616603,0.627448
900,0.2782,1.015023,0.616395,0.627355
1000,0.2359,1.009901,0.616405,0.627462


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8666,1.23689,0.614859,0.626421
200,0.6276,1.051547,0.614082,0.625776
300,0.5274,1.01637,0.613456,0.625207
400,0.4148,0.977147,0.613251,0.624943
500,0.3888,1.00867,0.612947,0.624542
600,0.353,1.015372,0.612682,0.624208
700,0.309,1.07832,0.612087,0.623648
800,0.2692,1.045525,0.611866,0.623405
900,0.2789,1.033568,0.611747,0.623203
1000,0.2481,1.028731,0.611727,0.623104


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8539,1.218459,0.610606,0.621716
200,0.5679,1.145778,0.609983,0.621029
300,0.4691,1.076948,0.609934,0.620766
400,0.4271,1.112446,0.609729,0.620435
500,0.3815,1.127306,0.609683,0.620187
600,0.3192,1.145797,0.609453,0.6198
700,0.2855,1.162146,0.609531,0.619682
800,0.2606,1.186261,0.609456,0.619438
900,0.2596,1.21278,0.609503,0.619296
1000,0.2387,1.207775,0.609609,0.619215


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.884,1.110365,0.608983,0.618243
200,0.5847,1.126682,0.607958,0.617494
300,0.5281,0.918381,0.608042,0.617509
400,0.4402,0.8986,0.608153,0.61759
500,0.4018,0.890894,0.608348,0.617751
600,0.3347,0.879788,0.608456,0.617786
700,0.3359,0.924041,0.608394,0.617733
800,0.2903,0.921178,0.608473,0.617786
900,0.2925,0.906287,0.608633,0.617881
1000,0.2425,0.947386,0.608626,0.617879


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8844,1.22972,0.606917,0.616079
200,0.6123,1.069814,0.605833,0.614875
300,0.4791,0.905651,0.605855,0.614511
400,0.4136,1.325314,0.604314,0.612763
500,0.36,1.113139,0.603583,0.611709
600,0.3271,1.116784,0.602807,0.610696
700,0.2876,1.078476,0.602458,0.610014
800,0.2688,1.138023,0.601776,0.609098
900,0.2377,1.00495,0.601796,0.608869
1000,0.2134,1.104235,0.601586,0.608429


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.9055,1.077293,0.599516,0.605002
200,0.6371,1.049585,0.598754,0.604085
300,0.4868,1.047188,0.598049,0.603251
400,0.4072,1.099588,0.597138,0.602229
500,0.3536,1.156872,0.596118,0.601149
600,0.3234,0.94414,0.595863,0.600859
700,0.2912,0.969259,0.595727,0.600689
800,0.2992,0.987067,0.59543,0.600332
900,0.2287,1.039079,0.595043,0.599876
1000,0.2483,1.07182,0.594684,0.599444


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8575,1.264748,0.591401,0.595731
200,0.6176,1.16275,0.5906,0.594878
300,0.4801,1.110725,0.589959,0.594218
400,0.3844,1.220946,0.589065,0.593295
500,0.3727,1.142962,0.58831,0.592516
600,0.3021,1.149611,0.587882,0.592069
700,0.2654,1.17624,0.587332,0.591497
800,0.2524,1.233298,0.586723,0.59088
900,0.2374,1.372279,0.585847,0.590009
1000,0.2374,1.366443,0.585064,0.589216


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8252,1.101119,0.580283,0.584473
200,0.5733,1.170253,0.579535,0.583652
300,0.455,1.185249,0.578794,0.582843
400,0.3754,1.112063,0.578336,0.58236
500,0.3242,1.23636,0.577706,0.581704
600,0.3036,1.156933,0.577296,0.581279
700,0.243,1.283538,0.576696,0.580597
800,0.2344,1.205725,0.576412,0.580314
900,0.2428,1.341593,0.575801,0.579706
1000,0.2043,1.372631,0.57512,0.578994


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.8784,1.168158,0.571766,0.575546
200,0.5738,1.097321,0.571279,0.575054
300,0.4165,1.144037,0.570814,0.574546
400,0.3508,1.124403,0.570336,0.574045
500,0.3309,1.264764,0.569553,0.573265
600,0.3044,1.217425,0.569065,0.572746
700,0.2587,1.257706,0.568618,0.572316
800,0.2643,1.199979,0.568425,0.572121
900,0.2407,1.238176,0.568163,0.571829
1000,0.1929,1.314439,0.56776,0.571448


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

Casting the dataset:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.7886,1.08672,0.565679,0.569278
200,0.5234,1.102632,0.56521,0.568888
300,0.4115,0.921157,0.56559,0.569305
400,0.3521,0.952564,0.56563,0.569381
500,0.2679,1.005711,0.565637,0.569434
600,0.2593,0.862425,0.566044,0.569848
700,0.2227,0.930263,0.566315,0.570138
800,0.1921,0.923638,0.566616,0.570453
900,0.1797,0.880827,0.567031,0.570866
1000,0.1491,1.016241,0.567312,0.571175


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.7797,1.100387,0.570697,0.574633
200,0.511,1.017417,0.570543,0.574534
300,0.3898,1.065573,0.570389,0.574435
400,0.3446,0.902618,0.570549,0.574608
500,0.2608,0.97241,0.570552,0.574634
600,0.2561,0.993969,0.570648,0.574761
700,0.2149,0.969511,0.570867,0.574985
800,0.2,1.073017,0.570854,0.575006
900,0.1629,0.985534,0.571147,0.5753
1000,0.173,1.052958,0.571346,0.575511


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.7647,1.017106,0.573196,0.577405
200,0.5234,1.12579,0.572956,0.57726
300,0.3901,0.87357,0.573199,0.577512
400,0.3213,0.953862,0.573309,0.577662
500,0.2886,0.91485,0.57349,0.577866
600,0.2348,0.989733,0.573627,0.578035
700,0.2369,0.942078,0.573878,0.578273
800,0.1787,1.008537,0.574084,0.578472
900,0.1919,1.012636,0.574361,0.578766
1000,0.1309,1.043833,0.574578,0.579004


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.7518,1.071316,0.577173,0.58164
200,0.5344,1.066015,0.576834,0.581376
300,0.3911,0.827872,0.577192,0.581727
400,0.3405,0.933733,0.577317,0.581872
500,0.2594,0.989168,0.577441,0.582031
600,0.2526,0.926635,0.577592,0.582184
700,0.2258,0.914421,0.577849,0.582446
800,0.2051,0.993802,0.577997,0.58261
900,0.1857,1.025296,0.578117,0.582752
1000,0.1914,0.989569,0.578303,0.582934


Filter:   0%|          | 0/1188 [00:00<?, ? examples/s]

Map:   0%|          | 0/1188 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss,Validation Loss,Accuracy,F1
100,0.7719,1.290544,0.579338,0.584014
200,0.5295,0.985374,0.579257,0.583975
300,0.3956,0.977442,0.579151,0.583903
400,0.3472,0.948297,0.579249,0.58402
500,0.2696,0.919457,0.579373,0.584145
600,0.2549,1.052823,0.579306,0.584104
700,0.1938,0.971927,0.579517,0.584311
800,0.1778,1.03126,0.579638,0.58444
900,0.1582,1.076595,0.579747,0.584551
1000,0.1626,1.047576,0.579992,0.58478


In [10]:
results

Unnamed: 0,accuracy,weighted_precision,weighted_recall,weighted_f1,macro_precision,macro_recall,macro_f1
Registro formal,0.643981,0.642576,0.643981,0.641287,0.424591,0.396777,0.404402
Coerência Temática,0.624575,0.622286,0.624575,0.620606,0.399331,0.414174,0.398246
Tipologia Textual,0.521856,0.543915,0.521856,0.529059,0.368106,0.360199,0.35574
Coesão,0.650679,0.64643,0.650679,0.646595,0.430058,0.395604,0.404842
