<a href="https://colab.research.google.com/github/RaduSima/SSL_Project2024/blob/master/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ! pip install datasets
# ! pip install -U accelerate
# ! pip install -U transformers

import ast
import itertools

import pandas as pd
from transformers import Trainer, TrainingArguments

from architectures import OurDifficultyClassifierModel, OurTagClassifierModel
from utils import (compute_metrics_difficulty_classifier,
                   compute_metrics_tag_classifier, get_model_class_from_name,
                   maybe_load_embeddings,
                   prepare_dataset_difficulty_classifier,
                   prepare_dataset_tag_classifier,
                   prepare_finetune_dataset_difficulty_classifier,
                   prepare_finetune_dataset_tag_classifier, save_torch_model,
                   transformers_classes, transformers_repo)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_data = pd.read_csv('./data/AMT10/AMT10_train.csv')
val_data = pd.read_csv('./data/AMT10/AMT10_validation.csv')
test_data = pd.read_csv('./data/AMT10/AMT10_test.csv')

train_texts, train_difficulty_ratings, train_tags = train_data['description'].tolist(), train_data['rating'].tolist(), train_data['tags'].tolist()
val_texts, val_difficulty_ratings, val_tags = val_data['description'].tolist(), val_data['rating'].tolist(), val_data['tags'].tolist()
test_texts, test_difficulty_ratings, test_tags = test_data['description'].tolist(), test_data['rating'].tolist(), test_data['tags'].tolist()

train_tags = [ast.literal_eval(tags) for tags in train_tags]
val_tags = [ast.literal_eval(tags) for tags in val_tags]
test_tags = [ast.literal_eval(tags) for tags in test_tags]

all_tags = sorted(list(set(itertools.chain.from_iterable(train_tags + val_tags + test_tags))))
tag2id = {tag: i for i, tag in enumerate(all_tags)}
id2tag = {i: tag for i, tag in enumerate(all_tags)}

In [3]:
def train_and_finetune_tag_classifier(transformer_name, train_texts, train_tags, val_texts, val_tags, test_texts, test_tags, intermediate_layers, tag2id):
    base_model = transformers_classes[transformer_name]["model_class"].from_pretrained(transformer_name)
    embedding_size = transformers_classes[transformer_name]["embedding_size"]

    train_dataset=prepare_finetune_dataset_tag_classifier(transformer_name, train_texts, train_tags, tag2id)
    val_dataset=prepare_finetune_dataset_tag_classifier(transformer_name, val_texts, val_tags, tag2id)
    test_dataset=prepare_finetune_dataset_tag_classifier(transformer_name, test_texts, test_tags, tag2id)

    model = OurTagClassifierModel(base_model, embedding_size, len(tag2id), intermediate_layers=intermediate_layers)
    
    training_args = TrainingArguments(
      learning_rate=1e-4,
      output_dir="./results",
      num_train_epochs=1,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=16,
      warmup_steps=500,
      weight_decay=0.01,
      logging_dir="./logs",
      metric_for_best_model="f1"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics_tag_classifier
    )

    trainer.train()

    # Evaluate the model on the testing set
    eval_results = trainer.evaluate(test_dataset)
    print(eval_results)

    save_torch_model(
        trainer.model,
        {"tag2id": tag2id, "intermediate_layers": intermediate_layers, 'model_class': OurTagClassifierModel},
        f"{transformer_name.replace('/', '_')}_transformer_tag_classifier"
    )
    # save scores
    with open(f'./results/{transformer_name.replace("/", "_")}__transformer_tag_classifier_scores.txt', 'w') as f:
        f.write(str(eval_results))
    
    return eval_results

In [4]:
def train_and_finetune_difficulty_classifier(transformer_name, train_texts, train_difficulty_ratings, val_texts, val_difficulty_ratings, test_texts, test_difficulty_ratings, intermediate_layers, num_classes):
    base_model = transformers_classes[transformer_name]["model_class"].from_pretrained(
        transformer_name)
    embedding_size = transformers_classes[transformer_name]["embedding_size"]

    train_dataset = prepare_finetune_dataset_difficulty_classifier(
        transformer_name, train_texts, train_difficulty_ratings, num_classes=num_classes)
    val_dataset = prepare_finetune_dataset_difficulty_classifier(
        transformer_name, val_texts, val_difficulty_ratings, num_classes=num_classes)
    test_dataset = prepare_finetune_dataset_difficulty_classifier(
        transformer_name, test_texts, test_difficulty_ratings, num_classes=num_classes)

    model = OurDifficultyClassifierModel(
        base_model.bert, embedding_size, num_classes, intermediate_layers)

    training_args = TrainingArguments(
        learning_rate=1e-4,
        output_dir="./results",
        num_train_epochs=1,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir="./logs",
        metric_for_best_model="neighborhood_accuracy"
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics_difficulty_classifier
    )

    trainer.train()

    # Evaluate the model on the testing set
    eval_results = trainer.evaluate(test_dataset)
    print(eval_results)

    save_torch_model(
        trainer.model,
        {"num_classes": num_classes, "intermediate_layers": intermediate_layers, 'model_class': OurDifficultyClassifierModel},
        f"{transformer_name.replace('/', '_')}_transformer_difficulty_classifier"
    )

    # save scores
    with open(f"{transformer_name.replace('/', '_')}_transformer_difficulty_classifier_scores.txt", "w") as f:
        f.write(str(eval_results))

    return eval_results

In [5]:
def train_grid_search_tag_classifier(transformer_name, train_texts, train_tags, val_texts, val_tags, test_texts, test_tags, metric_compare_grid_results, param_grid):
    train_embeddings = maybe_load_embeddings(
        transformer_name, train_texts, "train", "./data/AMT10")
    val_embeddings = maybe_load_embeddings(
        transformer_name, val_texts, "val", "./data/AMT10")
    test_embeddings = maybe_load_embeddings(
        transformer_name, test_texts, "test", "./data/AMT10")

    param_combinations = list(itertools.product(*param_grid.values()))
    embedding_size = transformers_classes[transformer_name]["embedding_size"]

    best_eval_metric = float('-inf')
    best_params = None
    best_trainer = None

    for params in param_combinations:
        params = dict(zip(param_grid.keys(), params))
        learning_rate = params['learning_rate']
        train_batch_size = params['per_device_train_batch_size']
        num_epochs = params['num_train_epochs']
        model_class = get_model_class_from_name(params['model_class'])
        num_classes = len(tag2id)
        intermediate_layers = params['intermediate_layers']
        train_metric = params['train_metric']

        to_train_model = model_class(
            embedding_size=embedding_size, num_classes=num_classes, intermediate_layers=intermediate_layers)

        train_dataset = prepare_dataset_tag_classifier(
            train_embeddings, train_tags, tag2id)
        val_dataset = prepare_dataset_tag_classifier(
            val_embeddings, val_tags, tag2id)
        test_dataset = prepare_dataset_tag_classifier(
            test_embeddings, test_tags, tag2id)

        training_args = TrainingArguments(
            learning_rate=learning_rate,
            output_dir="./results",
            num_train_epochs=num_epochs,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=train_batch_size,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir="./logs",
            metric_for_best_model=train_metric
        )

        trainer = Trainer(
            model=to_train_model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics_tag_classifier
        )

        trainer.train()

        eval_results = trainer.evaluate(test_dataset)

        print(eval_results)

        eval_metric = eval_results[f'eval_{metric_compare_grid_results}']

        if eval_metric > best_eval_metric:
            best_eval_metric = eval_metric
            best_params = params
            best_trainer = trainer

    print(
        f"Best Evaluation Metric: {metric_compare_grid_results}: {best_eval_metric}")
    print(f"Best Hyperparameters: {best_params}")

    # Evaluate the model on the testing set
    test_dataset = prepare_dataset_tag_classifier(
        test_embeddings, test_tags, tag2id)
    best_eval_results = best_trainer.evaluate(test_dataset)
    print(best_eval_results)

    best_model_hyperparams = {
        "tag2id": tag2id,
        "transformer_name": transformer_name,
        "embedding_size": embedding_size,
        "num_classes": len(tag2id),
        "intermediate_layers": best_params['intermediate_layers'],
        "model_class": best_params['model_class']
    }

    save_torch_model(
        best_trainer.model,
        best_model_hyperparams,
        f"{transformer_name.replace('/', '_')}_tag_classifier"
    )
    # save scores
    with open(f"{transformer_name.replace('/', '_')}_tag_classifier_scores.txt", "w") as f:
        f.write(str(best_eval_results))

    return best_params, best_eval_results

In [6]:
def train_grid_search_difficulty_classifier(transformer_name, train_texts, train_difficulty_ratings, val_texts, val_difficulty_ratings, test_texts, test_difficulty_ratings, metric_compare_grid_results, param_grid):
    train_embeddings = maybe_load_embeddings(
        transformer_name, train_texts, "train", "./data/AMT10")
    val_embeddings = maybe_load_embeddings(
        transformer_name, val_texts, "val", "./data/AMT10")
    test_embeddings = maybe_load_embeddings(
        transformer_name, test_texts, "test", "./data/AMT10")

    param_combinations = list(itertools.product(*param_grid.values()))
    embedding_size = transformers_classes[transformer_name]["embedding_size"]

    best_eval_metric = float('-inf')
    best_params = None
    best_trainer = None

    for params in param_combinations:
        params = dict(zip(param_grid.keys(), params))
        learning_rate = params['learning_rate']
        train_batch_size = params['per_device_train_batch_size']
        num_epochs = params['num_train_epochs']
        model_class = get_model_class_from_name(params['model_class'])
        num_classes = params['num_classes']
        intermediate_layers = params['intermediate_layers']
        train_metric = params['train_metric']

        to_train_model = model_class(
            embedding_size=embedding_size, num_classes=num_classes, intermediate_layers=intermediate_layers)

        train_dataset = prepare_dataset_difficulty_classifier(
            train_embeddings, train_difficulty_ratings, num_classes=num_classes)
        val_dataset = prepare_dataset_difficulty_classifier(
            val_embeddings, val_difficulty_ratings, num_classes=num_classes)
        test_dataset = prepare_dataset_difficulty_classifier(
            test_embeddings, test_difficulty_ratings, num_classes=num_classes)

        training_args = TrainingArguments(
            learning_rate=learning_rate,
            output_dir="./results",
            num_train_epochs=num_epochs,
            per_device_train_batch_size=train_batch_size,
            per_device_eval_batch_size=train_batch_size,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir="./logs",
            metric_for_best_model=train_metric
        )

        trainer = Trainer(
            model=to_train_model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            compute_metrics=compute_metrics_difficulty_classifier
        )

        trainer.train()

        eval_results = trainer.evaluate(test_dataset)

        eval_metric = eval_results[f'eval_{metric_compare_grid_results}']

        if eval_metric > best_eval_metric:
            best_eval_metric = eval_metric
            best_params = params
            best_trainer = trainer

    print(
        f"Best Evaluation Metric: {metric_compare_grid_results}: {best_eval_metric}")
    print(f"Best Hyperparameters: {best_params}")

    # Evaluate the model on the testing set
    test_dataset = prepare_dataset_difficulty_classifier(
        test_embeddings, test_difficulty_ratings, num_classes=best_params['num_classes'])
    best_eval_results = best_trainer.evaluate(test_dataset)
    print(best_eval_results)

    best_model_hyperparams = {
        "transformer_name": transformer_name,
        "num_classes": best_params['num_classes'],
        "embedding_size": embedding_size,
        "intermediate_layers": best_params['intermediate_layers'],
        "model_class": best_params['model_class']
    }

    save_torch_model(
        best_trainer.model,
        best_model_hyperparams,
        f"{transformer_name.replace('/', '_')}_difficulty_classifier"
    )
    # save scores
    with open(f"{transformer_name.replace('/', '_')}_difficulty_classifier_scores.txt", "w") as f:
        f.write(str(best_eval_results))
    
    return best_params, best_eval_results

In [7]:
# Train grid search difficulty classifier
param_grid = {
    'learning_rate': [
        8 * 1e-4,
    ],
    'per_device_train_batch_size': [
        1024 # could be bigger (1024, 2048)
    ],
    'num_train_epochs': [
        200, # could be bigger (500, 1000)
    ],
    "train_metric": [
        "neighborhood_accuracy",
        # "accuracy",
    ],

    # Model
    'model_class': [
        'OrdinalRegressionClassifier',
    ],
    'num_classes': [
        5,
        10,
        35
    ],
    'intermediate_layers': [
        [256, 128, 64, 32],
        [512, 128, 32],
        [256, 64, 16],
    ],
}

transformer_name = "google/bigbird-roberta-base"

best_difficulty_classifier_params, best_difficulty_classifier_eval_results = train_grid_search_difficulty_classifier(transformer_name, train_texts, train_difficulty_ratings, val_texts, val_difficulty_ratings, test_texts, test_difficulty_ratings, "neighborhood_accuracy", param_grid)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:35<04:57,  2.36it/s]

{'loss': 0.6419, 'grad_norm': 0.9379503726959229, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:13<01:13,  2.70it/s]

{'loss': 0.5327, 'grad_norm': 1.2383931875228882, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [08:40<00:00,  2.30it/s]


{'train_runtime': 520.7947, 'train_samples_per_second': 2018.838, 'train_steps_per_second': 2.304, 'train_loss': 0.5707315953572591, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:30<04:35,  2.54it/s]

{'loss': 0.635, 'grad_norm': 1.0169795751571655, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [06:53<01:08,  2.92it/s]

{'loss': 0.5274, 'grad_norm': 3.244264602661133, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [08:20<00:00,  2.40it/s]


{'train_runtime': 500.4338, 'train_samples_per_second': 2100.977, 'train_steps_per_second': 2.398, 'train_loss': 0.5640952364603679, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 199.97it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:45<04:59,  2.34it/s]

{'loss': 0.639, 'grad_norm': 1.3402694463729858, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:17<01:13,  2.72it/s]

{'loss': 0.5288, 'grad_norm': 1.529092788696289, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [08:40<00:00,  2.31it/s]


{'train_runtime': 520.4342, 'train_samples_per_second': 2020.236, 'train_steps_per_second': 2.306, 'train_loss': 0.5670805867513021, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 200.00it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:34<05:10,  2.26it/s]

{'loss': 0.6442, 'grad_norm': 1.1784086227416992, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:14<01:12,  2.76it/s]

{'loss': 0.5434, 'grad_norm': 1.1322171688079834, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [08:37<00:00,  2.32it/s]


{'train_runtime': 517.0338, 'train_samples_per_second': 2033.523, 'train_steps_per_second': 2.321, 'train_loss': 0.5780397733052571, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 250.00it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [04:01<06:25,  1.82it/s]

{'loss': 0.642, 'grad_norm': 0.29496216773986816, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [08:17<01:26,  2.29it/s]

{'loss': 0.5359, 'grad_norm': 0.9419613480567932, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [09:57<00:00,  2.01it/s]


{'train_runtime': 597.4912, 'train_samples_per_second': 1759.691, 'train_steps_per_second': 2.008, 'train_loss': 0.5728268432617187, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 250.06it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:50<05:32,  2.10it/s]

{'loss': 0.6434, 'grad_norm': 0.16145092248916626, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:41<01:16,  2.58it/s]

{'loss': 0.5379, 'grad_norm': 1.951047658920288, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [09:12<00:00,  2.17it/s]


{'train_runtime': 552.3127, 'train_samples_per_second': 1903.632, 'train_steps_per_second': 2.173, 'train_loss': 0.5746565437316895, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 244.07it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:57<06:06,  1.91it/s]

{'loss': 0.6471, 'grad_norm': 0.4050038754940033, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:53<01:18,  2.54it/s]

{'loss': 0.5432, 'grad_norm': 2.4406309127807617, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [09:26<00:00,  2.12it/s]


{'train_runtime': 566.2872, 'train_samples_per_second': 1856.655, 'train_steps_per_second': 2.119, 'train_loss': 0.5795585378011068, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 200.00it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:57<05:45,  2.03it/s]

{'loss': 0.6441, 'grad_norm': 1.173661470413208, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:54<01:20,  2.48it/s]

{'loss': 0.5379, 'grad_norm': 0.6369608640670776, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [09:27<00:00,  2.12it/s]


{'train_runtime': 567.3199, 'train_samples_per_second': 1853.275, 'train_steps_per_second': 2.115, 'train_loss': 0.5748220252990722, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 166.62it/s]
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:54<05:33,  2.10it/s]

{'loss': 0.6458, 'grad_norm': 1.1195939779281616, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:48<01:19,  2.51it/s]

{'loss': 0.5413, 'grad_norm': 1.6174143552780151, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [09:21<00:00,  2.14it/s]


{'train_runtime': 561.3383, 'train_samples_per_second': 1873.024, 'train_steps_per_second': 2.138, 'train_loss': 0.5780160140991211, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 200.00it/s]


Best Evaluation Metric: neighborhood_accuracy: 0.6412371134020619
Best Hyperparameters: {'learning_rate': 0.0008, 'per_device_train_batch_size': 1024, 'num_train_epochs': 200, 'train_metric': 'neighborhood_accuracy', 'model_class': 'OrdinalRegressionClassifier', 'num_classes': 5, 'intermediate_layers': [256, 128, 64, 32]}


100%|██████████| 1/1 [00:00<00:00, 250.39it/s]

{'eval_loss': 0.5834163427352905, 'eval_accuracy': 0.19278350515463918, 'eval_precision': 0.23979932310823776, 'eval_recall': 0.19367329100342295, 'eval_f1': 0.13653279777071334, 'eval_neighborhood_accuracy': 0.6412371134020619, 'eval_runtime': 0.473, 'eval_samples_per_second': 2050.744, 'eval_steps_per_second': 2.114, 'epoch': 200.0}





In [8]:
# Train grid search tag classifier
param_grid = {
    'learning_rate': [
        8 * 1e-4,
    ],
    'per_device_train_batch_size': [
        1024 # could be bigger (1024, 2048)
    ],
    'num_train_epochs': [
        200, # could be bigger (500, 1000)
    ],
    "train_metric": [
        "accuracy",
        "f1"
    ],
    # Model
    'model_class': [
        'TagClassifier',
    ],
    'intermediate_layers': [
        [256, 128, 64, 32],
        [512, 128, 32],
        [256, 64, 16],
    ],
}

transformer_name = "google/bigbird-roberta-base"

best_tag_classifier_params, best_tag_classifier_eval_results = train_grid_search_tag_classifier(transformer_name, train_texts, train_tags, val_texts, val_tags, test_texts, test_tags, "f1", param_grid)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:44<05:13,  2.24it/s]

{'loss': 0.4937, 'grad_norm': 0.09954436123371124, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [07:28<01:17,  2.57it/s]

{'loss': 0.3949, 'grad_norm': 0.18810302019119263, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [08:56<00:00,  2.24it/s]


{'train_runtime': 536.7217, 'train_samples_per_second': 1958.93, 'train_steps_per_second': 2.236, 'train_loss': 0.433052183787028, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 100.00it/s]


<transformers.trainer_utils.EvalPrediction object at 0x0000021BAC55B9D0>
Hey!
{'eval_loss': 0.34938716888427734, 'eval_accuracy': 0.18144329896907216, 'eval_precision': 0.39067531368752334, 'eval_recall': 0.2966012820585314, 'eval_f1': 0.29477151153135966, 'eval_runtime': 0.508, 'eval_samples_per_second': 1909.449, 'eval_steps_per_second': 1.969, 'epoch': 200.0}


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:55<06:45,  1.73it/s]

{'loss': 0.4713, 'grad_norm': 0.2953706383705139, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1000/1200 [08:20<01:49,  1.83it/s]

{'loss': 0.3772, 'grad_norm': 0.2308856099843979, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [10:04<00:00,  1.99it/s]


{'train_runtime': 604.0187, 'train_samples_per_second': 1740.675, 'train_steps_per_second': 1.987, 'train_loss': 0.4126008160909017, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 69.37it/s]


<transformers.trainer_utils.EvalPrediction object at 0x0000021BAC8AB690>
Hey!
{'eval_loss': 0.350132018327713, 'eval_accuracy': 0.17010309278350516, 'eval_precision': 0.49131966157211765, 'eval_recall': 0.32281715375278247, 'eval_f1': 0.312835556576469, 'eval_runtime': 0.5582, 'eval_samples_per_second': 1737.786, 'eval_steps_per_second': 1.792, 'epoch': 200.0}


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [04:23<06:14,  1.87it/s]

{'loss': 0.5043, 'grad_norm': 0.30883684754371643, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [08:46<01:30,  2.21it/s]

{'loss': 0.4007, 'grad_norm': 0.18360136449337006, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [10:30<00:00,  1.90it/s]


{'train_runtime': 630.3247, 'train_samples_per_second': 1668.029, 'train_steps_per_second': 1.904, 'train_loss': 0.441437931060791, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 83.32it/s]


<transformers.trainer_utils.EvalPrediction object at 0x0000021BAC55A2D0>
Hey!
{'eval_loss': 0.3545096814632416, 'eval_accuracy': 0.17216494845360825, 'eval_precision': 0.30261275383895625, 'eval_recall': 0.2560565108481151, 'eval_f1': 0.2541972136923235, 'eval_runtime': 0.6676, 'eval_samples_per_second': 1452.883, 'eval_steps_per_second': 1.498, 'epoch': 200.0}


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [04:22<06:16,  1.86it/s]

{'loss': 0.499, 'grad_norm': 0.2473190873861313, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [08:45<01:31,  2.18it/s]

{'loss': 0.3928, 'grad_norm': 0.27794915437698364, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [10:29<00:00,  1.91it/s]


{'train_runtime': 629.5267, 'train_samples_per_second': 1670.144, 'train_steps_per_second': 1.906, 'train_loss': 0.43378941853841146, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 62.50it/s]


<transformers.trainer_utils.EvalPrediction object at 0x0000021BAC88AE10>
Hey!
{'eval_loss': 0.3531070053577423, 'eval_accuracy': 0.16804123711340208, 'eval_precision': 0.35139549759975625, 'eval_recall': 0.289429802489101, 'eval_f1': 0.2720757612583342, 'eval_runtime': 0.5622, 'eval_samples_per_second': 1725.279, 'eval_steps_per_second': 1.779, 'epoch': 200.0}


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [04:23<06:13,  1.87it/s]

{'loss': 0.4713, 'grad_norm': 0.2953706383705139, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [08:48<01:32,  2.16it/s]

{'loss': 0.3772, 'grad_norm': 0.2308856099843979, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [10:12<00:00,  1.96it/s]


{'train_runtime': 612.8642, 'train_samples_per_second': 1715.551, 'train_steps_per_second': 1.958, 'train_loss': 0.4126008160909017, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 99.99it/s]


<transformers.trainer_utils.EvalPrediction object at 0x0000021BAC8A07D0>
Hey!
{'eval_loss': 0.350132018327713, 'eval_accuracy': 0.17010309278350516, 'eval_precision': 0.49131966157211765, 'eval_recall': 0.32281715375278247, 'eval_f1': 0.312835556576469, 'eval_runtime': 0.451, 'eval_samples_per_second': 2150.773, 'eval_steps_per_second': 2.217, 'epoch': 200.0}


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)
 42%|████▏     | 500/1200 [03:33<05:11,  2.25it/s]

{'loss': 0.5043, 'grad_norm': 0.30883684754371643, 'learning_rate': 0.0008, 'epoch': 83.33}


 83%|████████▎ | 1001/1200 [06:59<01:12,  2.73it/s]

{'loss': 0.4007, 'grad_norm': 0.18360136449337006, 'learning_rate': 0.00022857142857142857, 'epoch': 166.67}


100%|██████████| 1200/1200 [08:21<00:00,  2.39it/s]


{'train_runtime': 501.0471, 'train_samples_per_second': 2098.405, 'train_steps_per_second': 2.395, 'train_loss': 0.441437931060791, 'epoch': 200.0}


100%|██████████| 1/1 [00:00<00:00, 125.18it/s]


<transformers.trainer_utils.EvalPrediction object at 0x0000021BAC5E1950>
Hey!
{'eval_loss': 0.3545096814632416, 'eval_accuracy': 0.17216494845360825, 'eval_precision': 0.30261275383895625, 'eval_recall': 0.2560565108481151, 'eval_f1': 0.2541972136923235, 'eval_runtime': 0.4522, 'eval_samples_per_second': 2145.123, 'eval_steps_per_second': 2.211, 'epoch': 200.0}
Best Evaluation Metric: f1: 0.312835556576469
Best Hyperparameters: {'learning_rate': 0.0008, 'per_device_train_batch_size': 1024, 'num_train_epochs': 200, 'train_metric': 'accuracy', 'model_class': 'TagClassifier', 'intermediate_layers': [512, 128, 32]}


100%|██████████| 1/1 [00:00<00:00, 125.04it/s]

<transformers.trainer_utils.EvalPrediction object at 0x0000021BAC5C9F10>
Hey!
{'eval_loss': 0.350132018327713, 'eval_accuracy': 0.17010309278350516, 'eval_precision': 0.49131966157211765, 'eval_recall': 0.32281715375278247, 'eval_f1': 0.312835556576469, 'eval_runtime': 0.458, 'eval_samples_per_second': 2117.877, 'eval_steps_per_second': 2.183, 'epoch': 200.0}





In [9]:
# full_difficulty_classifier_model_eval_results = train_and_finetune_difficulty_classifier(transformer_name, train_texts, train_difficulty_ratings, val_texts, val_difficulty_ratings, test_texts, test_difficulty_ratings, best_difficulty_classifier_params['intermediate_layers'], best_difficulty_classifier_params['num_classes'])

In [10]:
# full_tag_classifier_model_eval_results = train_and_finetune_tag_classifier(transformer_name, train_texts, train_tags, val_texts, val_tags, test_texts, test_tags, best_tag_classifier_params['intermediate_layers'])

In [11]:
# TODO: load models saved on disk and evaluate them on the test set, post training