In [1]:
import pickle
import torch
from transformers import AutoTokenizer
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import Trainer, TrainingArguments
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import AutoModelForSequenceClassification

In [2]:
path = "../trainedModels"
model_name = "xlnet-base-cased"

# Choose the compute backend: Apple's MPS is checked first, then CUDA,
# and the CPU is the fallback when neither is available.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

# Report which backend was selected.
print({
    "mps": "MPS device is available. Using GPU for training.",
    "cuda": "CUDA device is available. Using GPU for training.",
    "cpu": "No GPU available. Using CPU for training.",
}[device.type])

MPS device is available. Using GPU for training.


In [3]:
# Load the train / validation / test CSV files as the splits of one dataset.
dataset = load_dataset(
    'csv',
    data_files={
        split: f'../transformers_data/{split}.csv'
        for split in ('train', 'validation', 'test')
    },
)

# Integer class id for each discourse effectiveness rating.
label_mapping = {
    rating: index
    for index, rating in enumerate(("Effective", "Adequate", "Ineffective"))
}

# Integer id for each discourse type.
label_mapping_types = {
    discourse_type: index
    for index, discourse_type in enumerate((
        "Claim",
        "Concluding Statement",
        "Counterclaim",
        "Evidence",
        "Lead",
        "Position",
        "Rebuttal",
    ))
}

def encode_labels(example):
    """Store the integer class id of the example's effectiveness rating.

    Reads ``example['discourse_effectiveness']``, looks it up in
    ``label_mapping`` and writes the result to ``example['labels']``.

    Returns the same ``example`` object that was passed in.
    Raises ``KeyError`` if the rating is not a key of ``label_mapping``.
    """
    rating = example['discourse_effectiveness']
    example['labels'] = label_mapping[rating]
    return example

def encode_types(example):
    """Store the integer id of the example's discourse type.

    Reads ``example['discourse_type']``, looks it up in
    ``label_mapping_types`` and writes the result to ``example['types']``.

    Returns the same ``example`` object that was passed in.
    Raises ``KeyError`` if the type is not a key of ``label_mapping_types``.
    """
    discourse_type = example['discourse_type']
    example['types'] = label_mapping_types[discourse_type]
    return example

# Add the integer 'labels' column, then the integer 'types' column.
dataset = dataset.map(encode_labels).map(encode_types)

# Tokenizer matching the pretrained checkpoint named by model_name.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    """Tokenize a batch of discourse texts to a fixed length of 128.

    ``examples['discourse_text']`` is iterated entry by entry; any entry
    that is not a ``str`` is replaced by an empty string before
    tokenization. Every sequence is truncated and padded to 128 tokens.

    Returns whatever the module-level ``tokenizer`` returns for the batch.
    """
    cleaned_texts = []
    for text in examples['discourse_text']:
        # Non-string entries are replaced by an empty string.
        cleaned_texts.append(text if isinstance(text, str) else "")

    return tokenizer(cleaned_texts, padding='max_length', truncation=True, max_length=128)

# Tokenize every split in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Drop the raw id/text/string-label columns that are no longer needed.
tokenized_datasets = tokenized_datasets.remove_columns(['discourse_id', 'essay_id', 'discourse_text', 'discourse_effectiveness', 'discourse_type'])

tokenized_datasets.set_format('torch')

# These loaders are not passed to the Trainer (it receives the datasets
# directly); they are kept for any manual training / evaluation loops.
train_dataloader = DataLoader(tokenized_datasets['train'], shuffle=True, batch_size=16)
eval_dataloader = DataLoader(tokenized_datasets['validation'], batch_size=16)

# Derive the class count from the mapping so the two cannot drift apart.
num_labels = len(label_mapping)  # Effective, Adequate, Ineffective

# Pass the label names along so the model config (and therefore the saved
# checkpoint) carries readable class names instead of generic LABEL_n ids.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label={index: label for label, index in label_mapping.items()},
    label2id=dict(label_mapping),
)
model.to(device)

def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision / recall / F1 for an evaluation pass.

    Args:
        eval_pred: A pair ``(logits, labels)``. ``logits`` is an array whose
            last axis indexes the classes, or a tuple whose first element is
            such an array.

    Returns:
        dict with the keys ``'accuracy'``, ``'precision'``, ``'recall'`` and
        ``'f1'``.
    """
    logits, labels = eval_pred

    # If the predictions arrive as a tuple of outputs, use the first one
    # (assumed to be the classification logits).
    if isinstance(logits, tuple):
        logits = logits[0]

    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)

    # zero_division=0 keeps the values scikit-learn would return anyway for a
    # class that is never predicted, but without emitting a warning on every
    # evaluation pass.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted', zero_division=0
    )
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }

training_args = TrainingArguments(
    # Where checkpoints and outputs are written; at most two checkpoints are kept.
    output_dir='./results',
    save_total_limit=2,

    # Evaluate and save once per epoch, and reload the checkpoint with the
    # best accuracy when training finishes.
    eval_strategy="epoch",
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='accuracy',

    # Optimisation settings.
    learning_rate=4e-5,
    weight_decay=0.05,
    warmup_steps=500,
    num_train_epochs=4,

    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
)

# Bundle the model, the training arguments, the train / validation splits
# and the metric function into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=tokenized_datasets['validation'],
    train_dataset=tokenized_datasets['train'],
)

# Fine-tune the model.
trainer.train()

# Evaluate on the test split. The returned metrics use the same "eval_"
# key prefix as the per-epoch validation metrics.
results = trainer.evaluate(tokenized_datasets['test'])
print(results)

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/3680 [00:00<?, ?it/s]

{'loss': 0.9027, 'grad_norm': 4.930896282196045, 'learning_rate': 4e-05, 'epoch': 0.54}


  0%|          | 0/115 [00:00<?, ?it/s]

{'eval_loss': 0.8385895490646362, 'eval_accuracy': 0.6441784548422198, 'eval_precision': 0.6624169343363074, 'eval_recall': 0.6441784548422198, 'eval_f1': 0.5913067343294899, 'eval_runtime': 43.6275, 'eval_samples_per_second': 84.259, 'eval_steps_per_second': 2.636, 'epoch': 1.0}
{'loss': 0.8202, 'grad_norm': 4.998886585235596, 'learning_rate': 3.371069182389938e-05, 'epoch': 1.09}
{'loss': 0.7772, 'grad_norm': 4.992334365844727, 'learning_rate': 2.742138364779874e-05, 'epoch': 1.63}


  0%|          | 0/115 [00:00<?, ?it/s]

{'eval_loss': 0.7798449397087097, 'eval_accuracy': 0.6583242655059848, 'eval_precision': 0.6776260514085894, 'eval_recall': 0.6583242655059848, 'eval_f1': 0.6057441428744403, 'eval_runtime': 43.7217, 'eval_samples_per_second': 84.077, 'eval_steps_per_second': 2.63, 'epoch': 2.0}
{'loss': 0.7403, 'grad_norm': 6.736571788787842, 'learning_rate': 2.1132075471698115e-05, 'epoch': 2.17}
{'loss': 0.6777, 'grad_norm': 5.069662094116211, 'learning_rate': 1.4842767295597484e-05, 'epoch': 2.72}


  0%|          | 0/115 [00:00<?, ?it/s]

{'eval_loss': 0.7851482629776001, 'eval_accuracy': 0.6501632208922742, 'eval_precision': 0.6410801782519645, 'eval_recall': 0.6501632208922742, 'eval_f1': 0.6380137186119572, 'eval_runtime': 43.7024, 'eval_samples_per_second': 84.114, 'eval_steps_per_second': 2.631, 'epoch': 3.0}
{'loss': 0.63, 'grad_norm': 7.8140058517456055, 'learning_rate': 8.553459119496857e-06, 'epoch': 3.26}
{'loss': 0.5578, 'grad_norm': 6.438013076782227, 'learning_rate': 2.2641509433962266e-06, 'epoch': 3.8}


  0%|          | 0/115 [00:00<?, ?it/s]

{'eval_loss': 0.8668508529663086, 'eval_accuracy': 0.6534276387377584, 'eval_precision': 0.6450559130263127, 'eval_recall': 0.6534276387377584, 'eval_f1': 0.6385866253073323, 'eval_runtime': 43.7739, 'eval_samples_per_second': 83.977, 'eval_steps_per_second': 2.627, 'epoch': 4.0}
{'train_runtime': 4701.2477, 'train_samples_per_second': 25.025, 'train_steps_per_second': 0.783, 'train_loss': 0.7207895444787067, 'epoch': 4.0}


  0%|          | 0/115 [00:00<?, ?it/s]

{'eval_loss': 0.7629185318946838, 'eval_accuracy': 0.6682077780799565, 'eval_precision': 0.6803455789547144, 'eval_recall': 0.6682077780799565, 'eval_f1': 0.6204779445416811, 'eval_runtime': 43.8104, 'eval_samples_per_second': 83.93, 'eval_steps_per_second': 2.625, 'epoch': 4.0}


In [4]:
name = "xlnet-base-cased"
save_dir = f'{path}/{name}'

# Write the fine-tuned model and the tokenizer into the same directory.
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

# Pickle the Trainer's log history (not the Trainer object itself).
with open(f'{save_dir}/{name}.pkl', 'wb') as history_file:
    pickle.dump(trainer.state.log_history, history_file)

# Run the model over the test split.
predictions = trainer.predict(tokenized_datasets['test'])

# Pickle the object returned by trainer.predict.
with open(f'{save_dir}/{name}-predictions.pkl', 'wb') as predictions_file:
    pickle.dump(predictions, predictions_file)

  0%|          | 0/115 [00:00<?, ?it/s]