In [3]:
import os
import random

import numpy as np
import pandas as pd
import torch
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [4]:
# Experiment configuration
TASK_NAME = "cola"  # GLUE task to fine-tune on
SEED = 42  # seed handed to every random number generator

# Optimisation hyperparameters
EPOCHS = 3  # number of passes over the training split
BATCH_SIZE = 16  # per-device batch size, shared by training and evaluation
LEARNING_RATE = 2e-5  # learning rate used for fine-tuning

In [5]:
# Make the run repeatable: seed the PyTorch, Python and NumPy generators
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

# Use a CUDA device when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    torch.cuda.manual_seed_all(SEED)  # seed every GPU as well
    torch.cuda.empty_cache()  # clear cached GPU memory before starting
else:
    print("Using CPU")

In [6]:
# Load the tokenizer for the ALBERT checkpoint.
# ALBERT ships inside the `transformers` library, so this checkpoint needs no
# custom code from the Hub. `trust_remote_code` is therefore left at its
# default (False) so that nothing downloaded from the model repository is
# executed locally.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

Loading tokenizer...


In [7]:
# Fetch the GLUE benchmark data for the configured task
print(f"Loading GLUE dataset for task: {TASK_NAME}...")
dataset = load_dataset(path="glue", name=TASK_NAME)

Loading GLUE dataset for task: cola...


In [8]:
# Tokenization helper, applied to the dataset in batches
def tokenize_function(examples):
    """Tokenize the ``sentence`` field of a batch of examples.

    Every sequence is truncated to, and padded up to, 128 tokens.

    Args:
        examples: Mapping with a ``"sentence"`` entry holding the text to encode.

    Returns:
        The output of the module-level ``tokenizer`` for those sentences.
    """
    encoding_options = {"padding": "max_length", "truncation": True, "max_length": 128}
    sentences = examples["sentence"]
    return tokenizer(sentences, **encoding_options)

In [9]:
# Run the tokenizer over the dataset, one batch of examples at a time
print("Tokenizing dataset...")
tokenized_dataset = dataset.map(function=tokenize_function, batched=True)

# Expose only the model inputs and the label, formatted as PyTorch tensors
tokenized_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "label"],
)

Tokenizing dataset...


Map:   0%|          | 0/8551 [00:00<?, ? examples/s]

Map:   0%|          | 0/1043 [00:00<?, ? examples/s]

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]

In [10]:
# Build ALBERT with a two-label classification head, then move it to `device`
print("Loading ALBERT model for sequence classification...")
model = AutoModelForSequenceClassification.from_pretrained(
    "albert-base-v2",
    num_labels=2,
)
model = model.to(device)

Loading ALBERT model for sequence classification...


Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=LEARNING_RATE,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    load_best_model_at_end=True,
)

In [12]:
# Metrics
accuracy_metric = load_metric("accuracy")
f1_metric = load_metric("f1")


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="weighted")
    return {**accuracy, **f1}

  accuracy_metric = load_metric("accuracy")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [13]:
# Wire the model, the data splits and the metric callback into a Trainer
print("Initializing Trainer...")
trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Initializing Trainer...


In [14]:
# Run the fine-tuning loop; the returned summary is displayed as the cell output
print("Starting fine-tuning...")
train_output = trainer.train()
train_output

Starting fine-tuning...


  0%|          | 0/1605 [00:00<?, ?it/s]

{'loss': 0.628, 'grad_norm': 50.520591735839844, 'learning_rate': 1.9376947040498444e-05, 'epoch': 0.09}
{'loss': 0.5896, 'grad_norm': 20.38228988647461, 'learning_rate': 1.8753894080996886e-05, 'epoch': 0.19}
{'loss': 0.533, 'grad_norm': 59.33952713012695, 'learning_rate': 1.8130841121495328e-05, 'epoch': 0.28}
{'loss': 0.5464, 'grad_norm': 29.85576629638672, 'learning_rate': 1.750778816199377e-05, 'epoch': 0.37}
{'loss': 0.5238, 'grad_norm': 13.17466926574707, 'learning_rate': 1.688473520249221e-05, 'epoch': 0.47}
{'loss': 0.5505, 'grad_norm': 19.437562942504883, 'learning_rate': 1.6261682242990654e-05, 'epoch': 0.56}
{'loss': 0.5257, 'grad_norm': 11.828453063964844, 'learning_rate': 1.56386292834891e-05, 'epoch': 0.65}
{'loss': 0.4726, 'grad_norm': 20.440431594848633, 'learning_rate': 1.501557632398754e-05, 'epoch': 0.75}
{'loss': 0.4897, 'grad_norm': 20.19470977783203, 'learning_rate': 1.4392523364485981e-05, 'epoch': 0.84}
{'loss': 0.4716, 'grad_norm': 19.275693893432617, 'learnin

  0%|          | 0/66 [00:00<?, ?it/s]

{'eval_loss': 0.4816175401210785, 'eval_accuracy': 0.7718120805369127, 'eval_f1': 0.7444809174766884, 'eval_runtime': 3.4295, 'eval_samples_per_second': 304.126, 'eval_steps_per_second': 19.245, 'epoch': 1.0}
{'loss': 0.4507, 'grad_norm': 21.48601531982422, 'learning_rate': 1.3146417445482867e-05, 'epoch': 1.03}
{'loss': 0.3518, 'grad_norm': 29.2148494720459, 'learning_rate': 1.2523364485981309e-05, 'epoch': 1.12}
{'loss': 0.3355, 'grad_norm': 24.31055450439453, 'learning_rate': 1.1900311526479751e-05, 'epoch': 1.21}
{'loss': 0.3653, 'grad_norm': 27.39017105102539, 'learning_rate': 1.1277258566978193e-05, 'epoch': 1.31}
{'loss': 0.3366, 'grad_norm': 14.646661758422852, 'learning_rate': 1.0654205607476635e-05, 'epoch': 1.4}
{'loss': 0.3748, 'grad_norm': 57.096580505371094, 'learning_rate': 1.0031152647975077e-05, 'epoch': 1.5}
{'loss': 0.3293, 'grad_norm': 46.044864654541016, 'learning_rate': 9.40809968847352e-06, 'epoch': 1.59}
{'loss': 0.3974, 'grad_norm': 14.311997413635254, 'learnin

  0%|          | 0/66 [00:00<?, ?it/s]

{'eval_loss': 0.4479144811630249, 'eval_accuracy': 0.8024928092042186, 'eval_f1': 0.8016153026969809, 'eval_runtime': 3.3671, 'eval_samples_per_second': 309.759, 'eval_steps_per_second': 19.601, 'epoch': 2.0}
{'loss': 0.2453, 'grad_norm': 3.437206268310547, 'learning_rate': 6.292834890965732e-06, 'epoch': 2.06}
{'loss': 0.2376, 'grad_norm': 23.956539154052734, 'learning_rate': 5.669781931464174e-06, 'epoch': 2.15}
{'loss': 0.1845, 'grad_norm': 48.463218688964844, 'learning_rate': 5.046728971962617e-06, 'epoch': 2.24}
{'loss': 0.2583, 'grad_norm': 23.536489486694336, 'learning_rate': 4.42367601246106e-06, 'epoch': 2.34}
{'loss': 0.2367, 'grad_norm': 25.13164520263672, 'learning_rate': 3.800623052959502e-06, 'epoch': 2.43}
{'loss': 0.2226, 'grad_norm': 38.88330078125, 'learning_rate': 3.177570093457944e-06, 'epoch': 2.52}
{'loss': 0.2043, 'grad_norm': 91.66117858886719, 'learning_rate': 2.5545171339563862e-06, 'epoch': 2.62}
{'loss': 0.2181, 'grad_norm': 11.124504089355469, 'learning_rat

  0%|          | 0/66 [00:00<?, ?it/s]

{'eval_loss': 0.6106752753257751, 'eval_accuracy': 0.8216682646212847, 'eval_f1': 0.815845215820613, 'eval_runtime': 3.2714, 'eval_samples_per_second': 318.82, 'eval_steps_per_second': 20.175, 'epoch': 3.0}
{'train_runtime': 239.3048, 'train_samples_per_second': 107.198, 'train_steps_per_second': 6.707, 'train_loss': 0.362840747573294, 'epoch': 3.0}


TrainOutput(global_step=1605, training_loss=0.362840747573294, metrics={'train_runtime': 239.3048, 'train_samples_per_second': 107.198, 'train_steps_per_second': 6.707, 'total_flos': 153264501112320.0, 'train_loss': 0.362840747573294, 'epoch': 3.0})

In [15]:
# Score the trainer's current model on its evaluation split and show the metrics
print("Evaluating model on validation set...")
eval_results = trainer.evaluate(metric_key_prefix="eval")
print(f"Evaluation results: {eval_results}")

Evaluating model on validation set...


  0%|          | 0/66 [00:00<?, ?it/s]

Evaluation results: {'eval_loss': 0.4479144811630249, 'eval_accuracy': 0.8024928092042186, 'eval_f1': 0.8016153026969809, 'eval_runtime': 3.2727, 'eval_samples_per_second': 318.696, 'eval_steps_per_second': 20.167, 'epoch': 3.0}


In [16]:
# Persist the fine-tuned weights and the tokenizer side by side
print("Saving the fine-tuned model...")
save_dir = "./fine_tuned_albert"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)
print("Fine-tuning complete and model saved!")

Saving the fine-tuned model...
Fine-tuning complete and model saved!


In [21]:
# Append this run's evaluation metrics to a CSV log that accumulates across runs
results_file = "./results.csv"

# One-row frame: the task name plus every entry of the evaluation results
new_results = pd.DataFrame({"task": [TASK_NAME], **eval_results})

# Re-use earlier rows only when the log exists and has content: a zero-byte
# file (for example one left behind by an interrupted write) would make
# `pd.read_csv` raise `EmptyDataError`.
if os.path.exists(results_file) and os.path.getsize(results_file) > 0:
    existing_results = pd.read_csv(results_file)
    # Append the new row after the rows already on disk
    updated_results = pd.concat([existing_results, new_results], ignore_index=True)
else:
    # Nothing usable on disk yet, so the new row becomes the whole log
    updated_results = new_results

# Write the combined log back without the positional index column
updated_results.to_csv(results_file, index=False)