In [None]:
%pip install datasets
%pip install huggingface
%pip install evaluate

In [None]:
import os
import pandas as pd
import evaluate

from datasets import load_dataset
from transformers import GPT2Tokenizer
from transformers import GPT2ForSequenceClassification
from transformers import TrainingArguments, Trainer
import numpy as np

In [None]:
# Turn off the Weights & Biases logging integration for this session
os.environ.update({"WANDB_DISABLED": "true"})

In [None]:
# Fetch the tweet sentiment dataset, show a summary of its training split,
# and keep a pandas copy of that split for inspection
dataset = load_dataset(path="mteb/tweet_sentiment_extraction")
print(dataset["train"])
df = pd.DataFrame(data=dataset["train"])

Dataset({
    features: ['id', 'text', 'label', 'label_text'],
    num_rows: 26732
})


In [None]:
# Preview the first rows of the training split (id, text, label, label_text)
df.head()

Unnamed: 0,id,text,label,label_text
0,cb774db0d1,"I`d have responded, if I were going",1,neutral
1,549e992a42,Sooo SAD I will miss you here in San Diego!!!,0,negative
2,088c60f138,my boss is bullying me...,0,negative
3,9642c003ef,what interview! leave me alone,0,negative
4,358bd9e861,"Sons of ****, why couldn`t they put them on t...",0,negative


In [None]:
# Load the pretrained GPT-2 tokenizer
tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name_or_path="gpt2")

# No pad token is configured here by default, so reuse the end-of-sequence
# token for padding
tokenizer.pad_token = tokenizer.eos_token

# Tokenization callback applied to batches of examples
def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Args:
        examples: A batch of dataset rows (this function is mapped with
            ``batched=True``); only its ``"text"`` entry is read.

    Returns:
        The tokenizer's output for the batch, with sequences truncated and
        padded using the ``"max_length"`` strategy.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True, padding="max_length")

# Apply the tokenizer to every split of the dataset, one batch at a time
tokenized_datasets = dataset.map(function=tokenize_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Map:   0%|          | 0/26732 [00:00<?, ? examples/s]

Map:   0%|          | 0/3432 [00:00<?, ? examples/s]

In [None]:
# Build the TRAIN and EVAL partitions: shuffle each split with a fixed seed
# and keep its first 1000 examples
small_train_dataset = (
    tokenized_datasets["train"]
    .shuffle(seed=42)
    .select(range(1000))
)
small_eval_dataset = (
    tokenized_datasets["test"]
    .shuffle(seed=42)
    .select(range(1000))
)

In [None]:
# Resource limitations: for illustration, keep only the first 100 examples of
# each partition
small_train_dataset, small_eval_dataset = (
    small_train_dataset.select(range(100)),
    small_eval_dataset.select(range(100)),
)

In [None]:
# Load pretrained GPT-2 with a sequence-classification head of 3 outputs
model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=3)

# The classification head scores the last non-padding token of each sequence,
# which it can only locate when the config defines a pad token id. Without it,
# the model reads the final position of each row (padding, since inputs are
# padded to a fixed length) and rejects batch sizes above 1.
# The tokenizer pads with EOS, so register EOS as the pad token here as well.
model.config.pad_token_id = model.config.eos_token_id

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Accuracy metric used to score predictions during evaluation
metric = evaluate.load(path="accuracy")

def compute_metrics(eval_pred):
    """Score a set of predictions with the accuracy metric.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row of
        logits compared against the reference labels.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest logit along the last axis
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(references=references, predictions=predicted_classes)

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
# Training parameters

training_args = TrainingArguments(
   output_dir="test_trainer",        # where checkpoints and outputs are written
   evaluation_strategy="epoch",      # run evaluation at the end of every epoch
   per_device_train_batch_size=1,    # small batch to limit memory use
   per_device_eval_batch_size=1,     # same for evaluation
   gradient_accumulation_steps=4,    # accumulate 4 batches per optimizer step
   # The string "none" disables all logging integrations. Passing the Python
   # value None does not: it falls back to the library default.
   report_to="none",
   )

# Bundle the model, its training configuration, the train/eval subsets and the
# metric callback into a single Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=small_eval_dataset,
    train_dataset=small_train_dataset,
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [None]:
# Baseline: evaluate on the evaluation subset before any fine-tuning
# (the classification head's weights are still newly initialized at this point)
trainer.evaluate()

{'eval_loss': 3.3460071086883545,
 'eval_model_preparation_time': 0.003,
 'eval_accuracy': 0.29,
 'eval_runtime': 7.6596,
 'eval_samples_per_second': 13.055,
 'eval_steps_per_second': 13.055}

In [None]:
# Fine-tune the model on the training subset with the arguments configured above
trainer.train()

Step,Training Loss


TrainOutput(global_step=75, training_loss=1.2326266479492187, metrics={'train_runtime': 89.5385, 'train_samples_per_second': 3.351, 'train_steps_per_second': 0.838, 'total_flos': 156779465932800.0, 'train_loss': 1.2326266479492187, 'epoch': 3.0})

In [None]:
# Re-run the same evaluation after fine-tuning so the loss and accuracy can be
# compared against the pre-training baseline
trainer.evaluate()

{'eval_loss': 1.2125474214553833,
 'eval_model_preparation_time': 0.003,
 'eval_accuracy': 0.4,
 'eval_runtime': 7.4261,
 'eval_samples_per_second': 13.466,
 'eval_steps_per_second': 13.466,
 'epoch': 3.0}