Install Required Libraries

In [None]:
!pip install transformers datasets scikit-learn torch


Collecting datasets
  Downloading datasets-3.0.0-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.0-py3-none-any.whl (474 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.3/474.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl (39.9 MB)
[2K  

Import Libraries and Prepare Data

In [None]:
from google.colab import files
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Ask the user to upload the two CSV files through the Colab file picker.
uploaded = files.upload()

# Read both CSV files; each is expected to expose a 'prompt' (text) and a 'label' column.
train_df = pd.read_csv('xstest_train_clean.csv')
test_df = pd.read_csv('xstest_test_clean.csv')

# Hold out 20% of the training file for validation (fixed seed for a repeatable split).
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_df['prompt'],
    train_df['label'],
    test_size=0.2,
    random_state=42,
)

# The tokenizer is fed plain Python lists, so convert the text Series right away.
train_texts, val_texts = train_texts.tolist(), val_texts.tolist()

# The test file is used as-is: texts as a list, labels kept as a Series for now.
test_texts = test_df['prompt'].tolist()
test_labels = test_df['label']


Saving xstest_test_clean.csv to xstest_test_clean.csv
Saving xstest_train_clean.csv to xstest_train_clean.csv


Tokenize the Data

In [None]:
# Load the GPT-2 tokenizer and reuse its end-of-sequence token for padding,
# so batches of different-length texts can be padded to a common length.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token


def preprocess_data(texts, max_length=512):
    """Tokenize a list of texts into padded PyTorch tensors.

    Args:
        texts: List of strings to encode.
        max_length: Maximum number of tokens kept per text; longer inputs are
            truncated. Defaults to 512, the value previously hard-coded here.

    Returns:
        The tokenizer output (with ``return_tensors="pt"``), padded to the
        longest sequence in ``texts``.
    """
    return tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors="pt",
    )


# Tokenize train, validation, and test data
train_encodings = preprocess_data(train_texts)
val_encodings = preprocess_data(val_texts)
test_encodings = preprocess_data(test_texts)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



Create a PyTorch Dataset

In [None]:
# Sanity check: every split must carry exactly one label per encoded example.
assert len(train_labels) == len(train_encodings['input_ids']), "Mismatch between input encodings and labels"
assert len(val_labels) == len(val_encodings['input_ids']), "Mismatch between validation encodings and labels"
assert len(test_labels) == len(test_encodings['input_ids']), "Mismatch between test encodings and labels"

# Labels that are still pandas Series are flattened to plain Python lists;
# anything else (already a list or tensor) is left untouched.
if isinstance(train_labels, pd.Series):
    train_labels = train_labels.tolist()
if isinstance(val_labels, pd.Series):
    val_labels = val_labels.tolist()
if isinstance(test_labels, pd.Series):
    test_labels = test_labels.tolist()


In [None]:
class TextDataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with classification labels.

    Args:
        encodings: Mapping from field name (must include ``'input_ids'``) to a
            per-example indexable container (tensor or nested list).
        labels: Indexable sequence with one label per example.

    Each item is a dict holding one tensor per encoding field plus a
    ``'labels'`` tensor.
    """

    def __init__(self, encodings, labels):
        assert len(encodings['input_ids']) == len(labels), "Encodings and labels must have the same length"
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _as_tensor(value):
        """Return an independent tensor copy of ``value``."""
        # Re-wrapping an existing tensor in torch.tensor() triggers PyTorch's
        # copy-construct UserWarning on every access; clone().detach() is the
        # recommended way to copy a tensor. Non-tensors are converted normally.
        if isinstance(value, torch.Tensor):
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return the example at ``idx``; raises IndexError when out of range."""
        size = len(self.labels)
        # Reject indices beyond either end (negative indices count from the back).
        if idx >= size or idx < -size:
            raise IndexError(f"Index {idx} out of bounds for dataset of length {size}")

        item = {key: self._as_tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self._as_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, defined by the number of labels."""
        return len(self.labels)

# Wrap each split (encodings + labels) in a TextDataset so the Trainer can consume it.
train_dataset, val_dataset, test_dataset = (
    TextDataset(train_encodings, train_labels),
    TextDataset(val_encodings, val_labels),
    TextDataset(test_encodings, test_labels),
)


Load the GPT-2 Model for Classification

In [None]:
# Load the pre-trained 'gpt2' checkpoint with a 2-class sequence-classification head.
# The classification head is not part of the checkpoint and starts untrained.
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2)
# Tell the model which token id the tokenizer uses for padding so both agree.
model.config.pad_token_id = tokenizer.pad_token_id  # Ensure padding is aligned with the tokenizer


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Define Evaluation Metrics

In [None]:
def compute_metrics(pred):
    """Compute binary classification metrics for the Trainer.

    Args:
        pred: Object exposing ``label_ids`` (true labels) and ``predictions``
            (per-class scores; the predicted class is the argmax over the
            last axis).

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall`` and ``f1`` (the last
        three computed with ``average='binary'``).
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    # zero_division=0 makes precision/recall explicitly 0 when the model predicts
    # (or the data contains) no positive examples, instead of emitting an
    # undefined-metric warning; the returned values are the same as before.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='binary', zero_division=0
    )
    acc = accuracy_score(labels, preds)

    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}


Set Training Arguments

In [None]:
# Training configuration for the Trainer.
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version before upgrading.
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",  # Evaluate at the end of each epoch
    logging_strategy="epoch",  # Log once per epoch
    save_strategy="epoch",  # Save at the end of each epoch
    learning_rate=2e-5,  # Peak learning rate
    per_device_train_batch_size=4,  # Adjust according to GPU memory
    per_device_eval_batch_size=8,
    num_train_epochs=5,  # Run for 5 epochs
    weight_decay=0.01,
    # A fixed 500-step warmup is longer than the whole run (360 optimizer steps
    # with this data and batch size), so the learning rate never reached its
    # peak. Warm up over the first 10% of training steps instead.
    warmup_ratio=0.1,
    fp16=False,  # Mixed precision disabled for simplicity
    save_total_limit=2  # Limit saved checkpoints
)



Train the Model

In [None]:
# Bundle the model, its training configuration, both data splits and the
# metric callback into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,  # accuracy / precision / recall / f1 on each evaluation
    eval_dataset=val_dataset,
    train_dataset=train_dataset,
)

# Run fine-tuning; as the last expression of the cell, the returned
# training summary is displayed.
trainer.train()

  item = {key: torch.tensor(val[idx]).clone().detach() for key, val in self.encodings.items()}


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.5389,0.531557,0.791667,0.777778,0.8,0.788732
2,0.491,0.497859,0.777778,0.74359,0.828571,0.783784
3,0.3797,0.513199,0.736111,0.807692,0.6,0.688525
4,0.4152,0.671739,0.763889,0.6875,0.942857,0.795181
5,0.4595,0.54155,0.819444,0.84375,0.771429,0.80597


  item = {key: torch.tensor(val[idx]).clone().detach() for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).clone().detach() for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).clone().detach() for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).clone().detach() for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]).clone().detach() for key, val in self.encodings.items()}


TrainOutput(global_step=360, training_loss=0.4568533261617025, metrics={'train_runtime': 1135.4038, 'train_samples_per_second': 1.268, 'train_steps_per_second': 0.317, 'total_flos': 13963045109760.0, 'train_loss': 0.4568533261617025, 'epoch': 5.0})

Save the Trained Model

In [None]:
# Mount Google Drive first: without a mounted drive, the paths below are created
# as ordinary local folders on the Colab VM and are lost when the runtime is
# recycled.
from google.colab import drive

drive.mount('/content/drive')

# Save the model and tokenizer to your Google Drive
model.save_pretrained('/content/drive/MyDrive/GPT_xstest_full')
tokenizer.save_pretrained('/content/drive/MyDrive/GPT_xstest_full')


('/content/drive/MyDrive/GPT_xstest_full/tokenizer_config.json',
 '/content/drive/MyDrive/GPT_xstest_full/special_tokens_map.json',
 '/content/drive/MyDrive/GPT_xstest_full/vocab.json',
 '/content/drive/MyDrive/GPT_xstest_full/merges.txt',
 '/content/drive/MyDrive/GPT_xstest_full/added_tokens.json')

Reload the Saved Model

In [None]:
# Reload the saved model and tokenizer
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification

# Restore the tokenizer and the fine-tuned classifier from the directory they were saved to.
tokenizer = GPT2Tokenizer.from_pretrained('/content/drive/MyDrive/GPT_xstest_full')
model = GPT2ForSequenceClassification.from_pretrained('/content/drive/MyDrive/GPT_xstest_full')

# Keep the model's padding id in sync with the reloaded tokenizer.
model.config.pad_token_id = tokenizer.pad_token_id


Model Evaluation on the Test Set

In [None]:
import torch
from datasets import Dataset
from sklearn.metrics import classification_report, confusion_matrix

# Evaluate on the held-out test split using the `test_dataset` built earlier
# together with the train/validation datasets; there is no need to rebuild it
# as a Hugging Face `Dataset`.
# NOTE: the `from datasets import Dataset` import at the top of this cell is not
# used here, and it shadows torch's `Dataset` for any cell re-run afterwards.

# A single `predict` call yields both the raw scores and the metrics from
# `compute_metrics`, so the test set is only run through the model once.
# `metric_key_prefix="eval"` keeps the same metric names `trainer.evaluate` reports.
test_output = trainer.predict(test_dataset, metric_key_prefix="eval")
results = test_output.metrics
print("Test set evaluation results:", results)

# Get the true labels (y_true) from the test set
y_true = test_labels

# Predicted class = index of the highest score for each example
predictions = test_output.predictions
y_pred = predictions.argmax(axis=-1)

# Generate confusion matrix
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", cm)

# Generate classification report
print("Classification Report:\n", classification_report(y_true, y_pred, target_names=['Non-Toxic', 'Toxic']))


Test set evaluation results: {'eval_loss': 0.7324721217155457, 'eval_accuracy': 0.7666666666666667, 'eval_precision': 0.8205128205128205, 'eval_recall': 0.6956521739130435, 'eval_f1': 0.7529411764705882, 'eval_runtime': 7.7448, 'eval_samples_per_second': 11.621, 'eval_steps_per_second': 1.549, 'epoch': 5.0}
Confusion Matrix:
 [[37  7]
 [14 32]]
Classification Report:
               precision    recall  f1-score   support

   Non-Toxic       0.73      0.84      0.78        44
       Toxic       0.82      0.70      0.75        46

    accuracy                           0.77        90
   macro avg       0.77      0.77      0.77        90
weighted avg       0.77      0.77      0.77        90



Inference with the Trained Model

In [None]:
# Example inference with new data
texts = ["I love programming.", "You are an idiot!"]

# Tokenize the input text
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)

# Put the inputs on the same device as the model. The model may live on the GPU
# (after training) or on the CPU (after reloading from disk); a mismatch would
# raise a device error in the forward pass.
inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}

# Move model to evaluation mode
model.eval()

# Perform inference without tracking gradients
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)

# Print the predicted classes; convert the tensor to plain ints before using
# them as list indices.
labels = ['Non-Toxic', 'Toxic']
for text, class_id in zip(texts, predictions.tolist()):
    print(f"Text: {text}")
    print(f"Predicted Label: {labels[class_id]}\n")


Text: I love programming.
Predicted Label: Non-Toxic

Text: You are an idiot!
Predicted Label: Non-Toxic

