In [86]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [34]:
from transformers import DistilBertForSequenceClassification, RobertaForSequenceClassification, AlbertForSequenceClassification, Trainer, TrainingArguments

In [56]:
from transformers import DistilBertTokenizer, RobertaTokenizer, AlbertTokenizer

In [103]:
from transformers import AutoModelForSequenceClassification

In [3]:
import pandas as pd
import numpy as np

In [4]:
import torch

In [29]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [101]:
from peft import LoraConfig, get_peft_model

In [87]:
from datasets import Dataset

# Load Data

In [5]:
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

In [6]:
text_column = "review"
label_column = "sentiment"

In [7]:
print(train_data.head())
print(test_data.head())

                                              review sentiment
0  SAPS AT SEA <br /><br />Aspect ratio: 1.37:1<b...  negative
1  If you want mindless action, hot chicks and a ...  positive
2  "The Woman in Black" is easily one of the cree...  positive
3  I can barely find the words to describe how mu...  negative
4  What's in here ?! Let me tell you. It's the pr...  negative
                                              review sentiment
0  Steven Rea plays a forensic scientist thrust o...  positive
1  As the first of the TV specials offered on the...  positive
2  There may something poetically right in seeing...  negative
3  all i can say about this film is to read the b...  negative
4  I thought it was a pretty good movie and shoul...  positive


In [8]:
label_mapping = {'positive': 1, 'negative': 0}

train_labels = train_data[label_column].map(label_mapping).tolist()
test_labels = test_data[label_column].map(label_mapping).tolist()

# Distilbert

In [9]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [10]:
train_encodings = tokenizer(train_data[text_column].tolist(), truncation=True, padding=True, max_length=512)
test_encodings = tokenizer(test_data[text_column].tolist(), truncation=True, padding=True, max_length=512)

In [11]:
class IMDbDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [12]:
train_dataset = IMDbDataset(train_encodings, train_labels)
test_dataset = IMDbDataset(test_encodings, test_labels)

In [13]:
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,                # how often to log metrics
    evaluation_strategy="epoch",     # evaluate once per epoch
    save_strategy="epoch",          # save the model once per epoch
    load_best_model_at_end=True     # load the best model at the end of training
)

In [15]:
trainer = Trainer(
    model=model,                         # the model to train
    args=training_args,                  # training arguments
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset,           # evaluation dataset
    tokenizer=tokenizer,                 # tokenizer to be used for the text data
)

In [16]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
10,0.6982
20,0.6956
30,0.6812
40,0.6938
50,0.6836
60,0.6846
70,0.6779
80,0.6804
90,0.6623
100,0.6558


KeyboardInterrupt: 

In [None]:
trainer.evaluate()

This would take 1 hour and 20 mintes to train, so attempting with sub sample

## Sub Sample

In [17]:
train_data_sample = train_data.sample(n=1000, random_state=42)  # 1000 samples from the train data
test_data_sample = test_data.sample(n=200, random_state=42)     # 200 samples from the test data

In [18]:
train_labels_sample = train_data_sample[label_column].map(label_mapping).tolist()
test_labels_sample = test_data_sample[label_column].map(label_mapping).tolist()

In [19]:
train_encodings_sample = tokenizer(train_data_sample[text_column].tolist(), truncation=True, padding=True, max_length=512)
test_encodings_sample = tokenizer(test_data_sample[text_column].tolist(), truncation=True, padding=True, max_length=512)

In [20]:
train_dataset_sample = IMDbDataset(train_encodings_sample, train_labels_sample)
test_dataset_sample = IMDbDataset(test_encodings_sample, test_labels_sample)

In [21]:
trainer = Trainer(
    model=model,                         # the model to train
    args=training_args,                  # training arguments
    train_dataset=train_dataset_sample,   # use the subsampled train dataset
    eval_dataset=test_dataset_sample,     # use the subsampled test dataset
    tokenizer=tokenizer,                  # tokenizer to process the text
)

  trainer = Trainer(


In [22]:
trainer.train()

Step,Training Loss
10,0.2293
20,0.1789
30,0.1804
40,0.2779
50,0.2293
60,0.5042
70,0.2671
80,0.1039
90,0.257
100,0.2171


TrainOutput(global_step=375, training_loss=0.17216474623481431, metrics={'train_runtime': 150.8727, 'train_samples_per_second': 19.884, 'train_steps_per_second': 2.486, 'total_flos': 397402195968000.0, 'train_loss': 0.17216474623481431, 'epoch': 3.0})

3 minutes to run on subset

In [23]:
trainer.evaluate()

{'eval_loss': 0.6884127855300903,
 'eval_runtime': 2.9425,
 'eval_samples_per_second': 67.97,
 'eval_steps_per_second': 4.418,
 'epoch': 3.0}

In [24]:
results = trainer.predict(test_dataset_sample)

In [25]:
predictions = results.predictions

In [26]:
predicted_labels = predictions.argmax(axis=-1)

In [30]:
accuracy = accuracy_score(test_labels_sample, predicted_labels)
precision = precision_score(test_labels_sample, predicted_labels)
recall = recall_score(test_labels_sample, predicted_labels)
f1 = f1_score(test_labels_sample, predicted_labels)

In [31]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.8650
Precision: 0.8922
Recall: 0.8505
F1 Score: 0.8708


In [32]:
for i in range(5):
    print(f"Review: {test_data_sample[text_column].iloc[i]}")
    print(f"Predicted Label: {predicted_labels[i]}")
    print(f"True Label: {test_labels_sample[i]}")
    print("-" * 50)

Review: Over the years, I've come to be a fan of director/writer Barry Levinson and he didn't let me down with this very funny look at politics. Popular TV comedian Tom Dobbs(Robin Williams)has enlightened the nation with his scathing jokes about the state of the country and elected politicians responsible. Night after night, he has his fans rolling in the isles; then the question is proposed that Dobbs run for president himself. His manager Jack Menken(Christopher Walken)says go for it. Dobb's flippant truisms flames a grass-root movement that puts him on the ballot. Comedian to President-Elect. Meanwhile, a young woman(Laura Linney)finds a flaw in the computer system that will count the ballots coast to coast. My favorite sequence is Linney's meltdown in the coffee shop.Williams is absolutely hysterical with his rapid quips. Others of note in the cast: Jeff Goldblum, Lewis Black and Rick Roberts.
Predicted Label: 1
True Label: 1
--------------------------------------------------
Revi

In [33]:
model.save_pretrained('./distilbert_imdb_trained')
tokenizer.save_pretrained('./distilbert_imdb_trained')

('./distilbert_imdb_trained/tokenizer_config.json',
 './distilbert_imdb_trained/special_tokens_map.json',
 './distilbert_imdb_trained/vocab.txt',
 './distilbert_imdb_trained/added_tokens.json')

## Apply LoRA

In [106]:
lora_config = LoraConfig(
    r=16,                            # Rank of the decomposition matrix
    lora_alpha=32,                   # Scaling factor
    target_modules=["query", "value"],  # Target the query and value linear layers
    lora_dropout=0.1,                # Dropout probability
    bias="none",                     # Whether to add bias
    task_type="SEQ_CLS"              # Task type: Sequence Classification (sentiment analysis)
)

In [107]:
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2
)
model = get_peft_model(model, lora_config)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [108]:
model.print_trainable_parameters()

trainable params: 591,362 || all params: 110,075,140 || trainable%: 0.5372


In [109]:
trainer = Trainer(
    model=model,                         # LoRA-enhanced model
    args=training_args,                  # Training arguments
    train_dataset=train_dataset_sample,  # Subsampled train dataset
    eval_dataset=test_dataset_sample,    # Subsampled test dataset
    tokenizer=tokenizer                  # Tokenizer
)

  trainer = Trainer(


In [110]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,0.698217
2,No log,0.694053
3,No log,0.694275


TrainOutput(global_step=189, training_loss=0.6985534183562748, metrics={'train_runtime': 246.2113, 'train_samples_per_second': 12.185, 'train_steps_per_second': 0.768, 'total_flos': 794783158272000.0, 'train_loss': 0.6985534183562748, 'epoch': 3.0})

took roughly 4 minutes to train

In [112]:
evaluation_results = trainer.evaluate()
print("Evaluation Metrics:", evaluation_results)

Evaluation Metrics: {'eval_loss': 0.6942746043205261, 'eval_runtime': 6.46, 'eval_samples_per_second': 30.96, 'eval_steps_per_second': 2.012, 'epoch': 3.0}


In [113]:
predictions = trainer.predict(test_dataset_sample)
print("Predicted Sentiment Labels:", predictions.predictions.argmax(axis=-1))

Predicted Sentiment Labels: [1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 0 1 1 1 1 1 0 1 0 0 1 1 1 0 0 1 1 1 1 1 0
 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1
 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 0 0 1 1 1 0 1 1 0 0 1
 0 1 1 1 0 1 1 1 0 1 1 1 0 1 1]


In [136]:
predicted_labels = predictions.predictions.argmax(axis=-1)
true_labels = predictions.label_ids

In [137]:
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average="binary")  # Use "binary" for binary classification
recall = recall_score(true_labels, predicted_labels, average="binary")
f1 = f1_score(true_labels, predicted_labels, average="binary")

In [138]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Accuracy: 0.6450
Precision: 0.6837
Recall: 0.6262
F1-Score: 0.6537


# RoBERTa

In [35]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [36]:
train_encodings_sample = tokenizer(train_data_sample[text_column].tolist(), truncation=True, padding=True, max_length=512)
test_encodings_sample = tokenizer(test_data_sample[text_column].tolist(), truncation=True, padding=True, max_length=512)

In [37]:
train_dataset_sample = IMDbDataset(train_encodings_sample, train_labels_sample)
test_dataset_sample = IMDbDataset(test_encodings_sample, test_labels_sample)

In [38]:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [39]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,                # how often to log metrics
    evaluation_strategy="epoch",     # evaluate once per epoch
    save_strategy="epoch",          # save the model once per epoch
    load_best_model_at_end=True     # load the best model at the end of training
)



In [40]:
trainer = Trainer(
    model=model,                         # the model to train
    args=training_args,                  # training arguments
    train_dataset=train_dataset_sample,  # training dataset (subset)
    eval_dataset=test_dataset_sample,    # evaluation dataset (subset)
    tokenizer=tokenizer,                 # tokenizer to process the text data
)

  trainer = Trainer(


In [41]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.4385,0.372243
2,0.3119,0.470507
3,0.1481,0.416487


TrainOutput(global_step=375, training_loss=0.4278216775258382, metrics={'train_runtime': 364.1454, 'train_samples_per_second': 8.238, 'train_steps_per_second': 1.03, 'total_flos': 789333166080000.0, 'train_loss': 0.4278216775258382, 'epoch': 3.0})

4.5 minutes to run on subset

In [42]:
trainer.evaluate()

{'eval_loss': 0.37224280834198,
 'eval_runtime': 5.6758,
 'eval_samples_per_second': 35.237,
 'eval_steps_per_second': 2.29,
 'epoch': 3.0}

In [43]:
model.save_pretrained('./roberta_imdb_trained_subset')
tokenizer.save_pretrained('./roberta_imdb_trained_subset')

('./roberta_imdb_trained_subset/tokenizer_config.json',
 './roberta_imdb_trained_subset/special_tokens_map.json',
 './roberta_imdb_trained_subset/vocab.json',
 './roberta_imdb_trained_subset/merges.txt',
 './roberta_imdb_trained_subset/added_tokens.json')

In [48]:
results = trainer.predict(test_dataset_sample)

In [49]:
predictions = results.predictions

In [50]:
predicted_labels = predictions.argmax(axis=-1)

In [51]:
accuracy = accuracy_score(test_labels_sample, predicted_labels)
precision = precision_score(test_labels_sample, predicted_labels)
recall = recall_score(test_labels_sample, predicted_labels)
f1 = f1_score(test_labels_sample, predicted_labels)

In [52]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.8600
Precision: 0.8835
Recall: 0.8505
F1 Score: 0.8667


In [53]:
for i in range(5):
    print(f"Review: {test_data_sample[text_column].iloc[i]}")
    print(f"Predicted Label: {predicted_labels[i]}")
    print(f"True Label: {test_labels_sample[i]}")
    print("-" * 50)

Review: Over the years, I've come to be a fan of director/writer Barry Levinson and he didn't let me down with this very funny look at politics. Popular TV comedian Tom Dobbs(Robin Williams)has enlightened the nation with his scathing jokes about the state of the country and elected politicians responsible. Night after night, he has his fans rolling in the isles; then the question is proposed that Dobbs run for president himself. His manager Jack Menken(Christopher Walken)says go for it. Dobb's flippant truisms flames a grass-root movement that puts him on the ballot. Comedian to President-Elect. Meanwhile, a young woman(Laura Linney)finds a flaw in the computer system that will count the ballots coast to coast. My favorite sequence is Linney's meltdown in the coffee shop.Williams is absolutely hysterical with his rapid quips. Others of note in the cast: Jeff Goldblum, Lewis Black and Rick Roberts.
Predicted Label: 1
True Label: 1
--------------------------------------------------
Revi

## Apply LoRA to RoBERTa

In [114]:
model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [115]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query", "value"],  # Target specific attention layers
    lora_dropout=0.1,
    bias="none",
    task_type="SEQ_CLS"  # Sentiment analysis = Sequence Classification
)

In [116]:
model = get_peft_model(model, lora_config)

In [117]:
model.print_trainable_parameters()

trainable params: 1,181,954 || all params: 125,829,124 || trainable%: 0.9393


In [118]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_sample,
    eval_dataset=test_dataset_sample,
    tokenizer=tokenizer
)

  trainer = Trainer(


In [119]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,0.690318
2,No log,0.691589
3,No log,0.691476


TrainOutput(global_step=189, training_loss=0.6929202609592013, metrics={'train_runtime': 240.6415, 'train_samples_per_second': 12.467, 'train_steps_per_second': 0.785, 'total_flos': 800226054144000.0, 'train_loss': 0.6929202609592013, 'epoch': 3.0})

This also took about 4 minutes to train

In [120]:
evaluation_results = trainer.evaluate()
print("Evaluation Metrics:", evaluation_results)

Evaluation Metrics: {'eval_loss': 0.6914758086204529, 'eval_runtime': 6.0354, 'eval_samples_per_second': 33.138, 'eval_steps_per_second': 2.154, 'epoch': 3.0}


In [121]:
predictions = trainer.predict(test_dataset_sample)
print("Predicted Sentiment Labels:", predictions.predictions.argmax(axis=-1))

Predicted Sentiment Labels: [0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 1 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1 1 0 1 0 0 0 1 0 1 1 0 0 1 0 0 0 1
 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1
 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1]


In [122]:
predicted_labels = predictions.predictions.argmax(axis=-1)
true_labels = predictions.label_ids

In [123]:
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average="binary")  # Use "binary" for binary classification
recall = recall_score(true_labels, predicted_labels, average="binary")
f1 = f1_score(true_labels, predicted_labels, average="binary")

In [124]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Accuracy: 0.5300
Precision: 0.6512
Recall: 0.2617
F1-Score: 0.3733


# ALBERT

In [57]:
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

In [58]:
train_encodings_sample = tokenizer(train_data_sample[text_column].tolist(), truncation=True, padding=True, max_length=512)
test_encodings_sample = tokenizer(test_data_sample[text_column].tolist(), truncation=True, padding=True, max_length=512)

In [59]:
train_dataset_sample = IMDbDataset(train_encodings_sample, train_labels_sample)
test_dataset_sample = IMDbDataset(test_encodings_sample, test_labels_sample)

In [60]:
model = AlbertForSequenceClassification.from_pretrained("albert-base-v2")

model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [61]:
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=3,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,                # how often to log metrics
    evaluation_strategy="epoch",     # evaluate once per epoch
    save_strategy="epoch",          # save the model once per epoch
    load_best_model_at_end=True     # load the best model at the end of training
)



In [62]:
trainer = Trainer(
    model=model,                         # the model to train
    args=training_args,                  # training arguments
    train_dataset=train_dataset_sample,  # training dataset (subset)
    eval_dataset=test_dataset_sample,    # evaluation dataset (subset)
    tokenizer=tokenizer,                 # tokenizer to process the text data
)

  trainer = Trainer(


In [63]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.3343,0.322629
2,0.3221,0.460761
3,0.4295,0.511235


TrainOutput(global_step=375, training_loss=0.45944153690338135, metrics={'train_runtime': 323.9204, 'train_samples_per_second': 9.262, 'train_steps_per_second': 1.158, 'total_flos': 71694305280000.0, 'train_loss': 0.45944153690338135, 'epoch': 3.0})

5 minutes to run on subset

In [64]:
trainer.evaluate()

{'eval_loss': 0.3226289451122284,
 'eval_runtime': 6.5964,
 'eval_samples_per_second': 30.32,
 'eval_steps_per_second': 1.971,
 'epoch': 3.0}

In [65]:
model.save_pretrained('./albert_imdb_trained_subset')
tokenizer.save_pretrained('./albert_imdb_trained_subset')

('./albert_imdb_trained_subset/tokenizer_config.json',
 './albert_imdb_trained_subset/special_tokens_map.json',
 './albert_imdb_trained_subset/spiece.model',
 './albert_imdb_trained_subset/added_tokens.json')

In [66]:
results = trainer.predict(test_dataset_sample)

In [67]:
predictions = results.predictions

In [68]:
predicted_labels = predictions.argmax(axis=-1)

In [69]:
accuracy = accuracy_score(test_labels_sample, predicted_labels)
precision = precision_score(test_labels_sample, predicted_labels)
recall = recall_score(test_labels_sample, predicted_labels)
f1 = f1_score(test_labels_sample, predicted_labels)

In [70]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.8550
Precision: 0.8545
Recall: 0.8785
F1 Score: 0.8664


In [71]:
for i in range(5):
    print(f"Review: {test_data_sample[text_column].iloc[i]}")
    print(f"Predicted Label: {predicted_labels[i]}")
    print(f"True Label: {test_labels_sample[i]}")
    print("-" * 50)

Review: Over the years, I've come to be a fan of director/writer Barry Levinson and he didn't let me down with this very funny look at politics. Popular TV comedian Tom Dobbs(Robin Williams)has enlightened the nation with his scathing jokes about the state of the country and elected politicians responsible. Night after night, he has his fans rolling in the isles; then the question is proposed that Dobbs run for president himself. His manager Jack Menken(Christopher Walken)says go for it. Dobb's flippant truisms flames a grass-root movement that puts him on the ballot. Comedian to President-Elect. Meanwhile, a young woman(Laura Linney)finds a flaw in the computer system that will count the ballots coast to coast. My favorite sequence is Linney's meltdown in the coffee shop.Williams is absolutely hysterical with his rapid quips. Others of note in the cast: Jeff Goldblum, Lewis Black and Rick Roberts.
Predicted Label: 1
True Label: 1
--------------------------------------------------
Revi

## Add LoRA to ALBERT

In [125]:
model = AutoModelForSequenceClassification.from_pretrained(
    "albert-base-v2",  # Replace with "albert-large-v2" if needed
    num_labels=2       # Number of sentiment labels
)

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [126]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query", "value"],  # Target specific attention layers
    lora_dropout=0.1,
    bias="none",
    task_type="SEQ_CLS"  # Sentiment analysis = Sequence Classification
)

In [127]:
model = get_peft_model(model, lora_config)

In [128]:
model.print_trainable_parameters()

trainable params: 50,690 || all params: 11,735,812 || trainable%: 0.4319


In [129]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_sample,
    eval_dataset=test_dataset_sample,
    tokenizer=tokenizer
)

  trainer = Trainer(


In [130]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,0.677417
2,No log,0.661938
3,No log,0.650589


TrainOutput(global_step=189, training_loss=0.6713133716078662, metrics={'train_runtime': 267.6388, 'train_samples_per_second': 11.209, 'train_steps_per_second': 0.706, 'total_flos': 72161464320000.0, 'train_loss': 0.6713133716078662, 'epoch': 3.0})

this also took 4 minutes

In [131]:
evaluation_results = trainer.evaluate()
print("Evaluation Metrics:", evaluation_results)

Evaluation Metrics: {'eval_loss': 0.6505890488624573, 'eval_runtime': 6.9995, 'eval_samples_per_second': 28.573, 'eval_steps_per_second': 1.857, 'epoch': 3.0}


In [132]:
predictions = trainer.predict(test_dataset_sample)
print("Predicted Sentiment Labels:", predictions.predictions.argmax(axis=-1))

Predicted Sentiment Labels: [1 0 1 0 1 1 0 1 1 1 0 0 1 0 1 1 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0 1 0 0 1 1 0
 0 1 1 0 1 1 0 0 1 0 1 1 0 1 0 0 0 0 1 1 1 1 1 0 0 1 1 0 1 0 0 0 0 1 0 0 0
 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 1 0 1 1 0 1 1 0 0 1 0 0 0 1 1 0
 0 1 0 0 0 1 1 1 0 1 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 1 0 0 1 1 1 0 1 1 1 0 0
 0 0 0 1 0 0 1 1 0 1 0 1 1 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 1 1 1 0
 1 1 1 1 1 0 1 0 0 0 0 1 0 0 1]


In [133]:
predicted_labels = predictions.predictions.argmax(axis=-1)
true_labels = predictions.label_ids

In [134]:
accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels, average="binary")  # Use "binary" for binary classification
recall = recall_score(true_labels, predicted_labels, average="binary")
f1 = f1_score(true_labels, predicted_labels, average="binary")

In [135]:
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Accuracy: 0.6450
Precision: 0.6837
Recall: 0.6262
F1-Score: 0.6537
