# CREATE CONDA ENVIRONMENT

In [4]:
!conda env create -f env.yml

In [5]:
!conda activate sentiment

# IMPORTS

In [1]:
from pandas import read_csv, get_dummies
from sklearn.model_selection import train_test_split
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer, TrainerCallback
from torch import cuda, device, float16
from torch.utils.data import Dataset
import re
import numpy as np
import evaluate
import wandb
from datasets import DatasetDict, Dataset
import os
from zipfile import ZipFile

# DATA

## INSTALLING DATA

In [None]:
!kaggle datasets download -d lakshmi25npathi/imdb-dataset-of-50k-movie-reviews

Dataset URL: https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews
License(s): other
Downloading imdb-dataset-of-50k-movie-reviews.zip to d:\FCDS\Bert-Roberta-RNN-Movie-Reviews-Sentiment-1




  0%|          | 0.00/25.7M [00:00<?, ?B/s]
  4%|▍         | 1.00M/25.7M [00:18<07:25, 58.2kB/s]
  4%|▍         | 1.00M/25.7M [00:30<07:25, 58.2kB/s]
  8%|▊         | 2.00M/25.7M [00:32<06:12, 66.7kB/s]
 12%|█▏        | 3.00M/25.7M [00:48<06:05, 65.2kB/s]
 12%|█▏        | 3.00M/25.7M [01:00<06:05, 65.2kB/s]
 16%|█▌        | 4.00M/25.7M [01:14<07:11, 52.7kB/s]
 16%|█▌        | 4.00M/25.7M [01:30<07:11, 52.7kB/s]
 19%|█▉        | 5.00M/25.7M [02:26<13:23, 27.0kB/s]
 19%|█▉        | 5.00M/25.7M [02:40<13:23, 27.0kB/s]
 23%|██▎       | 6.00M/25.7M [04:08<19:49, 17.4kB/s]
 23%|██▎       | 6.00M/25.7M [04:20<19:49, 17.4kB/s]
 27%|██▋       | 7.00M/25.7M [05:14<19:23, 16.9kB/s]
 27%|██▋       | 7.00M/25.7M [05:30<19:23, 16.9kB/s]
 31%|███       | 8.00M/25.7M [06:17<18:26, 16.8kB/s]
 31%|███       | 8.00M/25.7M [06:30<18:26, 16.8kB/s]
 35%|███▌      | 9.00M/25.7M [06:39<13:52, 21.1kB/s]
 35%|███▌      | 9.00M/25.7M [06:50<13:52, 21.1kB/s]
 39%|███▉      | 10.0M/25.7M [07:02<10:52, 25.3kB/s]
 

In [None]:
with ZipFile('imdb-dataset-of-50k-movie-reviews.zip', 'r') as extractor:
    extractor.extractall()

In [None]:
os.remove('imdb-dataset-of-50k-movie-reviews.zip')

## READING & ADJUSTING

In [2]:
data = read_csv("IMDB Dataset.csv")
data['sentiment'] = get_dummies(data['sentiment'],drop_first=True, dtype=int)['positive'] 
data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


## CLEANING

In [3]:
CLEANR = re.compile('<.*?>')
for i, r in data.iterrows():
    data.at[i, 'review'] = re.sub(CLEANR,"",r['review'])

## SPLITTING

In [4]:
train, test = train_test_split(data, train_size = 0.7, shuffle = True, random_state=42)

In [5]:
print("Original data split %")
print(f"Train: {int((len(train)/len(data))*100)}%", end="\t")
print(f"Test: {int((len(test)/len(data))*100)}%")

Original data split %
Train: 70%	Test: 30%


# GENERATING HUGGING FACE DATASET FORMAT

In [6]:
train = Dataset.from_pandas(train).remove_columns(["__index_level_0__"])
test = Dataset.from_pandas(test).remove_columns(["__index_level_0__"])

dataset = DatasetDict({
    'train': train,
    'test': test,
})
data = dataset.rename_column('sentiment', 'label')
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['review', 'sentiment'],
        num_rows: 35000
    })
    test: Dataset({
        features: ['review', 'sentiment'],
        num_rows: 15000
    })
})


# BERT SEQUENCE CLASSIFICATION

## TOKENIZATION

In [50]:
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

def tokenize_function(examples):
    return tokenizer(examples["review"], padding="max_length", truncation=True)

tokenized_dataset = data.map(tokenize_function, batched=True)

Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

## MODEL INITIALIZATION

In [51]:
model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased", num_labels=2)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [52]:
device_0 = device('cuda' if cuda.is_available else 'cpu')
if device_0.type == 'cuda':
    model.to(device_0)
    print('model is moved to gpu')

model is moved to gpu


## METRICS

In [53]:
metric = evaluate.load("accuracy")

In [54]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [55]:
class CustomWandbCallback(TrainerCallback):
    def __init__(self, wandb):
        self.wandb = wandb

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            epoch = logs.get("epoch", state.epoch)
            for key, value in logs.items():
                if "loss" in key or "accuracy" in key:
                    self.wandb.log({f"{key}": value, "epoch": epoch})

## TRAINER & TRAINING ARGUMENTS

In [63]:
training_args = TrainingArguments(
    output_dir="results",
    eval_strategy="epoch",
    report_to='wandb',
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    num_train_epochs=7,
    )

In [64]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"].shuffle(seed=42).select(range(1000)),
    eval_dataset=tokenized_dataset["test"].select(range(300)),
    callbacks=[CustomWandbCallback(wandb)],
    compute_metrics=compute_metrics,
)

## W&B SET UP

In [58]:
wandb.login(key=os.environ.get('WANDB_API_KEY'))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Katie\_netrc


True

In [59]:
run = wandb.init(project='Movie Review Sentiment Classification with Tranformers', name='bert_run')

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

In [60]:
run

## TRAINING

In [65]:
trainer.train()

  0%|          | 0/875 [00:00<?, ?it/s]

{'loss': 0.4226, 'grad_norm': 23.50316619873047, 'learning_rate': 4.2857142857142856e-05, 'epoch': 1.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.21635307371616364, 'eval_accuracy': 0.92, 'eval_runtime': 15.9264, 'eval_samples_per_second': 18.837, 'eval_steps_per_second': 2.386, 'epoch': 1.0}
{'loss': 0.2245, 'grad_norm': 0.18228253722190857, 'learning_rate': 3.571428571428572e-05, 'epoch': 2.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.35450059175491333, 'eval_accuracy': 0.9066666666666666, 'eval_runtime': 15.6923, 'eval_samples_per_second': 19.118, 'eval_steps_per_second': 2.422, 'epoch': 2.0}
{'loss': 0.0957, 'grad_norm': 99.09716033935547, 'learning_rate': 2.857142857142857e-05, 'epoch': 3.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5825324654579163, 'eval_accuracy': 0.89, 'eval_runtime': 15.6292, 'eval_samples_per_second': 19.195, 'eval_steps_per_second': 2.431, 'epoch': 3.0}
{'loss': 0.0231, 'grad_norm': 0.012049528770148754, 'learning_rate': 2.1428571428571428e-05, 'epoch': 4.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5057306289672852, 'eval_accuracy': 0.9133333333333333, 'eval_runtime': 16.2758, 'eval_samples_per_second': 18.432, 'eval_steps_per_second': 2.335, 'epoch': 4.0}
{'loss': 0.008, 'grad_norm': 0.0043056015856564045, 'learning_rate': 1.4285714285714285e-05, 'epoch': 5.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5529717206954956, 'eval_accuracy': 0.9233333333333333, 'eval_runtime': 15.7455, 'eval_samples_per_second': 19.053, 'eval_steps_per_second': 2.413, 'epoch': 5.0}
{'loss': 0.0001, 'grad_norm': 0.002354174619540572, 'learning_rate': 7.142857142857143e-06, 'epoch': 6.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5680630803108215, 'eval_accuracy': 0.9233333333333333, 'eval_runtime': 15.9324, 'eval_samples_per_second': 18.83, 'eval_steps_per_second': 2.385, 'epoch': 6.0}
{'loss': 0.0001, 'grad_norm': 0.002003719098865986, 'learning_rate': 0.0, 'epoch': 7.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5623147487640381, 'eval_accuracy': 0.9233333333333333, 'eval_runtime': 15.9103, 'eval_samples_per_second': 18.856, 'eval_steps_per_second': 2.388, 'epoch': 7.0}
{'train_runtime': 1084.5606, 'train_samples_per_second': 6.454, 'train_steps_per_second': 0.807, 'train_loss': 0.11058723449281284, 'epoch': 7.0}


TrainOutput(global_step=875, training_loss=0.11058723449281284, metrics={'train_runtime': 1084.5606, 'train_samples_per_second': 6.454, 'train_steps_per_second': 0.807, 'total_flos': 1841777387520000.0, 'train_loss': 0.11058723449281284, 'epoch': 7.0})

In [None]:
run.finish()

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


# DISTIL-BERT SEQUENCE CLASSIFICATION

## TOKENIZATION

In [None]:
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

def tokenize_function(examples):
    return tokenizer(examples["review"], padding="max_length", truncation=True)

tokenized_dataset = data.map(tokenize_function, batched=True)

Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

## MODEL INITIALIZATION

In [None]:
model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased", num_labels=2)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
device_1 = device('cuda' if cuda.is_available else 'cpu')
if device_1.type == 'cuda':
    model.to(device_1)
    print('model is moved to gpu')

model is moved to gpu


## METRICS

In [None]:
metric = evaluate.load("accuracy")

In [None]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [None]:
class CustomWandbCallback(TrainerCallback):
    def __init__(self, wandb):
        self.wandb = wandb

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            epoch = logs.get("epoch", state.epoch)
            for key, value in logs.items():
                if "loss" in key or "accuracy" in key:
                    self.wandb.log({f"{key}": value, "epoch": epoch})

## TRAINER & TRAINING ARGUMENTS

In [None]:
training_args = TrainingArguments(
    output_dir="results",
    eval_strategy="epoch",
    report_to='wandb',
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    num_train_epochs=7
    )

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"].shuffle(seed=42).select(range(1000)),
    eval_dataset=tokenized_dataset["test"].select(range(300)),
    callbacks=[CustomWandbCallback(wandb)],
    compute_metrics=compute_metrics,
)

## W&B SET UP

In [None]:
wandb.login(key=os.environ.get('WANDB_API_KEY'))

[34m[1mwandb[0m: Currently logged in as: [33mkatherineashraf[0m ([33mSoloWork[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Katie\_netrc


True

In [None]:
run = wandb.init(project='Movie Review Sentiment Classification with Tranformers', name='distil-bert_run')

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

In [None]:
run

## TRAINING

In [None]:
trainer.train()



  0%|          | 0/875 [00:00<?, ?it/s]

{'loss': 0.4628, 'grad_norm': 19.079669952392578, 'learning_rate': 4.2857142857142856e-05, 'epoch': 1.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.6143785715103149, 'eval_accuracy': 0.74, 'eval_runtime': 9.2209, 'eval_samples_per_second': 32.535, 'eval_steps_per_second': 4.121, 'epoch': 1.0}
{'loss': 0.2926, 'grad_norm': 4.9159440994262695, 'learning_rate': 3.571428571428572e-05, 'epoch': 2.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.7516472339630127, 'eval_accuracy': 0.7833333333333333, 'eval_runtime': 8.9563, 'eval_samples_per_second': 33.496, 'eval_steps_per_second': 4.243, 'epoch': 2.0}
{'loss': 0.1119, 'grad_norm': 0.12082237750291824, 'learning_rate': 2.857142857142857e-05, 'epoch': 3.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.41157156229019165, 'eval_accuracy': 0.91, 'eval_runtime': 8.9448, 'eval_samples_per_second': 33.539, 'eval_steps_per_second': 4.248, 'epoch': 3.0}
{'loss': 0.0346, 'grad_norm': 0.009427830576896667, 'learning_rate': 2.1428571428571428e-05, 'epoch': 4.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5523303151130676, 'eval_accuracy': 0.8833333333333333, 'eval_runtime': 9.3896, 'eval_samples_per_second': 31.95, 'eval_steps_per_second': 4.047, 'epoch': 4.0}
{'loss': 0.0094, 'grad_norm': 0.04642750695347786, 'learning_rate': 1.4285714285714285e-05, 'epoch': 5.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5800144076347351, 'eval_accuracy': 0.8933333333333333, 'eval_runtime': 9.5684, 'eval_samples_per_second': 31.353, 'eval_steps_per_second': 3.971, 'epoch': 5.0}
{'loss': 0.0029, 'grad_norm': 0.005240229889750481, 'learning_rate': 7.142857142857143e-06, 'epoch': 6.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.6185078024864197, 'eval_accuracy': 0.89, 'eval_runtime': 9.5685, 'eval_samples_per_second': 31.353, 'eval_steps_per_second': 3.971, 'epoch': 6.0}
{'loss': 0.001, 'grad_norm': 0.00587252713739872, 'learning_rate': 0.0, 'epoch': 7.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.6027518510818481, 'eval_accuracy': 0.8866666666666667, 'eval_runtime': 9.0077, 'eval_samples_per_second': 33.305, 'eval_steps_per_second': 4.219, 'epoch': 7.0}
{'train_runtime': 569.0017, 'train_samples_per_second': 12.302, 'train_steps_per_second': 1.538, 'train_loss': 0.13075893207958766, 'epoch': 7.0}


TrainOutput(global_step=875, training_loss=0.13075893207958766, metrics={'train_runtime': 569.0017, 'train_samples_per_second': 12.302, 'train_steps_per_second': 1.538, 'total_flos': 927271790592000.0, 'train_loss': 0.13075893207958766, 'epoch': 7.0})

In [None]:
run.finish()

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▃▃▃▅▅▅▆▆▆▇▇▇████
eval/accuracy,▁▃█▇▇▇▇
eval/loss,▅█▁▄▄▅▅
eval/runtime,▄▁▁▆██▂
eval/samples_per_second,▅██▃▁▁▇
eval/steps_per_second,▅██▃▁▁▇
eval_accuracy,▁▃█▇▇▇▇
eval_loss,▅█▁▄▄▅▅
loss,█▅▃▂▁▁▁
train/epoch,▁▁▂▂▃▃▅▅▆▆▇▇███

0,1
epoch,7.0
eval/accuracy,0.88667
eval/loss,0.60275
eval/runtime,9.0077
eval/samples_per_second,33.305
eval/steps_per_second,4.219
eval_accuracy,0.88667
eval_loss,0.60275
loss,0.001
total_flos,927271790592000.0


# ROBERTA SEQUENCE CLASSIFICATION

## TOKENIZATION

In [24]:
tokenizer = AutoTokenizer.from_pretrained("roberta-base")

def tokenize_function(examples):
    return tokenizer(examples["review"], padding="max_length", truncation=True)

tokenized_dataset = data.map(tokenize_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

## MODEL INITIALIZATION

In [23]:
model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)


config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [26]:
device_1 = device('cuda' if cuda.is_available else 'cpu')
if device_1.type == 'cuda':
    model.to(device_1)
    print('model is moved to gpu')

model is moved to gpu


## METRICS

In [27]:
metric = evaluate.load("accuracy")

In [28]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [29]:
class CustomWandbCallback(TrainerCallback):
    def __init__(self, wandb):
        self.wandb = wandb

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            epoch = logs.get("epoch", state.epoch)
            for key, value in logs.items():
                if "loss" in key or "accuracy" in key:
                    self.wandb.log({f"{key}": value, "epoch": epoch})

## TRAINER & TRAINING ARGUMENTS

In [30]:
training_args = TrainingArguments(
    output_dir="results",
    eval_strategy="epoch",
    report_to='wandb',
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    num_train_epochs=7
    )

In [31]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"].shuffle(seed=42).select(range(1000)),
    eval_dataset=tokenized_dataset["test"].select(range(300)),
    callbacks=[CustomWandbCallback(wandb)],
    compute_metrics=compute_metrics,
)

## W&B SET UP

In [32]:
wandb.login(key=os.environ.get('WANDB_API_KEY'))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Katie\_netrc


True

In [33]:
run = wandb.init(project='Movie Review Sentiment Classification with Tranformers', name='roberta_run')

In [34]:
run

## TRAINING

In [35]:
trainer.train()

  0%|          | 0/875 [00:00<?, ?it/s]

{'loss': 0.5, 'grad_norm': 7.648731231689453, 'learning_rate': 4.2857142857142856e-05, 'epoch': 1.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.230277881026268, 'eval_accuracy': 0.9266666666666666, 'eval_runtime': 62.0393, 'eval_samples_per_second': 4.836, 'eval_steps_per_second': 0.613, 'epoch': 1.0}
{'loss': 0.311, 'grad_norm': 0.1672140508890152, 'learning_rate': 3.571428571428572e-05, 'epoch': 2.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.501458466053009, 'eval_accuracy': 0.89, 'eval_runtime': 62.5536, 'eval_samples_per_second': 4.796, 'eval_steps_per_second': 0.607, 'epoch': 2.0}
{'loss': 0.215, 'grad_norm': 6.544699668884277, 'learning_rate': 2.857142857142857e-05, 'epoch': 3.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.4504135549068451, 'eval_accuracy': 0.9066666666666666, 'eval_runtime': 63.9084, 'eval_samples_per_second': 4.694, 'eval_steps_per_second': 0.595, 'epoch': 3.0}
{'loss': 0.1342, 'grad_norm': 37.261417388916016, 'learning_rate': 2.1428571428571428e-05, 'epoch': 4.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.5497056841850281, 'eval_accuracy': 0.9033333333333333, 'eval_runtime': 62.3494, 'eval_samples_per_second': 4.812, 'eval_steps_per_second': 0.609, 'epoch': 4.0}
{'loss': 0.0536, 'grad_norm': 0.10636503994464874, 'learning_rate': 1.4285714285714285e-05, 'epoch': 5.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.44834575057029724, 'eval_accuracy': 0.9133333333333333, 'eval_runtime': 61.5837, 'eval_samples_per_second': 4.871, 'eval_steps_per_second': 0.617, 'epoch': 5.0}
{'loss': 0.0325, 'grad_norm': 0.025958357378840446, 'learning_rate': 7.142857142857143e-06, 'epoch': 6.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.43520569801330566, 'eval_accuracy': 0.93, 'eval_runtime': 67.1405, 'eval_samples_per_second': 4.468, 'eval_steps_per_second': 0.566, 'epoch': 6.0}
{'loss': 0.021, 'grad_norm': 0.02783527784049511, 'learning_rate': 0.0, 'epoch': 7.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.42528367042541504, 'eval_accuracy': 0.9266666666666666, 'eval_runtime': 67.3379, 'eval_samples_per_second': 4.455, 'eval_steps_per_second': 0.564, 'epoch': 7.0}
{'train_runtime': 4336.9993, 'train_samples_per_second': 1.614, 'train_steps_per_second': 0.202, 'train_loss': 0.18105824170793805, 'epoch': 7.0}


TrainOutput(global_step=875, training_loss=0.18105824170793805, metrics={'train_runtime': 4336.9993, 'train_samples_per_second': 1.614, 'train_steps_per_second': 0.202, 'total_flos': 1841777387520000.0, 'train_loss': 0.18105824170793805, 'epoch': 7.0})

In [36]:
run.finish()

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▃▃▃▅▅▅▆▆▆▇▇▇████
eval/accuracy,▇▁▄▃▅█▇
eval/loss,▁▇▆█▆▅▅
eval/runtime,▂▂▄▂▁██
eval/samples_per_second,▇▇▅▇█▁▁
eval/steps_per_second,▇▇▅▇█▁▁
eval_accuracy,▇▁▄▃▅█▇
eval_loss,▁▇▆█▆▅▅
loss,█▅▄▃▁▁▁
train/epoch,▁▁▂▂▃▃▅▅▆▆▇▇███

0,1
epoch,7.0
eval/accuracy,0.92667
eval/loss,0.42528
eval/runtime,67.3379
eval/samples_per_second,4.455
eval/steps_per_second,0.564
eval_accuracy,0.92667
eval_loss,0.42528
loss,0.021
total_flos,1841777387520000.0


# ELECTRA SEQUENCE CLASSSIFICATION

## TOKENIZATION

In [37]:
tokenizer = AutoTokenizer.from_pretrained("google/electra-small-discriminator")

def tokenize_function(examples):
    return tokenizer(examples["review"], padding="max_length", truncation=True)

tokenized_dataset = data.map(tokenize_function, batched=True)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/35000 [00:00<?, ? examples/s]

Map:   0%|          | 0/15000 [00:00<?, ? examples/s]

## MODEL INITIALIZATION

In [38]:
model = AutoModelForSequenceClassification.from_pretrained("google/electra-small-discriminator", num_labels=2)


pytorch_model.bin:   0%|          | 0.00/54.2M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [39]:
device_1 = device('cuda' if cuda.is_available else 'cpu')
if device_1.type == 'cuda':
    model.to(device_1)
    print('model is moved to gpu')

model is moved to gpu


## METRICS

In [40]:
metric = evaluate.load("accuracy")

In [41]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [42]:
class CustomWandbCallback(TrainerCallback):
    def __init__(self, wandb):
        self.wandb = wandb

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            epoch = logs.get("epoch", state.epoch)
            for key, value in logs.items():
                if "loss" in key or "accuracy" in key:
                    self.wandb.log({f"{key}": value, "epoch": epoch})

## TRAINER & TRAINING ARGUMENTS

In [43]:
training_args = TrainingArguments(
    output_dir="results",
    eval_strategy="epoch",
    report_to='wandb',
    logging_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    num_train_epochs=7
    )

In [44]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"].shuffle(seed=42).select(range(1000)),
    eval_dataset=tokenized_dataset["test"].select(range(300)),
    callbacks=[CustomWandbCallback(wandb)],
    compute_metrics=compute_metrics,
)

## W&B SET UP

In [45]:
wandb.login(key=os.environ.get('WANDB_API_KEY'))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Katie\_netrc


True

In [46]:
run = wandb.init(project='Movie Review Sentiment Classification with Tranformers', name='electra_run')

In [47]:
run

## TRAINING

In [48]:
trainer.train()

  0%|          | 0/875 [00:00<?, ?it/s]

{'loss': 0.6154, 'grad_norm': 5.137572765350342, 'learning_rate': 4.2857142857142856e-05, 'epoch': 1.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.41971415281295776, 'eval_accuracy': 0.8866666666666667, 'eval_runtime': 2.5594, 'eval_samples_per_second': 117.213, 'eval_steps_per_second': 14.847, 'epoch': 1.0}
{'loss': 0.3627, 'grad_norm': 8.133498191833496, 'learning_rate': 3.571428571428572e-05, 'epoch': 2.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.2593539357185364, 'eval_accuracy': 0.9033333333333333, 'eval_runtime': 2.5515, 'eval_samples_per_second': 117.579, 'eval_steps_per_second': 14.893, 'epoch': 2.0}
{'loss': 0.1981, 'grad_norm': 92.19606018066406, 'learning_rate': 2.857142857142857e-05, 'epoch': 3.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.3325931429862976, 'eval_accuracy': 0.9166666666666666, 'eval_runtime': 2.5493, 'eval_samples_per_second': 117.682, 'eval_steps_per_second': 14.906, 'epoch': 3.0}
{'loss': 0.1307, 'grad_norm': 0.9414613842964172, 'learning_rate': 2.1428571428571428e-05, 'epoch': 4.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.37201422452926636, 'eval_accuracy': 0.9, 'eval_runtime': 2.5714, 'eval_samples_per_second': 116.667, 'eval_steps_per_second': 14.778, 'epoch': 4.0}
{'loss': 0.0793, 'grad_norm': 12.879191398620605, 'learning_rate': 1.4285714285714285e-05, 'epoch': 5.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.41390442848205566, 'eval_accuracy': 0.9033333333333333, 'eval_runtime': 2.7033, 'eval_samples_per_second': 110.976, 'eval_steps_per_second': 14.057, 'epoch': 5.0}
{'loss': 0.0408, 'grad_norm': 0.08070999383926392, 'learning_rate': 7.142857142857143e-06, 'epoch': 6.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.4389466941356659, 'eval_accuracy': 0.91, 'eval_runtime': 2.5792, 'eval_samples_per_second': 116.317, 'eval_steps_per_second': 14.734, 'epoch': 6.0}
{'loss': 0.036, 'grad_norm': 0.08362441509962082, 'learning_rate': 0.0, 'epoch': 7.0}


  0%|          | 0/38 [00:00<?, ?it/s]

{'eval_loss': 0.4470287561416626, 'eval_accuracy': 0.91, 'eval_runtime': 2.5628, 'eval_samples_per_second': 117.061, 'eval_steps_per_second': 14.828, 'epoch': 7.0}
{'train_runtime': 179.1875, 'train_samples_per_second': 39.065, 'train_steps_per_second': 4.883, 'train_loss': 0.20898332105364117, 'epoch': 7.0}


TrainOutput(global_step=875, training_loss=0.20898332105364117, metrics={'train_runtime': 179.1875, 'train_samples_per_second': 39.065, 'train_steps_per_second': 4.883, 'total_flos': 205937485824000.0, 'train_loss': 0.20898332105364117, 'epoch': 7.0})

In [49]:
run.finish()

VBox(children=(Label(value='0.031 MB of 0.047 MB uploaded (0.003 MB deduped)\r'), FloatProgress(value=0.663057…

0,1
epoch,▁▁▁▂▂▂▃▃▃▅▅▅▆▆▆▇▇▇████
eval/accuracy,▁▅█▄▅▆▆
eval/loss,▇▁▄▅▇██
eval/runtime,▁▁▁▂█▂▂
eval/samples_per_second,███▇▁▇▇
eval/steps_per_second,███▇▁▇▇
eval_accuracy,▁▅█▄▅▆▆
eval_loss,▇▁▄▅▇██
loss,█▅▃▂▂▁▁
train/epoch,▁▁▂▂▃▃▅▅▆▆▇▇███

0,1
epoch,7.0
eval/accuracy,0.91
eval/loss,0.44703
eval/runtime,2.5628
eval/samples_per_second,117.061
eval/steps_per_second,14.828
eval_accuracy,0.91
eval_loss,0.44703
loss,0.036
total_flos,205937485824000.0


# FINAL EVALUATION

In [76]:
def get_last_metrics(run, metric_name):
    history = run.history(keys=[metric_name])
    if not history.empty:
        return history[metric_name].values[-1]
    return None

results = []

api = wandb.Api()
runs = api.runs("SoloWork/Movie Review Sentiment Classification with Tranformers")

for run in runs:
    run_name = run.name
    run_name = run_name.replace("_run","")
    last_train_loss = get_last_metrics(run, "train_loss")
    last_eval_loss = get_last_metrics(run, "eval_loss")
    
    if last_train_loss is not None and last_eval_loss is not None:
        results.append({
            "run_name": run_name.capitalize(),
            "last_train_loss": last_train_loss,
            "last_eval_loss": last_eval_loss,
            "loss_difference": last_eval_loss - last_train_loss
        })

for result in results:
    print(f"Model Name: {result['run_name']}")
    print(f"Last Train Loss: {round(result['last_train_loss'],3)}")
    print(f"Last Eval Loss: {round(result['last_eval_loss'],3)}")
    print(f"Loss Difference: {round(result['loss_difference'],3)}")
    print()


Model Name: Bert
Last Train Loss: 0.111
Last Eval Loss: 0.562
Loss Difference: 0.452

Model Name: Distil-bert
Last Train Loss: 0.131
Last Eval Loss: 0.603
Loss Difference: 0.472

Model Name: Roberta
Last Train Loss: 0.181
Last Eval Loss: 0.425
Loss Difference: 0.244

Model Name: Electra
Last Train Loss: 0.209
Last Eval Loss: 0.447
Loss Difference: 0.238



**<ins>Summing up:</ins>**
|| BERT | Distil-BERT | RoBERTa | Electra |
| :-: | :-: | :-: | :-: | :-: |
| **Training Loss** | 0.111 | 0.131 | 0.181 | 0.209 |
| **Evaluation Loss** | 0.562 | 0.603 | <span style='color:cyan'> 0.425 </span> | 0.447 |
| **Difference** | 0.452 | <span style='color:red'> 0.472 </span> | 0.244 | <span style='color:yellow'> 0.238 </span> |

Although RoBERTa had the least evaluation loss, but Electra had the minimal difference between Training and Evaluation Loss, making it the least over-fit model among all four.

<img src='Training_and_Evaluation_Loss_For_All_Transformers.png'/>

We can also see that:
The first to reach optimal points `(train_loss = val_loss)` is RoBERTa, followed by BERT, then Electra, however, DistilBert never reached it. 