# Fine-tuning the LLM Model
Mahan Madani - Mohammad Mehdi Begmaz

## Load Dataset and important libraries

In [1]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import pandas as pd
import numpy as np
import nltk
import torch

from datasets import Dataset

from transformers import (
    AutoTokenizer,
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer)

from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig

import evaluate
from evaluate import load

from pynvml import *

In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Load the preprocessed version of the dataset, then report its columns and its
# (rows, columns) shape on separate lines.
df = pd.read_csv("./dataset/BG3_reviews_preprocessed.csv")
print(df.columns, df.shape, sep="\n")

Index(['review', 'voted_up', 'votes_up', 'votes_funny', 'weighted_vote_score',
       'word_count', 'profanity'],
      dtype='object')
(10000, 7)


## Model

In [4]:
# Label vocabulary: class id -> name, and the inverse mapping derived from it.
id2label = {0: "Negative", 1: "Positive"}
label2id = {name: idx for idx, name in id2label.items()}

# Load the GPT-2 tokenizer and a GPT-2 sequence-classification model with one
# output per label, then move the model to the selected device.
model_name = 'gpt2'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)
model = model.to(device)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Derive the integer training label from the `voted_up` column.
# NOTE(review): assumes `voted_up` is boolean-like so that 1 means a positive
# review and 0 a negative one, matching `id2label` — confirm against the dataset.
df['label'] = df['voted_up'].astype(int)

In [6]:
# Inspect the class balance of the labels before training.
df['label'].value_counts()

1    9609
0     391
Name: label, dtype: int64

## Tokenization

In [7]:
# Reuse the end-of-sequence token as the padding token, on both the tokenizer and
# the model config, so batches of different-length reviews can be padded.
# NOTE(review): presumably required because the GPT-2 tokenizer ships without a
# dedicated pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = model.config.eos_token_id

In [8]:
# Convert only the review text and its label into a Hugging Face Dataset.
training_columns = ['review', 'label']
train_dataset = Dataset.from_pandas(df[training_columns])
train_dataset

Dataset({
    features: ['review', 'label'],
    num_rows: 10000
})

In [9]:
class TokenizerWrapper:
    """Bundle a tokenizer with the tokenization settings used for the review text."""

    def __init__(self, tokenizer):
        # The wrapped tokenizer is stored as-is and reused for every batch.
        self.tokenizer = tokenizer

    def tokenize_function(self, examples):
        """Tokenize the ``review`` entries of ``examples``.

        Sets the tokenizer's ``truncation_side`` to ``"right"`` and then calls it
        with truncation enabled and a maximum length of 512.

        Returns whatever the wrapped tokenizer returns for that call.
        """
        tok = self.tokenizer
        tok.truncation_side = "right"

        encode_kwargs = {"max_length": 512, "truncation": True}
        return tok(examples["review"], **encode_kwargs)

In [10]:
tokenizer_wrapper = TokenizerWrapper(tokenizer)

# `list.remove` mutates the list and returns None, so passing its result as
# `remove_columns` dropped nothing. Build the list of columns to drop explicitly:
# every original column except 'label', which training still needs.
columns_to_drop = [name for name in train_dataset.column_names if name != 'label']

# Tokenize in batches across 4 worker processes; the tokenizer output columns are
# added and the raw text column is removed.
tokenized_dataset = train_dataset.map(
    tokenizer_wrapper.tokenize_function,
    num_proc=4,
    remove_columns=columns_to_drop,
    batched=True)

Map (num_proc=4):   0%|          | 0/10000 [00:00<?, ? examples/s]

## Fine-tune the model

In [11]:
# Collator that pads the examples of each batch using the tokenizer's padding
# settings; it is handed to the Trainer below.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [12]:
# Load the accuracy evaluation metric once; it is reused on every call below.
accuracy = evaluate.load("accuracy")


def compute_metrics(p):
    """Compute classification accuracy for a ``(predictions, labels)`` pair.

    Args:
        p: A two-element sequence. The first element holds the per-class scores
            for every example (classes along axis 1), the second the reference
            label ids.

    Returns:
        A flat ``{"accuracy": value}`` dict.
    """
    predictions, labels = p
    predicted_classes = np.argmax(predictions, axis=1)

    # `accuracy.compute` already returns a dict keyed by "accuracy". Wrapping that
    # dict under another "accuracy" key produced a nested dict instead of a scalar
    # metric, so unwrap the value first.
    scores = accuracy.compute(predictions=predicted_classes, references=labels)
    return {"accuracy": scores["accuracy"]}

In [13]:
def print_trainable_parameters(model):
    """Print the trainable and total parameter counts of ``model`` and their ratio.

    Args:
        model: Any object exposing ``named_parameters()`` whose parameters provide
            ``numel()`` and ``requires_grad``.
    """
    total_count = 0
    trainable_count = 0
    for _name, tensor in model.named_parameters():
        size = tensor.numel()
        total_count += size
        # Only parameters that receive gradients count as trainable.
        if tensor.requires_grad:
            trainable_count += size

    percent_trainable = 100 * trainable_count / total_count
    print(
        f"trainable params: {trainable_count} || all params: {total_count} || trainable%: {percent_trainable}"
    )


In [14]:
# LoRA settings for sequence classification: rank 4, alpha 32, dropout 0.01,
# applied to the modules named 'c_attn'.
config = LoraConfig(
    task_type="SEQ_CLS",
    target_modules=['c_attn'],
    r=4,
    lora_alpha=32,
    lora_dropout=0.01,
)

# Wrap the base model with the adapters and report how many parameters train.
# Note: this rebinds `model`, so re-running the cell wraps the already-wrapped model.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 148992 || all params: 124590336 || trainable%: 0.11958551905663052


In [15]:
# Training hyperparameters: learning rate, per-device batch size, epoch count.
lr, batch_size, num_epochs = 1e-3, 4, 5

In [16]:
from transformers import Trainer, TrainingArguments

# Training configuration: checkpoints go to `output_dir`; the remaining values come
# from the hyperparameter cell, plus a weight decay of 0.01.
training_args = TrainingArguments(
    output_dir="./gpt2-lora-classification",
    num_train_epochs=num_epochs,
    per_device_train_batch_size=batch_size,
    learning_rate=lr,
    weight_decay=0.01,
)

In [17]:
# Assemble the Trainer from the model, the tokenized reviews and the padding collator.
# NOTE(review): no `eval_dataset` is passed, so `compute_metrics` is presumably
# never invoked during this run — confirm whether a held-out split was intended.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_dataset,
    # pads the examples of every batch on the fly so they share one length
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# Run fine-tuning and keep the value returned by `train()`.
results = trainer.train()

  0%|          | 0/12500 [00:00<?, ?it/s]

Checkpoint destination directory ./classification\checkpoint-500 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.2924, 'learning_rate': 0.00096, 'epoch': 0.2}


Checkpoint destination directory ./classification\checkpoint-1000 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'loss': 0.2531, 'learning_rate': 0.00092, 'epoch': 0.4}
{'loss': 0.2449, 'learning_rate': 0.00088, 'epoch': 0.6}
{'loss': 0.1974, 'learning_rate': 0.00084, 'epoch': 0.8}
{'loss': 0.2437, 'learning_rate': 0.0008, 'epoch': 1.0}
{'loss': 0.2838, 'learning_rate': 0.00076, 'epoch': 1.2}
{'loss': 0.2486, 'learning_rate': 0.0007199999999999999, 'epoch': 1.4}
{'loss': 0.2214, 'learning_rate': 0.00068, 'epoch': 1.6}
{'loss': 0.2209, 'learning_rate': 0.00064, 'epoch': 1.8}
{'loss': 0.2711, 'learning_rate': 0.0006, 'epoch': 2.0}
{'loss': 0.2224, 'learning_rate': 0.0005600000000000001, 'epoch': 2.2}
{'loss': 0.247, 'learning_rate': 0.0005200000000000001, 'epoch': 2.4}
{'loss': 0.224, 'learning_rate': 0.00048, 'epoch': 2.6}
{'loss': 0.2515, 'learning_rate': 0.00044, 'epoch': 2.8}
{'loss': 0.2584, 'learning_rate': 0.0004, 'epoch': 3.0}
{'loss': 0.2446, 'learning_rate': 0.00035999999999999997, 'epoch': 3.2}
{'loss': 0.2183, 'learning_rate': 0.00032, 'epoch': 3.4}
{'loss': 0.2242, 'learning_rate': 0.

In [18]:
# Persist the fine-tuned model weights and the tokenizer files side by side.
save_dir = "./model/classification"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('./model/classification\\tokenizer_config.json',
 './model/classification\\special_tokens_map.json',
 './model/classification\\vocab.json',
 './model/classification\\merges.txt',
 './model/classification\\added_tokens.json',
 './model/classification\\tokenizer.json')

# Classify Reviews

In [19]:
from transformers.utils import logging
import transformers

# Only show transformers log messages at ERROR level from here on.
logging.set_verbosity(logging.ERROR)

In [20]:
def classify(text):
    """Print the predicted sentiment label for ``text``.

    The text is encoded with the classification tokenizer, truncated to the same
    512-token limit used when tokenizing the training data, and passed through
    the fine-tuned model. The label name of the highest-scoring class is printed.

    Args:
        text: The review text to classify.

    Returns:
        None. The label is printed, not returned.
    """
    # Inference only: make sure dropout layers are disabled before predicting.
    model.eval()

    inputs = tokenizer.encode(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(device)

    # No gradients are needed for a prediction; skipping them saves memory.
    with torch.no_grad():
        logits = model(inputs).logits

    # One input sequence, so the argmax over the class axis yields a single id.
    predicted_id = torch.argmax(logits, dim=-1).item()

    print(id2label[predicted_id])

In [21]:
# Load the generative model and its tokenizer from the same local directory.
generative_model_dir = "./model/v3"
generative_tokenizer = AutoTokenizer.from_pretrained(generative_model_dir)
generative_model = AutoModelForCausalLM.from_pretrained(generative_model_dir)

In [22]:
# Sample a review from the generative model (top-k / top-p sampling, at most 200
# new tokens), decode it to text, show it, and classify it.
sampling_options = dict(do_sample=True, top_k=50, top_p=0.95, max_new_tokens=200)
generated_ids = generative_model.generate(
    pad_token_id=tokenizer.pad_token_id,
    **sampling_options,
)
generated_text = generative_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(generated_text)
classify(generated_text)

4/5 stars with no issues. a good story, strong characters, good choices, no problem with fights (like the original baldur's gate games i've used in the past). great replayability too!   if you're a fan of the divinity series, this game is probably good for you. the controls are well set, all of them intuitive, and the world is set in a real world environment! i don't remember my first play with this game, but i've spent a few hours through it.   still with the bugs, and some interesting twists (eg some turnbased combat which i think could make a good game), the only downside is that it is still early access (and you may get stuck sometimes), so if you are interested in early access (and still have any ideas) please look no further!  this game is well worth the price. i'm not really sure how to sum up the game's potential since we won't know much more
Positive


In [23]:
# define list of examples
text_list = ["It was good.", "just bad, not for me.", "Better than the first one.", "This is not worth watching even once.", "This one is a pass."]

# This cell runs after the fine-tuning run above, so the heading must not call
# the model "untrained".
print("Fine-tuned model predictions:")
print("-----------------------------")

# Inference only: disable dropout and skip gradient tracking.
model.eval()
with torch.no_grad():
    for text in text_list:
        # tokenize text
        inputs = tokenizer.encode(text, return_tensors="pt").to(device)
        # compute logits
        logits = model(inputs).logits
        # convert logits to label
        predictions = torch.argmax(logits)

        print(text + " - " + id2label[predictions.tolist()])

Untrained model predictions:
----------------------------
It was good. - Positive
just bad, not for me. - Positive
Better than the first one. - Positive
This is not worth watching even once. - Positive
This one is a pass. - Positive


In [24]:
# Keep only the reviews labelled 0 (negative) and renumber the rows from 0 so
# they can be addressed by position below.
negative_df = df[df['label'] == 0].reset_index(drop=True)

In [25]:
# Display the text of the first negative review.
negative_df['review'][0]

"this is tough to write because honestly, act 1 and 2 make this game the game of the year. heck, i was ready to put it in my top 5 all time favorite games. i have 100 hours in this game now and just loved the story and choices and the combat.  and then act 3 happened. i started having performance issues, i started having weird glitches were my companions would lose half their bodies and couldn't fight anymore, i've had 5 different quests completely bug out on me and even a main quest bug out that i can't even complete. not only that, but the story choices of what is going on  the main bad guys and fights aren't as thought out as things were in the first 2 acts. it is very clear that act 3 was completely rushed in my opinion to get the release out the door and does not have the same care that acts 1 and 2 had.  all of that enjoyment of acts 1 and 2 to just come up against the frustrations in act 3 killed my enthusiasm for this game. i know larian will likely fix these issues as i suspec

In [29]:
# Classify the negative review at index 3 (not the review at index 0 displayed in
# the previous cell); the true label of every row in `negative_df` is 0.
classify(negative_df['review'][3])

Positive
