In [None]:
!pip install -qqq transformers datasets bitsandbytes accelerate scikit-learn peft trl

In [2]:
import os

import numpy as np
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer

# Hugging Face access token for the gated checkpoint. It is read from the
# HF_TOKEN environment variable so the secret is never stored in the notebook.
# When the variable is unset TOKEN is None and is passed through as such to
# from_pretrained(token=...).
TOKEN = os.environ.get("HF_TOKEN")
MODEL_NAME = "google/gemma-2b"
device = "cuda"

In [3]:
# Tokenizer for the base checkpoint (the token is forwarded for authentication).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=TOKEN)

# Load-time quantization settings: 4-bit weights, "nf4" quant type, bfloat16
# compute dtype.
# NOTE(review): the training cell later enables fp16=True while the compute
# dtype here is bfloat16 -- confirm the mismatch is intended.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Quantized causal LM; device placement is delegated via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=TOKEN,
    device_map="auto",
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [24]:
# Load the IMDB dataset and keep handles on the two splits used below.
dataset = load_dataset('imdb')
train_ds = dataset['train']
# Shuffle the test split with a fixed seed: the later cells index test_ds by
# position (y_pred[i] <-> test_ds[i]), so the order must be identical every
# time this cell is re-run, otherwise metrics and error examples drift.
test_ds = dataset['test'].shuffle(seed=42)
print(train_ds)
print(test_ds)

Dataset({

    features: ['text', 'label'],

    num_rows: 25000

})

Dataset({

    features: ['text', 'label'],

    num_rows: 25000

})


In [22]:
# Hard-coded vocabulary ids for the sentiment words, in the variants with and
# without a leading space and with either capitalisation. Only the
# "Positive" / "Negative" entries are looked up by llm().
# NOTE(review): the ids are presumably specific to the tokenizer loaded above --
# verify them if MODEL_NAME changes.
TOKENS = dict([
    (" Positive", 40695),
    (" Negative", 48314),
    (" positive", 6222),
    (" negative", 8322),
    ("Positive", 35202),
    ("Negative", 39654),
])

def get_prompt_list(item):
    """Turn a batch of examples into complete training strings.

    `item['text']` and `item['label']` are walked in parallel. Each review is
    wrapped with get_prompt() and the expected answer is appended:
    "Positive" when the label equals 1, "Negative" otherwise.

    Returns:
        A list with one formatted string per example, in input order.
    """
    return [
        get_prompt(review) + ("Positive" if sentiment == 1 else 'Negative')
        for review, sentiment in zip(item['text'], item['label'])
    ]

def get_prompt(query):
    """Wrap a review in the prompt template used for training and inference.

    The result is a "### REVIEW:" header, the review on its own line, a blank
    line, and a "### SENTIMENT:" header ending in a newline, so the answer
    token is the next thing to be written.
    """
    return "### REVIEW:\n{}\n\n### SENTIMENT:\n".format(query)

def llm(query):
    """Score one review with the language model.

    Builds the prompt for `query`, generates exactly one new token, and
    compares the scores of the "Positive" and "Negative" vocabulary entries
    at that generation step.

    Args:
        query: review text to classify.

    Returns:
        A tuple (text, positive_prob): the decoded sequence (prompt plus the
        generated token) and the probability of "Positive" under a softmax
        over the two candidate scores only, so P(Negative) is
        1 - positive_prob.
    """
    prompt = get_prompt(query)
    inputs = tokenizer.encode(prompt, add_special_tokens=True, return_tensors="pt")
    outputs = model.generate(input_ids=inputs.to(model.device), max_new_tokens=1, output_scores=True, return_dict_in_generate=True)

    # scores[0] is the single generated step; the second [0] picks the only
    # sequence in the batch.
    step_scores = outputs.scores[0][0]
    candidates = torch.stack([
        step_scores[TOKENS['Positive']],
        step_scores[TOKENS['Negative']],
    ])

    # torch.softmax is numerically stable, whereas exp(x) / sum(exp(x))
    # overflows to inf / inf = NaN once a score is large. Casting to float32
    # first also lets half-precision scores convert cleanly to NumPy.
    probs = torch.softmax(candidates.float(), dim=0).cpu().numpy()

    return tokenizer.decode(outputs.sequences[0]), probs[0]

def predict(query, print_res = False):
    """Return the positive-sentiment probability that llm() gives for `query`.

    When `print_res` is truthy, the decoded text returned by llm() is printed
    before the probability is handed back.
    """
    decoded, positive_prob = llm(query)
    if not print_res:
        return positive_prob
    print(decoded)
    return positive_prob

In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

def print_metrics(y_true, y_pred):
    """Print accuracy, precision and recall of `y_pred` against `y_true`.

    Nothing is returned; the three values are written to stdout, one per line.
    """
    # All three metrics are computed before anything is printed, so a metric
    # that raises leaves no partial report behind.
    report = [
        ("Accuracy:", accuracy_score(y_true, y_pred)),
        ("Precision:", precision_score(y_true, y_pred)),
        ("Recall:", recall_score(y_true, y_pred)),
    ]
    for heading, value in report:
        print(heading, value)

In [8]:
from tqdm import tqdm

# Baseline: score every example in test_ds with the model before fine-tuning.
# NOTE(review): the saved progress bar for this cell shows 500 items, so it was
# last run on a subset of test_ds -- re-run it on the same data as the
# post-fine-tuning evaluation before comparing the two sets of metrics.
y_pred = []  # probability returned by predict() for each example
y_test = []  # dataset label for each example
for ex in tqdm(test_ds):
    pred, label = predict(ex['text']), ex['label']
    y_pred.append(pred)
    y_test.append(label)

print('before fine tuning')
# Rounding turns each probability into a hard 0/1 prediction.
print_metrics(y_test, np.round(y_pred))

100%|██████████| 500/500 [01:22<00:00,  6.10it/s]

before fine tuning

Accuracy: 0.772

Precision: 0.7005813953488372

Recall: 0.9563492063492064





In [9]:
import os
# Set WANDB_DISABLED before the trainer is built; presumably this keeps
# Weights & Biases logging off (report_to="none" is also passed to
# TrainingArguments in the training cell).
os.environ["WANDB_DISABLED"] = "true"
from peft import LoraConfig

# LoRA adapter settings passed to the trainer: r=8 on the listed projection
# modules, for a causal-LM task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    r=8,
)

In [10]:
import transformers
from trl import SFTTrainer

# Optimisation settings. With a per-device batch of 4 and 4 accumulation
# steps, each optimizer step sees 16 examples per device.
# NOTE(review): fp16=True here, while the quantization config earlier uses a
# bfloat16 compute dtype -- confirm the combination is intended.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    warmup_steps=2,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=20,
    report_to="none",
)

# Supervised fine-tuning on the training split; get_prompt_list formats each
# batch into prompt-plus-answer strings and lora_config supplies the adapters.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    peft_config=lora_config,
    formatting_func=get_prompt_list,
)

trainer.train()





Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
20,3.0872
40,3.056
60,2.9574
80,2.9257
100,2.8496
120,2.8595
140,2.9001
160,2.8019
180,2.8267
200,2.8081





Cannot access gated repo for url https://huggingface.co/google/gemma-2b/resolve/main/config.json.

Repo model google/gemma-2b is gated. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in google/gemma-2b.







Cannot access gated repo for url https://huggingface.co/google/gemma-2b/resolve/main/config.json.

Repo model google/gemma-2b is gated. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in google/gemma-2b.







Cannot access gated repo for url https://huggingface.co/google/gemma-2b/resolve/main/config.json.

Repo model google/gemma-2b is gated. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in google/gemma-2b.







Cannot access gated repo for url https://huggingface.co/google/gemma-2b/resolve/main/config.json.

Repo model google/gemma-2b is gated. You must be authenticated to access it. - silently ignoring the lookup for the file 

TrainOutput(global_step=3124, training_loss=2.7479343209773415, metrics={'train_runtime': 4571.8426, 'train_samples_per_second': 10.937, 'train_steps_per_second': 0.683, 'total_flos': 3.188944477846733e+17, 'train_loss': 2.7479343209773415, 'epoch': 2.0})

In [25]:
# this is very slow if you want to run on all 25k samples :)

from tqdm import tqdm

# Re-score every example in test_ds with the same loop as the baseline cell,
# this time after trainer.train() has run.
y_pred = []  # probability returned by predict() for each example
y_test = []  # dataset label for each example
for ex in tqdm(test_ds):
    pred, label = predict(ex['text']), ex['label']
    y_pred.append(pred)
    y_test.append(label)

print('after fine tuning')
# Rounding turns each probability into a hard 0/1 prediction.
print_metrics(y_test, np.round(y_pred))

100%|██████████| 25000/25000 [31:45<00:00, 13.12it/s]


after fine tuning

Accuracy: 0.96608

Precision: 0.9623075702269481

Recall: 0.97016


In [13]:
# Print the first five examples whose rounded prediction disagrees with the
# label. predict() is called again with print_res=True so the decoded text is
# shown, followed by the probability and the true label.
cnt = 0
for i, prob in enumerate(y_pred):
    if np.round(prob) == y_test[i]:
        continue  # rounded prediction matches the label; nothing to show
    example = test_ds[i]
    print(predict(example['text'], print_res=True), example['label'])
    cnt += 1
    if cnt == 5:
        break

<bos>### REVIEW:

The EMPEROR'S NEW GROOVE cast returns for Disney Pictures follow up, but this time the spotlight is on Kronk(voiced by Patrick Warburton), who is no longer Yzma's(Eartha Kitt)henchman. Kronk has started a new life and is very happy with his role as chef of his own restaurant. Things go merrily along until Kronk gets word that his Papi(John Mahoney)is coming for a visit. Kronk is worried, because he knows that his life won't impress his Papi. One thing that he has always wanted and never received is a "thumbs up" from his dad. A flurry of blunders and a gigantic cheese explosion in the restaurant leaves our likable hero very deep in trouble and anxiety. To save the day, a little help from his friends.<br /><br />Other voices: Tracey Ullman, David Spade, John Goodman, Wendie Malick, April Winchell and Gatlin Green.



### SENTIMENT:

Positive



### SENTIMENT

0.6853661 0

<bos>### REVIEW:

**SPOILERS AHEAD**<br /><br />It is really unfortunate that a movie so well prod