In [1]:
!pip install -q -U torch --index-url https://download.pytorch.org/whl/cu117


In [2]:
!pip install -q -U git+https://github.com/huggingface/transformers
!pip install -q accelerate
!pip install -q -i https://pypi.org/simple/ bitsandbytes
!pip install -q -U datasets

In [3]:
!pip install -q -U git+https://github.com/huggingface/trl
!pip install -q -U git+https://github.com/huggingface/peft

In [4]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

import torch
import torch.nn as nn

import transformers
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from datasets import Dataset
from peft import LoraConfig, PeftConfig
import bitsandbytes as bnb
from trl import SFTTrainer

from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [7]:
from datasets import load_dataset

In [8]:
data = load_dataset('sepidmnorozy/Thai_sentiment')

In [9]:
def generate_prompt(data_point):
    return f"""
            Analyze the sentiment of the tweet enclosed in square brackets, 
            determine if it is positive or negative, and return the answer as 
            the corresponding sentiment label "positive" or  "negative"

            [{data_point["text"]}] = {data_point["label"]}
            """.strip()

def generate_test_prompt(data_point):
    return f"""
            Analyze the sentiment of the tweet enclosed in square brackets, 
            determine if it is positive or negative, and return the answer as 
            the corresponding sentiment label "positive" or  "negative"

            [{data_point["text"]}] = 

            """.strip()

In [10]:
data

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 8103
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1153
    })
    test: Dataset({
        features: ['label', 'text'],
        num_rows: 2344
    })
})

In [11]:
test = data['test'].to_pandas()
validation = data['validation'].to_pandas()
train = data['train'].to_pandas()

In [12]:
def convert(x):
    if x==1:
        return 'positive'
    elif x==0:
        return 'negative'
    else:
        return 'none'

In [13]:
train['label'] = train['label'].apply(lambda x: convert(x))

In [14]:
validation['label'] = validation['label'].apply(lambda x: convert(x))

In [15]:
test['label'] = test['label'].apply(lambda x: convert(x))

In [16]:
X_train = pd.DataFrame(train.apply(generate_prompt, axis=1), 
                       columns=["text"])
X_eval = pd.DataFrame(validation.apply(generate_prompt, axis=1), 
                      columns=["text"])

In [17]:
y_true = test.label
X_test = pd.DataFrame(test.apply(generate_test_prompt, axis=1), columns=["text"])

In [18]:
train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [19]:
def evaluate(y_true, y_pred):
    
    labels = ['positive',  'negative']
    mapping = {'positive': 1, 'negative': 0, 'none':1,}
    def map_func(x):
        return mapping.get(x, 1)
    
    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [20]:
model_name = "google/gemma-2b"

compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config, 
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [21]:
def predict(X_test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]
        input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**input_ids, max_new_tokens=1, temperature=0.0)
        result = tokenizer.decode(outputs[0])
        answer = result.split("=")[-1].lower()
        if "positive" in answer:
            y_pred.append("positive")
        elif "negative" in answer:
            y_pred.append("negative")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("none")
    return y_pred

In [22]:
y_pred = predict(X_test , model, tokenizer)

100%|██████████████████████████████████████████████████████████████████████████████████| 2344/2344 [01:44<00:00, 22.49it/s]


In [23]:
evaluate(y_true, y_pred)


Accuracy: 0.647
Accuracy for label 0: 0.444
Accuracy for label 1: 0.941

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.44      0.60      1388
           1       0.54      0.94      0.68       956

    accuracy                           0.65      2344
   macro avg       0.73      0.69      0.64      2344
weighted avg       0.76      0.65      0.63      2344


Confusion Matrix:
[[616 772]
 [ 56 900]]


In [24]:
y_pred

['positive',
 'none',
 'positive',
 'negative',
 'none',
 'positive',
 'negative',
 'none',
 'negative',
 'negative',
 'negative',
 'positive',
 'negative',
 'positive',
 'none',
 'negative',
 'negative',
 'positive',
 'negative',
 'positive',
 'negative',
 'none',
 'none',
 'none',
 'positive',
 'positive',
 'none',
 'negative',
 'none',
 'negative',
 'none',
 'positive',
 'none',
 'none',
 'positive',
 'positive',
 'positive',
 'none',
 'negative',
 'none',
 'negative',
 'none',
 'positive',
 'none',
 'negative',
 'none',
 'none',
 'negative',
 'negative',
 'none',
 'positive',
 'positive',
 'none',
 'negative',
 'none',
 'none',
 'negative',
 'negative',
 'none',
 'none',
 'none',
 'positive',
 'negative',
 'negative',
 'none',
 'negative',
 'negative',
 'positive',
 'none',
 'negative',
 'none',
 'negative',
 'none',
 'negative',
 'none',
 'negative',
 'none',
 'none',
 'none',
 'none',
 'negative',
 'none',
 'negative',
 'negative',
 'negative',
 'positive',
 'none',
 'none',
 'ne

In [25]:
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",
)

training_arguments = TrainingArguments(
    output_dir="logs",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    do_eval=False,
    evaluation_strategy="no",
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
    max_seq_length=1024,

)

Map:   0%|          | 0/8103 [00:00<?, ? examples/s]

In [26]:
trainer.train()

Step,Training Loss
25,3.6043
50,1.3666
75,2.1636
100,0.7928
125,2.1807
150,0.8181
175,2.2252
200,0.8139
225,2.1726
250,0.7528


TrainOutput(global_step=3036, training_loss=1.227531315309728, metrics={'train_runtime': 7592.7279, 'train_samples_per_second': 3.202, 'train_steps_per_second': 0.4, 'total_flos': 2.418327118824653e+16, 'train_loss': 1.227531315309728, 'epoch': 3.0})

In [27]:
trainer.model.save_pretrained("trained-model")

In [28]:
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████████████████████████████████████████████████████████████████████████████| 2344/2344 [03:20<00:00, 11.70it/s]

Accuracy: 0.883
Accuracy for label 0: 0.906
Accuracy for label 1: 0.849

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.91      0.90      1388
           1       0.86      0.85      0.86       956

    accuracy                           0.88      2344
   macro avg       0.88      0.88      0.88      2344
weighted avg       0.88      0.88      0.88      2344


Confusion Matrix:
[[1258  130]
 [ 144  812]]



