In [1]:
%%capture
pip install --upgrade torch  torchvision transformers datasets accelerate bitsandbytes peft trl

In [2]:
import numpy as np
import pandas as pd
import random
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig,PeftModel
from trl import SFTTrainer
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)

from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)


seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Ensure reproducibility for CUDA (if using GPU)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
model_name = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"
new_model="fine-tuned-llama-3.2-model"

In [3]:
import wandb

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-Tuning Llama-3.2-for Sentiment Analysis', 
    job_type="training", 
    anonymous="allow",
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33molfat[0m ([33molfat-sayed[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112751955554105, max=1.0…

## Loading & processing the dataset <span style="font-size:16px;">📊⚙️</span>

In [4]:
df = pd.read_csv("/kaggle/input/sentiment-analysis-dataset/train.csv", encoding='latin1')
df

Unnamed: 0,textID,text,selected_text,sentiment,Time of Tweet,Age of User,Country,Population -2020,Land Area (Km²),Density (P/Km²)
0,cb774db0d1,"I`d have responded, if I were going","I`d have responded, if I were going",neutral,morning,0-20,Afghanistan,38928346,652860.0,60
1,549e992a42,Sooo SAD I will miss you here in San Diego!!!,Sooo SAD,negative,noon,21-30,Albania,2877797,27400.0,105
2,088c60f138,my boss is bullying me...,bullying me,negative,night,31-45,Algeria,43851044,2381740.0,18
3,9642c003ef,what interview! leave me alone,leave me alone,negative,morning,46-60,Andorra,77265,470.0,164
4,358bd9e861,"Sons of ****, why couldn`t they put them on t...","Sons of ****,",negative,noon,60-70,Angola,32866272,1246700.0,26
...,...,...,...,...,...,...,...,...,...,...
27476,4eac33d1c0,wish we could come see u on Denver husband l...,d lost,negative,night,31-45,Ghana,31072940,227540.0,137
27477,4f4c4fc327,I`ve wondered about rake to. The client has ...,", don`t force",negative,morning,46-60,Greece,10423054,128900.0,81
27478,f67aae2310,Yay good for both of you. Enjoy the break - y...,Yay good for both of you.,positive,noon,60-70,Grenada,112523,340.0,331
27479,ed167662a5,But it was worth it ****.,But it was worth it ****.,positive,night,70-100,Guatemala,17915568,107160.0,167


In [5]:
df.isnull().value_counts(),df.shape

(textID  text   selected_text  sentiment  Time of Tweet  Age of User  Country  Population -2020  Land Area (Km²)  Density (P/Km²)
 False   False  False          False      False          False        False    False             False            False              27480
         True   True           False      False          False        False    False             False            False                  1
 Name: count, dtype: int64,
 (27481, 10))

In [6]:
df.dropna(inplace=True)

In [7]:
df['sentiment'].unique()

array(['neutral', 'negative', 'positive'], dtype=object)

In [8]:
df = df.sample(frac=1, random_state=42).reset_index(drop=True).head(3000)
train_size = 0.8
eval_size = 0.1


train_end = int(train_size * len(df))
eval_end = train_end + int(eval_size * len(df))


X_train = df[:train_end]
X_eval = df[train_end:eval_end]
X_test = df[eval_end:]

def generate_prompt(data_point):
    return f"""
            Classify the text into positive, negative,nuetral, and return the answer as the corresponding sentiment-analysis disorder label.
context: {data_point["text"]}
label: {data_point["sentiment"]}""".strip()

def generate_test_prompt(data_point):
    return f"""
            Classify the text into positive, negative,nuetral,and return the answer as the corresponding sentiment-analysis disorder label.
context: {data_point["text"]}
label: """.strip()

X_train.loc[:,'text'] = X_train.apply(generate_prompt, axis=1)
X_eval.loc[:,'text'] = X_eval.apply(generate_prompt, axis=1)

y_true = X_test.loc[:,'sentiment']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["context"])

In [9]:
# Convert to datasets
train_data = Dataset.from_pandas(X_train[["text"]])
eval_data = Dataset.from_pandas(X_eval[["text"]])

In [10]:
train_data['text'][3]

'Classify the text into positive, negative,nuetral, and return the answer as the corresponding sentiment-analysis disorder label.\ncontext:  congrats hey\nlabel: positive'

## Loading the model and tokenizer 🦙3.2

In [11]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config, 
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name)

tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Test the Base Model  <span style="font-size:16px;">📈</span>

In [12]:
def predict(test, model, tokenizer):
    y_pred = []
    categories = ["positive", "negative", "nuetral"]
    
    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["context"]
        pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer, 
                        max_new_tokens=2, 
                        temperature=0.1)
        
        result = pipe(prompt)
        answer = result[0]['generated_text'].split("label:")[-1].strip()
        
    
        for category in categories:
            if category.lower() in answer.lower():
                y_pred.append(category)
                break
        else:
            y_pred.append("none")
    
    return y_pred

y_pred = predict(X_test, model, tokenizer)


  0%|          | 0/300 [00:00<?, ?it/s]Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
100%|██████████| 300/300 [00:46<00:00,  6.45it/s]


In [13]:
def evaluate(y_true, y_pred):
    labels = ["positive", "negative", "nuetral"]
    mapping = {label: idx for idx, label in enumerate(labels)}
    
    def map_func(x):
        return mapping.get(x, -1)  # Map to -1 if not found, but should not occur with correct data
    
    y_true_mapped = np.vectorize(map_func)(y_true)
    y_pred_mapped = np.vectorize(map_func)(y_pred)
    
  
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')
    

    unique_labels = set(y_true_mapped)       # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true_mapped)) if y_true_mapped[i] == label]
        label_y_true = [y_true_mapped[i] for i in label_indices]
        label_y_pred = [y_pred_mapped[i] for i in label_indices]
        label_accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {labels[label]}: {label_accuracy:.3f}')
        
    
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=list(range(len(labels))))
    print('\nClassification Report:')
    print(class_report)
    
   
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=list(range(len(labels))))
    print('\nConfusion Matrix:')
    print(conf_matrix)

evaluate(y_true, y_pred)

Accuracy: 0.587
Accuracy for label positive: 0.695
Accuracy for label negative: 0.411
Accuracy for label nuetral: 0.641

Classification Report:
              precision    recall  f1-score   support

    positive       0.52      0.70      0.59        82
    negative       0.77      0.41      0.54        90
     nuetral       0.00      0.00      0.00         0

   micro avg       0.59      0.55      0.57       172
   macro avg       0.43      0.37      0.38       172
weighted avg       0.65      0.55      0.56       172


Confusion Matrix:
[[57  1  0]
 [17 37  0]
 [ 0  0  0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Building the Model

In [14]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:              # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)
modules = find_all_linear_names(model)
modules

['q_proj', 'gate_proj', 'up_proj', 'down_proj', 'o_proj', 'k_proj', 'v_proj']

In [15]:
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules,
)

training_arguments = TrainingArguments(
    output_dir=new_model,                    
    num_train_epochs=1,                       
    per_device_train_batch_size=1,            
    gradient_accumulation_steps=8,            # number of steps before performing a backward/update pass
    gradient_checkpointing=True,              #  to save memory
    optim="paged_adamw_32bit",
    logging_steps=1,                         
    learning_rate=2e-4,                       
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,                        
    max_steps=-1,
    warmup_ratio=0.03,                       
    group_by_length=False,
    lr_scheduler_type="cosine",            
    report_to="wandb",                  
    eval_strategy="steps",              # save checkpoint every epoch
    eval_steps = 0.1
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=512,
    packing=False,
    dataset_kwargs={
    "add_special_tokens": False,
    "append_concat_token": False,
    }
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/2400 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

In [16]:
trainer.train()

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
30,1.4119,1.74104
60,1.2143,1.69674
90,1.2949,1.67382
120,1.4104,1.667052
150,1.5046,1.659174
180,1.3575,1.653582
210,1.2164,1.647382
240,1.5194,1.644082
270,1.8895,1.642791
300,1.3502,1.642495


TrainOutput(global_step=300, training_loss=1.6407002675533295, metrics={'train_runtime': 1289.8074, 'train_samples_per_second': 1.861, 'train_steps_per_second': 0.233, 'total_flos': 2051320500971520.0, 'train_loss': 1.6407002675533295, 'epoch': 1.0})

In [17]:
wandb.finish()
model.config.use_cache = True

VBox(children=(Label(value='0.032 MB of 0.032 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▅▃▃▂▂▁▁▁▁
eval/runtime,█▁▄▂▂▂▁▂▂▂
eval/samples_per_second,▁█▅▇▇▇█▇▇▇
eval/steps_per_second,▁█▅▇▇▇█▇▇▇
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███
train/grad_norm,▄▄▃▃█▁▂▁▂▁▂▂▁▁▁▂▁▁▁▁▁▁▂▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁
train/learning_rate,▅▆██████▇▇▇▇▇▆▆▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
train/loss,█▄▃▄▂▁▂▂▂▂▂▁▂▂▂▂▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▂▁▁▂▁

0,1
eval/loss,1.6425
eval/runtime,12.8729
eval/samples_per_second,23.305
eval/steps_per_second,2.952
total_flos,2051320500971520.0
train/epoch,1.0
train/global_step,300.0
train/grad_norm,0.24227
train/learning_rate,0.0
train/loss,1.3502


In [18]:
trainer.save_model(new_model)
tokenizer.save_pretrained(new_model)

('fine-tuned-llama-3.2-model/tokenizer_config.json',
 'fine-tuned-llama-3.2-model/special_tokens_map.json',
 'fine-tuned-llama-3.2-model/tokenizer.json')

## Test the Fine-Tuned Model <span style="font-size:16px;">📉</span>

In [19]:
# Reload tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)

base_model_reload = AutoModelForCausalLM.from_pretrained(
        model_name,
        return_dict=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [20]:
# Merge adapter with base model
tuned_model = PeftModel.from_pretrained(base_model_reload, new_model)
tuned_model = tuned_model.merge_and_unload()

In [21]:
tuned_model.eval()
y_pred = predict(X_test, tuned_model, tokenizer)
evaluate(y_true, y_pred)

  0%|          | 0/300 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  0%|          | 1/300 [00:00<01:06,  4.53it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  1%|          | 2/300 [00:00<00:46,  6.44it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  1%|          | 3/300 [00:00<00:39,  7.46it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  1%|▏         | 4/300 [00:00<00:36,  8.05it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  2%|▏         | 5/300 [00:00<00:34,  8.45it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  2%|▏         | 6/300 [00:00<00:33,  8.68it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  2%|▏         | 7/300 [00:00<00:33,  8.85it/s]Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  3%|▎         | 8/300 [00:00<00:32,  8.96it/s]Setting `pad_toke

Accuracy: 0.727
Accuracy for label positive: 0.805
Accuracy for label negative: 0.878
Accuracy for label nuetral: 0.570

Classification Report:
              precision    recall  f1-score   support

    positive       0.73      0.80      0.76        82
    negative       0.68      0.88      0.77        90
     nuetral       0.00      0.00      0.00         0

   micro avg       0.70      0.84      0.77       172
   macro avg       0.47      0.56      0.51       172
weighted avg       0.70      0.84      0.77       172


Confusion Matrix:
[[66  5  0]
 [ 2 79  0]
 [ 0  0  0]]



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## UI
 ### This interface allows you to input text and test the performance of the fine-tuned model.

In [22]:
from IPython.display import display, clear_output
import ipywidgets as widgets

save_directory = "/kaggle/working/fine-tuned-llama-3.2-model"

model = AutoModelForCausalLM.from_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generate_prompt(text):
    return f"""Classify the text into positive, negative, neutral, and return the answer as the corresponding sentiment-analysis disorder label.
text: {text}
label: """.strip()


def classify_text(text):
    prompt = generate_prompt(text)
    outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)
    result = outputs[0]["generated_text"].split("label: ")[-1].strip()
    return result


input_text = widgets.Textarea(
    value='',
    placeholder='Enter your text here',
    layout=widgets.Layout(width='90%', height='100px'),
    disabled=False
)


input_text.style = {'description_width': 'initial'}       # Set the background color 
input_text.add_class('custom-textarea')


classify_button = widgets.Button(
    description="Classify Sentiment",
    button_style='success',  
    layout=widgets.Layout(width='30%')
)


remove_text_button = widgets.Button(
    description="Remove Text",
    button_style='danger',  
    layout=widgets.Layout(width='30%')
)

output_area = widgets.Output()


def on_classify_button_click(b):
    with output_area:
        clear_output(wait=True)  # previous output
        sentiment_result = classify_text(input_text.value)
        print(f"Sentiment: {sentiment_result}")


def on_remove_text_button_click(b):
    input_text.value = ""  


classify_button.on_click(on_classify_button_click)
remove_text_button.on_click(on_remove_text_button_click)


button_box = widgets.HBox([classify_button, remove_text_button], layout=widgets.Layout(justify_content='center'))

display(input_text)
display(button_box)
display(output_area)

# Add custom CSS styling
display(widgets.HTML("""
<style>
    .custom-textarea textarea {
        background-color: #f5deb3 !important; /* Light brown (wheat) */
        color: #003366;  /* Dark navy text color */
        border: 2px solid #008000;  /* Green border */
    }
</style>
"""))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


Textarea(value='', layout=Layout(height='100px', width='90%'), placeholder='Enter your text here', style=Descr…

HBox(children=(Button(button_style='success', description='Classify Sentiment', layout=Layout(width='30%'), st…

Output()

HTML(value='\n<style>\n    .custom-textarea textarea {\n        background-color: #f5deb3 !important; /* Light…

###  <div style="box-shadow: rgba(240, 46, 170, 0.4) -5px 5px inset, rgba(240, 46, 170, 0.3) -10px 10px inset, rgba(240, 46, 170, 0.2) -15px 15px inset, rgba(240, 46, 170, 0.1) -20px 20px inset, rgba(240, 46, 170, 0.05) -25px 25px inset; padding:20px; font-size:30px; font-family: consolas; display:fill; border-radius:15px; color: rgba(240, 46, 170, 0.7)"> <b> ༼⁠ ⁠つ⁠ ⁠◕⁠‿⁠◕⁠ ⁠༽⁠つ Thank You!</b></div>