In [1]:
!pip install transformers peft accelerate datasets bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl (60.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.1


In [2]:
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,
    TrainingArguments, Trainer, DataCollatorForLanguageModeling, pipeline
)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
# 1️⃣ Load and Prepare Data
# -----------------------------
df = pd.read_csv("/content/all-data.csv", encoding='latin', names=['sentiment', 'text'])
labels = ['negative', 'neutral', 'positive']

## Split the Dataset

In [4]:
X_train, X_test = [], []
for label in labels:
    sample = df[df['sentiment'] == label]
    X_train_temp, X_test_temp = train_test_split(sample, train_size=300, test_size=300, random_state=42)
    X_train.append(X_train_temp)
    X_test.append(X_test_temp)

In [5]:
X_train = pd.concat(X_train).sample(frac=1)
X_test = pd.concat(X_test).sample(frac=1)

In [8]:
eval_idx = [idx for idx in df.index if idx not in X_train.index and idx not in X_test.index]
X_eval = df.iloc[eval_idx]

In [12]:
X_train.reset_index(drop=True,inplace=True)
X_test.reset_index(drop=True,inplace=True)

## Hugging Face DatasetConversion

In [13]:
train_data = Dataset.from_pandas(X_train)
test_data = Dataset.from_pandas(X_test)
eval_data = Dataset.from_pandas(X_eval)

## 2️⃣ Prompt Creation (consistent format)
# -----------------------------

In [14]:
def generate_prompt(data):
    prompt = (
        "Analyze the mentioned review and give the sentiment as one of "
        '"positive", "negative" or "neutral".\n'
        f"Review: {data['text']}\nSentiment:"
    )
    return {'prompt': prompt, 'label': data['sentiment']}

train_data_prompted = train_data.map(generate_prompt)
eval_data_prompted = eval_data.map(generate_prompt)

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/3046 [00:00<?, ? examples/s]

## 3️⃣ Load Base Model + Tokenizer
## -----------------------------

In [20]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineGrained).
The token `LORA` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `LORA`


In [15]:
base_model = "meta-llama/Llama-3.2-1B"


In [16]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)


In [21]:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [22]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

## 4️⃣ Tokenization with masked loss
## -----------------------------

In [23]:
def tokenizer_function(examples):
    texts = [p + " " + l for p, l in zip(examples['prompt'], examples['label'])]
    tokenized = tokenizer(texts, truncation=True, max_length=512, padding="max_length")
    labels = tokenized["input_ids"].copy()
    # mask prompt tokens
    # for i, p in enumerate(examples['prompt']):
    #     prompt_len = len(tokenizer(p)["input_ids"])
    #     labels[i][:prompt_len] = [-100] * prompt_len
    tokenized["labels"] = labels
    return tokenized

In [24]:
tokenize_train = train_data_prompted.map(tokenizer_function, batched=True, remove_columns=train_data_prompted.column_names)
tokenize_eval = eval_data_prompted.map(tokenizer_function, batched=True, remove_columns=eval_data_prompted.column_names)

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/3046 [00:00<?, ? examples/s]

## 5️⃣ Apply LoRA Fine-Tuning
## -----------------------------

In [25]:
peft_config = LoraConfig(
    lora_alpha=8,
    lora_dropout=0.1,
    r=16,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM"
)

### freeze Model parameter and apply loraadapter

In [26]:
model_to_train = prepare_model_for_kbit_training(model)
lora_model = get_peft_model(model_to_train, peft_config)
lora_model.print_trainable_parameters()

trainable params: 1,703,936 || all params: 1,237,518,336 || trainable%: 0.1377


In [27]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,             # ↑ slightly more training
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    learning_rate=1e-4,             # ↓ smaller, more stable
    weight_decay=0.001,
    fp16=True,
    logging_dir="./log",
    logging_steps=20,
    report_to=[]
)

In [28]:
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenize_train,
    eval_dataset=tokenize_eval,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)


In [29]:
%%time
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,1.7447,1.674779
2,1.5792,1.633189
3,1.4971,1.618216


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


CPU times: user 42min 16s, sys: 3min 16s, total: 45min 32s
Wall time: 46min 37s


TrainOutput(global_step=339, training_loss=1.8098369137024106, metrics={'train_runtime': 2795.7651, 'train_samples_per_second': 0.966, 'train_steps_per_second': 0.121, 'total_flos': 8085796238131200.0, 'train_loss': 1.8098369137024106, 'epoch': 3.0})

In [31]:
lora_model.save_pretrained("./lora_model")

## 6️⃣ Load Fine-tuned Model
## -----------------------------

In [32]:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    trust_remote_code=True
)
fine_tuned_model = PeftModel.from_pretrained(model, "./lora_model", device_map="auto")

## 7️⃣ Prediction Function
## -----------------------------

In [36]:
def predict(X_test, model, tokenizer):
    def generate_test_prompt(data):
        prompt = (
            "Analyze the mentioned review and give the sentiment as one of "
            '"positive", "negative" or "neutral".\n'
            f"Review: {data['text']}\nSentiment:"
        )
        return {'prompt': prompt}

    X_test_prompted = X_test.map(generate_test_prompt)
    pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=5,
       # deterministic output
        device_map="auto"
    )

    y_pred = []
    for i in tqdm(range(len(X_test_prompted))):
        prompt = X_test_prompted[i]['prompt']
        result = pipe(prompt)[0]['generated_text']
        answer = result.split("Sentiment:")[-1].strip().lower()
        if "positive" in answer:
            y_pred.append("positive")
        elif "negative" in answer:
            y_pred.append("negative")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("neutral")
    return y_pred


## 8️⃣ Evaluate
# -----------------------------

In [37]:
def evaluate(y_true, y_pred):
    mapping = {'positive': 2, 'neutral': 1, 'negative': 0}
    y_true = np.vectorize(mapping.get)(y_true)
    y_pred = np.vectorize(mapping.get)(y_pred)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.3f}")
    print("\nClassification Report:\n", classification_report(y_true, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_true, y_pred))

In [38]:
y_true = X_test['sentiment']
y_pred = predict(test_data, fine_tuned_model, tokenizer)

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Device set to use cuda:0
  1%|          | 10/900 [00:03<03:44,  3.96it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 900/900 [02:26<00:00,  6.16it/s]


In [39]:
evaluate(y_true, y_pred)

Accuracy: 0.600

Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.80      0.72       300
           1       0.49      0.60      0.54       300
           2       0.73      0.40      0.51       300

    accuracy                           0.60       900
   macro avg       0.62      0.60      0.59       900
weighted avg       0.62      0.60      0.59       900


Confusion Matrix:
 [[240  53   7]
 [ 81 181  38]
 [ 49 132 119]]
