# unsloth LLAMA 3.2 3b using generation and a prompt template

## installations for bitsandbytes (sunbites)

In [1]:
!pip install bitsandbytes
!pip install transformers accelerate
!pip install -U bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl.metadata (2.9 kB)
Downloading bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl (69.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.1/69.1 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0


## hugging face Login

In [4]:
!pip install huggingface_hub
from huggingface_hub import notebook_login

notebook_login()



VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

## Loading the model

In [2]:
import re
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, BitsAndBytesConfig
from transformers import AutoTokenizer

# Set up quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Enable 4-bit quantization
    bnb_4bit_quant_type="nf4",  # Normal Float-4 quantization type
    bnb_4bit_use_double_quant=True,  # Double quantization for accuracy
)

# Load the LLaMA model and tokenizer
model_path = "unsloth/Llama-3.2-3B-Instruct"  # Replace with your model path
model = LlamaForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map="auto",  # Automatically allocate layers to devices
)
tokenizer = AutoTokenizer.from_pretrained(model_path)


# Set the model to evaluation mode
model.eval()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/928 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/6.43G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 3072)
    (layers): ModuleList(
      (0-27): 28 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (k_proj): Linear4bit(in_features=3072, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=3072, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (up_proj): Linear4bit(in_features=3072, out_features=8192, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((3072,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((3072,), eps=1e

## Inference on one example

In [3]:

def create_prompt(text):
    """
    Create an optimized prompt for classifying emotions with strict output formatting.
    """
    return f"""
You are an expert in emotion analysis. Analyze the following text and classify each emotion on a scale of 0 to 3 based on the presence of emotional indicators in the text.

**Scoring Scale:**
0: Not at all present
1: Slightly present
2: Clearly present
3: Very present

**Emotions to Analyze:**
Anger, Fear, Joy, Sadness, Surprise.

**Text:**
"{text}"

**Instructions:**
1. Use only the provided text for analysis.
2. Analyze only the emotions listed above (Anger, Fear, Joy, Sadness, Surprise).
3. Provide scores in the exact format specified below.
4. Do not include any additional text, explanations, or emotions outside the specified list.

**Output Format:**
Anger: Score: <value> Fear: Score: <value> Joy: Score: <value> Sadness: Score: <value> Surprise: Score: <value>
"""

def parse_emotion_scores(response):
    """
    Parse emotion scores from the model's response and ensure validity.

    Args:
        response (str): The raw response from the model.

    Returns:
        dict: Parsed emotion scores with values between 0 and 3.
    """
    emotion_scores = {}
    emotions = ["Anger", "Fear", "Joy", "Sadness", "Surprise"]
    for emotion in emotions:
        # Use regex to extract the score for each emotion
        match = re.search(rf"{emotion}:\s*Score:\s*([0-3])", response, re.IGNORECASE)
        if match:
            score = int(match.group(1))
        else:
            score = 0  # Default to 0 if no match found
        emotion_scores[emotion] = score
    return emotion_scores

def validate_response(response):
    """
    Ensure the model's response adheres to the expected output format.

    Args:
        response (str): The raw response from the model.

    Returns:
        bool: True if response is valid, False otherwise.
    """
    required_emotions = ["Anger", "Fear", "Joy", "Sadness", "Surprise"]
    for emotion in required_emotions:
        if not re.search(rf"{emotion}:\s*Score:\s*([0-3])", response, re.IGNORECASE):
            return False
    return True



def infer_emotions_with_chain_of_thought(text, model, tokenizer):
    """
    Infers emotion scores for a given text using an optimized prompt with LLaMA.

    Args:
        text (str): The input text.
        model: The pre-trained LLaMA model.
        tokenizer: The tokenizer for the LLaMA model.

    Returns:
        dict: Predicted emotion scores on a scale of 0 to 3.
        str: The model's raw response.
    """
    # Generate the prompt
    prompt = create_prompt(text)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)

    # Generate the response with adjusted parameters
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,  # Reduced to avoid unnecessary generation
        do_sample=False,     # Deterministic output for consistency
        temperature=0.0,     # Set to 0 for deterministic behavior
        top_p=1.0,            # Disable nucleus sampling
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    # Validate and parse the response
    if validate_response(response):
        emotion_scores = parse_emotion_scores(response)
    else:
        # Attempt to extract the correct part if the response deviates
        emotion_scores = parse_emotion_scores(response)
        # Optionally, log or handle invalid responses here

    return emotion_scores, response

# Example input text
example_text = "It looks like dark smelly applesauce."

# Perform inference
predicted_emotions, model_response = infer_emotions_with_chain_of_thought(example_text, model, tokenizer)

# Print the results
print("Input Text:", example_text)
print("Predicted Emotion Scores:", predicted_emotions)
print("\nModel Response:\n", model_response)



Input Text: It looks like dark smelly applesauce.
Predicted Emotion Scores: {'Anger': 3, 'Fear': 0, 'Joy': 0, 'Sadness': 0, 'Surprise': 2}

Model Response:
 You are an expert in emotion analysis. Analyze the following text and classify each emotion on a scale of 0 to 3 based on the presence of emotional indicators in the text.

**Scoring Scale:**
0: Not at all present
1: Slightly present
2: Clearly present
3: Very present

**Emotions to Analyze:**
Anger, Fear, Joy, Sadness, Surprise.

**Text:**
"It looks like dark smelly applesauce."

**Instructions:**
1. Use only the provided text for analysis.
2. Analyze only the emotions listed above (Anger, Fear, Joy, Sadness, Surprise).
3. Provide scores in the exact format specified below.
4. Do not include any additional text, explanations, or emotions outside the specified list.

**Output Format:**
Anger: Score: <value> Fear: Score: <value> Joy: Score: <value> Sadness: Score: <value> Surprise: Score: <value>
Where <value> is the score (0, 1, 2,

## TESTING on the whole data set to get the accuracy

In [8]:
import pandas as pd
from sklearn.metrics import mean_squared_error, f1_score
# Import necessary functions
from IPython.display import display
import pandas as pd

# Load your CSV dataset
file_path = "/content/eng.csv"  # Replace with your CSV file path
df = pd.read_csv(file_path)

# Select only 10 samples for testing
test_df = df.head(100)  # Use `df.sample(10)` for random selection

# Define a function to calculate evaluation metrics
def calculate_metrics(predictions, ground_truths):
    """
    Calculate evaluation metrics between predictions and ground truths.
    """
    # Convert predictions and ground truths to flat lists for F1 score
    predictions_flat = predictions.values.flatten()
    ground_truths_flat = ground_truths.values.flatten()

    # Calculate F1 score (macro-averaged)
    f1 = f1_score(ground_truths_flat, predictions_flat, average='macro')

    # Mean Squared Error
    mse = mean_squared_error(ground_truths, predictions)

    return f1, mse

# Loop over rows in the dataset to get predictions
predicted_scores = []
for _, row in test_df.iterrows():
    text = row['text']
    ground_truth = [row['Anger'], row['Fear'], row['Joy'], row['Sadness'], row['Surprise']]

    # Predict emotion scores using your model
    predicted_emotions, _ = infer_emotions_with_chain_of_thought(text, model, tokenizer)

    # Append predictions to the list
    predicted_scores.append([
        predicted_emotions['Anger'],
        predicted_emotions['Fear'],
        predicted_emotions['Joy'],
        predicted_emotions['Sadness'],
        predicted_emotions['Surprise']
    ])

# Convert predictions and ground truths to DataFrame for comparison
predicted_scores = pd.DataFrame(predicted_scores, columns=['Anger', 'Fear', 'Joy', 'Sadness', 'Surprise'])
ground_truths = test_df[['Anger', 'Fear', 'Joy', 'Sadness', 'Surprise']]

# Calculate metrics
f1, mse = calculate_metrics(predicted_scores, ground_truths)

print(f"F1 Score (Macro-Averaged): {f1:.4f}")
print(f"Mean Squared Error: {mse:.4f}")




F1 Score (Macro-Averaged): 0.4264
Mean Squared Error: 0.7140


# Bert Classification Code

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_scheduler,
)
from torch.utils.data import DataLoader, Dataset
import torch
from torch.optim import AdamW
from tqdm import tqdm

# Load the dataset
data_path = '/content/encoded_emotions2.csv'
data = pd.read_csv(data_path)

# Define binary columns for labels
binary_columns = [
    "Anger_0", "Anger_1", "Anger_2", "Anger_3",
    "Fear_0", "Fear_1", "Fear_2", "Fear_3",
    "Joy_0", "Joy_1", "Joy_2", "Joy_3",
    "Sadness_0", "Sadness_1", "Sadness_2", "Sadness_3",
    "Surprise_0", "Surprise_1", "Surprise_2", "Surprise_3",
]

# Extract texts and labels
texts = data["text"].tolist()
labels = data[binary_columns].values

# Split into training and validation sets
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)

# Load tokenizer
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenize the text data
def tokenize_function(texts):
    return tokenizer(texts, padding=True, truncation=True, max_length=128)

train_encodings = tokenize_function(train_texts)
val_encodings = tokenize_function(val_texts)

# Convert labels to PyTorch tensors
train_labels = torch.tensor(train_labels, dtype=torch.float32)
val_labels = torch.tensor(val_labels, dtype=torch.float32)

# Define custom dataset class
class EmotionBinaryDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = self.labels[idx]
        return item

# Create datasets
train_dataset = EmotionBinaryDataset(train_encodings, train_labels)
val_dataset = EmotionBinaryDataset(val_encodings, val_labels)

# Load pre-trained model with 20 output labels
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=20)

# Dataloader and optimizer
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)

optimizer = AdamW(model.parameters(), lr=2e-5)
num_training_steps = len(train_loader) * 3  # 3 epochs
lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(3):  # Number of epochs
    print(f"Epoch {epoch + 1}")
    model.train()
    train_loss = 0
    loop = tqdm(train_loader, leave=True)
    for batch in loop:
        batch = {key: val.to(device) for key, val in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        train_loss += loss.item()
        loop.set_description(f"Loss: {loss.item():.4f}")

    print(f"Epoch {epoch + 1} training loss: {train_loss / len(train_loader):.4f}")

# Save the model
model.save_pretrained("./fine_tuned_bert_binary")
tokenizer.save_pretrained("./fine_tuned_bert_binary")

# Inference
def predict(text):
    model.eval()
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.sigmoid(outputs.logits).cpu().numpy()
    return predictions

# Example inference
example_text = "Immediately my throat tightens."
predicted_scores = predict(example_text)

# Display predicted scores
predicted_scores

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1


Loss: 0.3103: 100%|██████████| 277/277 [00:53<00:00,  5.17it/s]


Epoch 1 training loss: 0.4077
Epoch 2


Loss: 0.3483: 100%|██████████| 277/277 [00:54<00:00,  5.07it/s]


Epoch 2 training loss: 0.3149
Epoch 3


Loss: 0.2023: 100%|██████████| 277/277 [00:54<00:00,  5.13it/s]


Epoch 3 training loss: 0.2775


array([[0.85529304, 0.08718282, 0.04910556, 0.03282693, 0.14159574,
        0.38415357, 0.30925506, 0.08395925, 0.9509713 , 0.05647148,
        0.029693  , 0.02515816, 0.49717483, 0.2410192 , 0.11889549,
        0.06622875, 0.8189266 , 0.12567945, 0.05551054, 0.03964295]],
      dtype=float32)

In [3]:
from sklearn.metrics import f1_score

# Validation Loop and F1 Score Calculation
model.eval()
val_predictions = []
val_true_labels = []
with torch.no_grad():
    for batch in tqdm(val_loader, leave=True):
        batch = {key: val.to(device) for key, val in batch.items()}
        outputs = model(**batch)
        predictions = torch.sigmoid(outputs.logits).cpu().numpy()  # Convert to probabilities
        val_predictions.extend(predictions)
        val_true_labels.extend(batch["labels"].cpu().numpy())

# Convert to binary predictions using a threshold (e.g., 0.5)
val_predictions = np.array(val_predictions)
val_true_labels = np.array(val_true_labels)
binary_predictions = (val_predictions > 0.5).astype(int)

# Compute F1 score
f1 = f1_score(val_true_labels, binary_predictions, average="micro")  # Micro-average for multi-label classification
print(f"Validation F1 Score: {f1:.4f}")


100%|██████████| 70/70 [00:03<00:00, 23.08it/s]


Validation F1 Score: 0.7231
