<a href="https://colab.research.google.com/github/KaifAhmad1/LLM-FineTuning-for-Sentiment-Classification/blob/main/Phi_2_for_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Installing Dependencies:**

In [14]:
!pip install -qU \
     accelerate \
     peft \
     einops \
     datasets \
     bitsandbytes \
     trl \
     tramsformers \
     datasets

[31mERROR: Could not find a version that satisfies the requirement tramsformers (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for tramsformers[0m[31m
[0m

**Imports:**

In [15]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments,
    pipeline, logging
)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [18]:
filename = "/content/drive/MyDrive/all_financial_sentiment_data.csv"

financial_headlines = pd.read_csv(filename,
                 names=["sentiment", "text"],
                 encoding="utf-8", encoding_errors="replace")

**Data Exploration:**

In [19]:
financial_headlines

Unnamed: 0,sentiment,text
0,neutral,"According to Gran , the company has no plans t..."
1,neutral,Technopolis plans to develop in stages an area...
2,negative,The international electronic industry company ...
3,positive,With the new production plant the company woul...
4,positive,According to the company 's updated strategy f...
...,...,...
4841,negative,LONDON MarketWatch -- Share prices ended lower...
4842,neutral,Rinkuskiai 's beer sales fell by 6.5 per cent ...
4843,negative,Operating profit fell to EUR 35.4 mn from EUR ...
4844,negative,Net sales of the Paper segment decreased to EU...


In [22]:
financial_headlines.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4846 entries, 0 to 4845
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   sentiment  4846 non-null   object
 1   text       4846 non-null   object
dtypes: object(2)
memory usage: 75.8+ KB


In [23]:
# Finding Label Distribution
sentiment_distribution = financial_headlines['sentiment'].value_counts()
print(sentiment_distribution)

neutral     2879
positive    1363
negative     604
Name: sentiment, dtype: int64


**Data Preparation:**

In [89]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from datasets import Dataset

In [90]:
# Split the dataset into training and testing sets for each sentiment
X_train = list()
X_test = list()

for sentiment in ["positive", "neutral", "negative"]:
    train, test = train_test_split(financial_headlines[financial_headlines.sentiment==sentiment],
                                   train_size=300,
                                   test_size=300,
                                   random_state=42)
    X_train.append(train)
    X_test.append(test)

In [91]:
# Concatenate the training and testing sets for each sentiment
X_train = pd.concat(X_train).sample(frac=1, random_state=10)
X_test = pd.concat(X_test)

In [92]:
# Create an evaluation set from the remaining data points
eval_idx = [idx for idx in financial_headlines.index if idx not in list(train.index) + list(test.index)]
X_eval = financial_headlines[financial_headlines.index.isin(eval_idx)]
X_eval = (X_eval
          .groupby('sentiment', group_keys=False)
          .apply(lambda x: x.sample(n=50, random_state=10, replace=True)))
X_train = X_train.reset_index(drop=True)

In [93]:
# Function to generate a prompt for training data
def generate_prompt(data_point):
    return f"""The sentiment of the following phrase: '{data_point["text"]}' is
            \n\n Positive
            \n Negative
            \n Neutral
            \n Cannot be determined
            \n\nSolution: The correct option is {data_point["sentiment"]}""".strip()

# Function to generate a prompt for evaluation data
def generate_test_prompt(data_point):
    return f"""The sentiment of the following phrase: '{data_point["text"]}' is
            \n\n Positive
            \n Negative
            \n Neutral
            \n Cannot be determined
            \n\nSolution: The correct option is""".strip()

In [94]:
# Create DataFrames for training and evaluation data with generated prompts
X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1),
                       columns=["text"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1),
                      columns=["text"])

In [95]:
# Extract true labels for testing set
y_true = X_test.sentiment

# Create a DataFrame for testing data with generated prompts
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])

In [96]:
# Create datasets for training and evaluation using the datasets library
train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

**Evaluation:**

In [103]:
def evaluate(y_true, y_pred):
    # Define labels and mapping for sentiment categories
    labels = ['positive', 'neutral', 'negative']
    mapping = {'positive': 2, 'neutral': 1, 'none': 1, 'negative': 0}

    # Function to map labels to numerical values
    def map_func(x):
        return mapping.get(x, 1)

    # Map true and predicted labels using the mapping function
    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)

    # Calculate overall accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Overall Accuracy: {accuracy:.3f}')

    # Generate accuracy report for each sentiment label
    unique_labels = set(y_true)  # Get unique labels

    for label in unique_labels:
        # Find indices where true labels match the current sentiment label
        label_indices = [i for i in range(len(y_true)) if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]

        # Calculate accuracy for the current sentiment label
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {labels[label]}: {accuracy:.3f}')

    # Generate classification report for precision, recall, and F1-score
    class_report = classification_report(y_true=y_true, y_pred=y_pred, target_names=labels)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix to analyze the performance of the model
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [104]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Configuration for quantization
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load pre-trained model with quantization config
model_name = "microsoft/phi-2"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)

# Customize model configuration
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load tokenizer for the same pre-trained model
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)

# Set pad_token to eos_token for consistency
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [105]:
from tqdm import tqdm
from transformers import pipeline

def predict(X_test, model, tokenizer):
    # List to store predicted labels
    y_pred = []

    # Loop through each data point in the test set
    for i in tqdm(range(len(X_test))):
        # Get the prompt from the test set
        prompt = X_test.iloc[i]["text"]

        # Create a text generation pipeline using the specified model and tokenizer
        pipe = pipeline(task="text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_new_tokens=3,
                        temperature=0.0,
                       )

        # Generate text based on the prompt
        result = pipe(prompt, pad_token_id=pipe.tokenizer.eos_token_id)

        # Extract the generated text and identify the sentiment label
        answer = result[0]['generated_text'].split("The correct option is")[-1].lower()
        if "positive" in answer:
            y_pred.append("positive")
        elif "negative" in answer:
            y_pred.append("negative")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("none")

    return y_pred

In [106]:
y_pred = predict(X_test, model, tokenizer)

100%|██████████| 900/900 [04:21<00:00,  3.44it/s]


In [107]:
evaluate(y_true, y_pred)

Overall Accuracy: 0.349
Accuracy for label positive: 0.073
Accuracy for label neutral: 0.863
Accuracy for label negative: 0.110

Classification Report:
              precision    recall  f1-score   support

    positive       0.92      0.07      0.14       300
     neutral       0.33      0.86      0.47       300
    negative       0.39      0.11      0.17       300

    accuracy                           0.35       900
   macro avg       0.55      0.35      0.26       900
weighted avg       0.55      0.35      0.26       900


Confusion Matrix:
[[ 22 266  12]
 [  2 259  39]
 [  0 267  33]]
