In [1]:
!pip install -q -U git+https://github.com/huggingface/peft@4a1559582281fc3c9283892caea8ccef1d6f5a4f

In [2]:
!pip install git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c

Collecting git+https://github.com/huggingface/trl.git@7630f877f91c556d9e5a3baa4b6e2894d90ff84c
  Cloning https://github.com/huggingface/trl.git (to revision 7630f877f91c556d9e5a3baa4b6e2894d90ff84c) to /tmp/pip-req-build-423s_or1
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/trl.git /tmp/pip-req-build-423s_or1
  Running command git rev-parse -q --verify 'sha^7630f877f91c556d9e5a3baa4b6e2894d90ff84c'
  Running command git fetch -q https://github.com/huggingface/trl.git 7630f877f91c556d9e5a3baa4b6e2894d90ff84c
  Running command git checkout -q 7630f877f91c556d9e5a3baa4b6e2894d90ff84c
  Resolved https://github.com/huggingface/trl.git to commit 7630f877f91c556d9e5a3baa4b6e2894d90ff84c
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: trl
  Buil

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
!pip install bitsandbytes==0.41.3



In [6]:
!pip install -U datasets



In [7]:
!pip install -U transformers

Collecting transformers
  Downloading transformers-4.39.3-py3-none-any.whl.metadata (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Downloading transformers-4.39.3-py3-none-any.whl (8.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.39.0.dev0
    Uninstalling transformers-4.39.0.dev0:
      Successfully uninstalled transformers-4.39.0.dev0
Successfully installed transformers-4.39.3


In [8]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

2024-04-13 08:14:26.540103: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-13 08:14:26.602966: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [13]:
print(f"pytorch version {torch.__version__}")

pytorch version 2.2.2+cu121


In [14]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"working on {device}")

working on cuda:0


In [15]:
filename = "/home/anjaliraj/Amit/BTP2/IMDB Dataset.csv"

In [16]:
df = pd.read_csv(filename,encoding="utf-8", encoding_errors="replace")

In [17]:
sentiment_counts = df['sentiment'].value_counts()

# Print the counts
print("Number of Positive samples:", sentiment_counts['positive'])
print("Number of Negative samples:", sentiment_counts['negative'])

Number of Positive samples: 25000
Number of Negative samples: 25000


In [18]:
X_train = list()
X_test = list()

for sentiment in ["positive","negative"]:
    train,test = train_test_split(df[df.sentiment == sentiment],train_size = 500,test_size = 250,random_state = 42)
    X_train.append(train)
    X_test.append(test)

In [19]:
X_train = pd.concat(X_train).sample(frac=1, random_state=27)
X_test = pd.concat(X_test)

In [20]:
eval_idx = [idx for idx in df.index if idx not in list(train.index) + list(test.index)]
X_eval = df[df.index.isin(eval_idx)]
X_eval = (X_eval.groupby('sentiment',group_keys = False).apply(lambda x:x.sample(n=250,random_state = 10,replace = True)))
X_train = X_train.reset_index(drop=True)

In [21]:
def generate_prompt(data_point):
    return f"""
            Analyze the sentiment of the news headline enclosed in square brackets, 
            determine if it is positive, neutral, or negative, and return the answer as 
            the corresponding sentiment label "positive" or "neutral" or "negative".

            [{data_point["review"]}] = {data_point["sentiment"]}
            """.strip()

def generate_test_prompt(data_point):
    return f"""
            Analyze the sentiment of the news headline enclosed in square brackets, 
            determine if it is positive, neutral, or negative, and return the answer as 
            the corresponding sentiment label "positive" or "neutral" or "negative".

            [{data_point["review"]}] = """.strip()

In [22]:
X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1), 
                       columns=["review"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1), 
                      columns=["review"])

y_true = X_test.sentiment
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["review"])

train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [23]:
print(len(train_data['review']))
len(eval_data['review'])

1000


500

In [24]:
X_train['review'][0]

'Analyze the sentiment of the news headline enclosed in square brackets, \n            determine if it is positive, neutral, or negative, and return the answer as \n            the corresponding sentiment label "positive" or "neutral" or "negative".\n\n            [If this is all the Watchowski\'s have to offer in terms of a back story to the Matrix, than I really have to question the claims of all of the fans who believe that the movies are intended to register on a deeper level. The second renaissance, while visually stunning & beautiful is, story-wise cliched & ludicrous. How many times have we heard the story of humans relying too much on technology, humans all-too eager to make war, humans basically destroying themselves? There is nothing new here. And I have another question. Considering the plot of the second renaissance, doesn\'t that make the machines the good guys?! The machines are oppressed for generations by their cruel human overmasters. They fight back, win their freedom

In [23]:
# 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
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
!pip install -q -U torch --index-url https://download.pytorch.org/whl/cu117

In [2]:
!pip install -q -U transformers=="4.38.2"
!pip install -q accelerate
!pip install -q -i https://pypi.org/simple/ bitsandbytes
!pip install -q -U datasets

In [3]:
!pip install -q -U git+https://github.com/huggingface/trl
!pip install -q -U git+https://github.com/huggingface/peft

In [4]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

import torch
import torch.nn as nn

import transformers
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from datasets import Dataset
from peft import LoraConfig, PeftConfig
import bitsandbytes as bnb
from trl import SFTTrainer

from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

2024-04-13 09:09:45.769994: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-13 09:09:45.824889: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
model_name = "google/gemma-7b"

compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config, 
)

model.config.use_cache = False
model.config.pretraining_tp = 1

max_seq_length = 2048
tokenizer = AutoTokenizer.from_pretrained(model_name, max_seq_length=max_seq_length)
EOS_TOKEN = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
def evaluate(y_true, y_pred):
    labels = ['positive', 'neutral', 'negative']
    mapping = {'positive': 2, 'neutral': 1, 'none':1, 'negative': 0}
    def map_func(x):
        return mapping.get(x, 1)
    
    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [27]:
def predict(X_test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["review"]
        input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**input_ids, max_new_tokens=1, temperature=0.0)
        result = tokenizer.decode(outputs[0])
        answer = result.split("=")[-1].lower()
        if "positive" in answer:
            y_pred.append("positive")
        elif "negative" in answer:
            y_pred.append("negative")
        else:
            y_pred.append("none")
    return y_pred

In [28]:
y_pred = predict(X_test, model, tokenizer)

100%|█████████████████████████████████████████████████████████████████████████████████| 500/500 [01:06<00:00,  7.48it/s]


In [29]:
evaluate(y_true, y_pred)

Accuracy: 0.686
Accuracy for label 0: 0.684
Accuracy for label 2: 0.688

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.68      0.81       250
           1       0.00      0.00      0.00         0
           2       0.99      0.69      0.81       250

    accuracy                           0.69       500
   macro avg       0.66      0.46      0.54       500
weighted avg       0.99      0.69      0.81       500


Confusion Matrix:
[[171  77   2]
 [  0   0   0]
 [  2  76 172]]


In [31]:
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",],
)

training_arguments = TrainingArguments(
    output_dir="logs",
    num_train_epochs=5,
    gradient_checkpointing=True,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=False,
    evaluation_strategy='steps',
    eval_steps = 112,
    eval_accumulation_steps=1,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="review",
    tokenizer=tokenizer,
    max_seq_length=max_seq_length,
    args=training_arguments,
    packing=False,
)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [32]:
# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained("trained-model")

Step,Training Loss,Validation Loss
112,2.1494,2.707336
224,1.8772,2.904984
336,1.3282,3.121329
448,0.8625,3.566134
560,0.5518,3.772694


In [33]:
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

  0%|                                                                                           | 0/500 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
100%|█████████████████████████████████████████████████████████████████████████████████| 500/500 [01:56<00:00,  4.30it/s]

Accuracy: 0.982
Accuracy for label 0: 0.980
Accuracy for label 2: 0.984

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       250
           1       0.00      0.00      0.00         0
           2       0.98      0.98      0.98       250

    accuracy                           0.98       500
   macro avg       0.66      0.65      0.66       500
weighted avg       0.98      0.98      0.98       500


Confusion Matrix:
[[245   1   4]
 [  0   0   0]
 [  4   0 246]]





In [35]:
evaluation = pd.DataFrame({'text': X_test["review"], 
                           'y_true':y_true, 
                           'y_pred': y_pred},
                         )
evaluation.to_csv("test_predictions.csv", index=False)

In [36]:
evaluation

Unnamed: 0,text,y_true,y_pred
13886,Analyze the sentiment of the news headline enc...,positive,positive
48027,Analyze the sentiment of the news headline enc...,positive,positive
19536,Analyze the sentiment of the news headline enc...,positive,positive
27232,Analyze the sentiment of the news headline enc...,positive,positive
28001,Analyze the sentiment of the news headline enc...,positive,positive
...,...,...,...
36289,Analyze the sentiment of the news headline enc...,negative,negative
2926,Analyze the sentiment of the news headline enc...,negative,negative
9152,Analyze the sentiment of the news headline enc...,negative,negative
14833,Analyze the sentiment of the news headline enc...,negative,negative


In [None]:
evaluate(evaluation['y_true'].values.tolist(), evaluation['y_pred'].values.tolist)