In [1]:
import torch
from transformers import AutoTokenizer
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Modify it if you have more gpus, e.g., "0,1,2,3" if you have 4 GPUs
os.environ["NCCL_P2P_DISABLE"]="1"
os.environ["NCCL_IB_DISABLE"]="1"
os.environ['CURL_CA_BUNDLE'] = ''
os.environ['REQUESTS_CA_BUNDLE'] = ''
import warnings
from transformers import (AutoModelForSequenceClassification, 
                          BitsAndBytesConfig)
from transformers import DataCollatorWithPadding
import pandas as pd
from huggingface_hub import login
import datasets
from datasets import Dataset
# login(token='') # Create a token in your huggingface account and use it here it
from transformers import TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from peft import (LoraConfig, 
                  PeftConfig, 
                  get_peft_model, 
                  prepare_model_for_kbit_training,
                  PeftModel) 
import warnings
warnings.filterwarnings('ignore')
import torch.nn.functional as F

checkpoint = "meta-llama/Llama-2-7b-hf"
output_dir = "llama2_sa" 

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\mirad\.cache\huggingface\token
Login successful


In [2]:
# Data Preperation
df = pd.read_csv('zero_shot.csv')
df = df.sample(1000)
test = pd.read_csv('test_gen.csv')
test = test.sample(3000)
df_tr, df_te = train_test_split(df, test_size=0.2)
train_dataset = Dataset.from_dict(df_tr)
test_dataset = Dataset.from_dict(df_te)
my_dataset_dict = datasets.DatasetDict({"train":train_dataset,
                                        "test":test_dataset})

# Tokenizer definition
tokenizer=AutoTokenizer.from_pretrained(checkpoint)

# Function to tokenize the data
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=1024)
# Use the function
tokenized_text = my_dataset_dict.map(preprocess_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [3]:
# Define labels
id2label = {0: "negative", 1: "neutral", 2: "positive"}
label2id = {"negative": 0, "neutral": 1, "positive": 2}

#Quantization Configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, 
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_use_double_quant = True, 
    bnb_4bit_compute_dtype = torch.bfloat16 
)

# Define model
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=3,
    id2label=id2label, 
    label2id=label2id,
    quantization_config=quantization_config,
    device_map='auto')

# to determine target_modeuls, use print(model) to identify "Linear" layers
# use the name between parentheses
lora_config = LoraConfig(
    r = 24, 
    lora_alpha = 8,
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout = 0.05, 
    bias = 'none',
    task_type = 'SEQ_CLS'
)

if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

model.config.pad_token_id = model.config.eos_token_id
model.config.use_cache = False

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 25,178,112 || all params: 6,632,538,112 || trainable%: 0.37961503688077114


In [4]:
# Use accuarcy for evaluation metrics 
def compute_metrics(evaluations):
    predictions, labels = evaluations
    predictions = np.argmax(predictions, axis=1)
    return {'accuracy':accuracy_score(predictions,labels)}

# A custom trainer for llama2-7B
class CustomTrainer(Trainer):
    def __init__(self, *args, class_weights=None, **kwargs):
        super().__init__(*args, **kwargs)
        if class_weights is not None:
            self.class_weights = torch.tensor(class_weights, 
            dtype=torch.float32).to(self.args.device)
        else:
            self.class_weights = None

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels").long()

        outputs = model(**inputs)

        logits = outputs.get('logits')

        if self.class_weights is not None:
            loss = F.cross_entropy(logits, labels, weight=self.class_weights)
        else:
            loss = F.cross_entropy(logits, labels)

        return (loss, outputs) if return_outputs else loss

In [7]:
# Fine tuning
# Skip this block if already have done the fine tuning
training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_checkpointing=True,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_steps = 25,
    evaluation_strategy="epoch",
    bf16=True,
    save_strategy="no")

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_text["train"],
    eval_dataset=tokenized_text["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics)
# Fine tuning
trainer.train()
# save the trained model 
trainer.save_model()

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in 'C:\\Users\\mirad' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.
[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/mohdrad/huggingface/1b37a8270a1f4eee98460613c31c97e0



Epoch,Training Loss,Validation Loss,Accuracy
1,1.8978,1.89415,0.325
2,1.286,1.396044,0.345
3,1.267,1.346163,0.365
4,1.2763,1.323793,0.35
5,1.2213,1.319913,0.345


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : violet_worm_6288
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/mohdrad/huggingface/1b37a8270a1f4eee98460613c31c97e0
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     epoch [16]                     : (0.5, 5.0)
[1;38;5;39mCOMET INFO:[0m     eval/accuracy [5]              : (0.325, 0.365)
[1;38;5;39mCOMET INFO:[0m     eval/loss [5]                  : (1.3199130296707153, 1.8941497802734375)
[1;38;5;39mCOMET INFO:[0m     eval/runtime [5]               : (31.2526, 31.4584)
[1;38;5;3

[1;38;5;39mCOMET INFO:[0m     args/ignore_data_skip                   : False
[1;38;5;39mCOMET INFO:[0m     args/include_inputs_for_metrics         : False
[1;38;5;39mCOMET INFO:[0m     args/include_num_input_tokens_seen      : False
[1;38;5;39mCOMET INFO:[0m     args/include_tokens_per_second          : False
[1;38;5;39mCOMET INFO:[0m     args/jit_mode_eval                      : False
[1;38;5;39mCOMET INFO:[0m     args/label_names                        : None
[1;38;5;39mCOMET INFO:[0m     args/label_smoothing_factor             : 0.0
[1;38;5;39mCOMET INFO:[0m     args/learning_rate                      : 1e-05
[1;38;5;39mCOMET INFO:[0m     args/length_column_name                 : length
[1;38;5;39mCOMET INFO:[0m     args/load_best_model_at_end             : False
[1;38;5;39mCOMET INFO:[0m     args/local_process_index                : 0
[1;38;5;39mCOMET INFO:[0m     args/local_rank                         : 0
[1;38;5;39mCOMET INFO:[0m     args/log_level    

[1;38;5;39mCOMET INFO:[0m     config/exponential_decay_length_penalty : None
[1;38;5;39mCOMET INFO:[0m     config/finetuning_task                  : None
[1;38;5;39mCOMET INFO:[0m     config/forced_bos_token_id              : None
[1;38;5;39mCOMET INFO:[0m     config/forced_eos_token_id              : None
[1;38;5;39mCOMET INFO:[0m     config/hidden_act                       : silu
[1;38;5;39mCOMET INFO:[0m     config/hidden_size                      : 4096
[1;38;5;39mCOMET INFO:[0m     config/id2label                         : {0: 'negative', 1: 'neutral', 2: 'positive'}
[1;38;5;39mCOMET INFO:[0m     config/initializer_range                : 0.02
[1;38;5;39mCOMET INFO:[0m     config/intermediate_size                : 11008
[1;38;5;39mCOMET INFO:[0m     config/is_composition                   : False
[1;38;5;39mCOMET INFO:[0m     config/is_decoder                       : False
[1;38;5;39mCOMET INFO:[0m     config/is_encoder_decoder               : False
[1;38;5

In [5]:
# load saved model 
checkpoint2 = './llama2_sa'
config = PeftConfig.from_pretrained(checkpoint2)
model = PeftModel.from_pretrained(model, checkpoint2, is_trainable=True)
model.eval()

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): PeftModelForSequenceClassification(
      (base_model): LoraModel(
        (model): LlamaForSequenceClassification(
          (model): LlamaModel(
            (embed_tokens): Embedding(32001, 4096)
            (layers): ModuleList(
              (0-31): 32 x LlamaDecoderLayer(
                (self_attn): LlamaSdpaAttention(
                  (q_proj): lora.Linear4bit(
                    (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=4096, out_features=24, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=24, out_features=4096, bias=False)
                    )
                    (l

In [6]:
# Test the model on a testing set
def generate_predictions(model,df_test, bs):
    sentences = df_test.text.tolist()
    batch_size = bs  
    all_outputs = []

    for i in range(0, len(sentences), batch_size):

        batch_sentences = sentences[i:i + batch_size]

        inputs = tokenizer(batch_sentences, 
                           return_tensors="pt", 
                           padding=True, 
                           truncation=True, 
                           max_length=1024)

        inputs = {k: v.to('cuda' if torch.cuda.is_available() else 'cpu') for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            all_outputs.append(outputs['logits'])
        print(i+batch_size)
       
        
    final_outputs = torch.cat(all_outputs, dim=0)
    df_test['predictions']=final_outputs.argmax(axis=1).cpu().numpy()

# call the function    
generate_predictions(model, test, 128)
y_true = test['label'] 
y_pred = test['predictions']

target_names = ['negative', 'neutral', 'positive']
print(classification_report(y_true, y_pred, target_names=target_names))

128
256
384
512
640
768
896
1024
1152
1280
1408
1536
1664
1792
1920
2048
2176
2304
2432
2560
2688
2816
2944
3072
              precision    recall  f1-score   support

    negative       0.41      0.24      0.30      1084
     neutral       0.29      0.56      0.38       846
    positive       0.35      0.23      0.28      1070

    accuracy                           0.33      3000
   macro avg       0.35      0.34      0.32      3000
weighted avg       0.35      0.33      0.32      3000

