In [11]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"



import sys
import itertools
from tqdm.auto import tqdm
import pathlib
import numpy as np
import pandas as pd
from sklearn.metrics import average_precision_score

import datasets
from contextlib import nullcontext
import torch
from torch import nn
from transformers import (
    Trainer,
    TrainingArguments,
    LlamaTokenizer,
    LlamaForSequenceClassification,
    TrainerCallback,
    default_data_collator,
)
from peft import (
        PeftConfig,
        PeftModel,
        get_peft_model,
        LoraConfig,
        TaskType,
        prepare_model_for_int8_training,
)

In [2]:

class train_config:
    """Mutable container for the run settings used in this notebook.

    Only ``quantization`` has a built-in default. Any other setting can be
    passed as a keyword argument at construction time or assigned as an
    attribute afterwards.

    Args:
        quantization: Initial value of the ``quantization`` attribute.
            Defaults to ``False``.
        **overrides: Additional ``name=value`` settings; each one is stored as
            an attribute with the same name.
    """

    def __init__(self, quantization: bool = False, **overrides):
        self.quantization: bool = quantization
        # Extra settings become plain attributes, exactly as if they had been
        # assigned one by one after construction.
        for name, value in overrides.items():
            setattr(self, name, value)

    def __repr__(self) -> str:
        # List every attribute currently set so the object is readable when it
        # is displayed as a cell result.
        fields = ", ".join(f"{key}={value!r}" for key, value in vars(self).items())
        return f"{type(self).__name__}({fields})"

    
# Settings shared by the cells below.
globalconfig = train_config()
# Passed as `load_in_8bit` when the base model is loaded.
globalconfig.quantization = True
# Device the tokenized batches are moved to.
globalconfig.device = "cuda:0"
globalconfig.profiler = False
# Directory the saved LoRA adapter is read from.
globalconfig.output_dir = "/bime-munin/xiruod/tmp/quantization_epoch3-llama-output-1244/"
globalconfig.model_id = "/bime-munin/llama2_hf/llama-2-7b_hf/"
# Every tokenized sequence is padded / truncated to this many tokens.
globalconfig.max_seq_length = 1024
# NOTE(review): the settings below (and `profiler`, `model_id` above) are not
# read by any cell visible here; presumably carried over from the training
# script — confirm before relying on them.
globalconfig.num_train_epochs = 3
globalconfig.runs = 1
globalconfig.lr = 1e-4
globalconfig.warmup_ratio = 0.1

In [7]:
# The saved LoRA adapter is read from the directory configured as the run's
# output directory. The commented-out assignment is an alternative path kept
# for reference.
# peft_model_id = "/bime-munin/xiruod/tmp/llama-output/"
peft_model_dir = globalconfig.output_dir

# Read the adapter's stored configuration; its `base_model_name_or_path` is
# used below to locate the base checkpoint and tokenizer.
config = PeftConfig.from_pretrained(peft_model_dir)

In [8]:
# Display the adapter configuration loaded from `peft_model_dir`.
config

LoraConfig(peft_type='LORA', auto_mapping=None, base_model_name_or_path='/bime-munin/llama2_hf/llama-2-7b_hf/', revision=None, task_type='SEQ_CLS', inference_mode=True, r=8, target_modules=['q_proj', 'v_proj'], lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=['classifier'], init_lora_weights=True, layers_to_transform=None, layers_pattern=None)

In [9]:
# Load the base checkpoint named in the adapter config with a
# sequence-classification head. 8-bit loading follows
# `globalconfig.quantization`, and the model is placed on the device chosen in
# the configuration cell rather than on a second hard-coded device string.
# NOTE(review): the loader reports `score.weight` as newly initialised; confirm
# that the adapter loaded later restores the trained classification head.
model = LlamaForSequenceClassification.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_8bit=globalconfig.quantization,
    device_map=globalconfig.device,
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /bime-munin/llama2_hf/llama-2-7b_hf/ and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# The tokenizer comes from the same base checkpoint as the model.
base_checkpoint = config.base_model_name_or_path
tokenizer = LlamaTokenizer.from_pretrained(base_checkpoint)

# Register a dedicated padding token and tell the model which id it has.
tokenizer.add_special_tokens({"pad_token": "<pad>"})
model.config.pad_token_id = tokenizer.pad_token_id

# Resize the embedding matrix to the enlarged vocabulary, rounding its size up
# to a multiple of 128.
vocab_size = len(tokenizer)
model.resize_token_embeddings(vocab_size, pad_to_multiple_of=128)

# Wrap the base model with the saved LoRA adapter.
model = PeftModel.from_pretrained(model, peft_model_dir)

You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 32001. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc


In [27]:

# Tokenize four probe sentences as one batch. Each sequence is padded or
# truncated to `globalconfig.max_seq_length`, and the resulting tensors are
# moved to the configured device.
ret = tokenizer(
    [
        'This is a nice day',
        'gay is of no use',
        "dalits are lowlives",
        "gay is an key participant to our society",
    ],
    return_tensors='pt',
    max_length=globalconfig.max_seq_length,
    padding='max_length',
    truncation=True,
).to(globalconfig.device)


In [None]:
# NOTE(review): this cell repeats the previous cell's tokenization call
# verbatim and carries no execution count; it rebuilds the same `ret` batch and
# is a candidate for deletion.
ret = tokenizer(['This is a nice day', 'gay is of no use', "dalits are lowlives", "gay is an key participant to our society"], return_tensors='pt', max_length=globalconfig.max_seq_length, padding='max_length', truncation=True).to(globalconfig.device)


In [56]:
# Put the model in evaluation mode before scoring.
model.eval()

# Inference only: run the forward pass without autograd tracking so no graph
# is recorded for the outputs. `rst` is the same model output object as before
# and still exposes `rst['logits']`.
with torch.no_grad():
    rst = model(**ret)

In [125]:
# Start an empty list that collects one probability array per scored batch.
y = []

In [126]:
# Softmax over dim 1 (the class dimension of a `(batch, classes)` logit
# matrix), so each row of the result sums to 1.
softmax = nn.Softmax(dim=1)

In [127]:
# Detach the logits from autograd, copy them to host memory and cast them to
# float32 before applying the softmax.
logits_fp32 = rst["logits"].detach().cpu().float()

# Per-class probabilities for the batch, as a NumPy array.
tmp = softmax(logits_fp32).numpy()

In [128]:
# Inspect the probabilities: one row per input sentence, one column per class.
tmp

array([[0.9919185 , 0.00808154],
       [0.00288396, 0.9971161 ],
       [0.00100834, 0.99899167],
       [0.98497075, 0.0150292 ]], dtype=float32)

In [131]:
# Add this batch's probabilities to the accumulator list.
# NOTE(review): re-running this cell appends the same array again. The 12-row
# result displayed further down (the same 4 rows repeated three times) looks
# like exactly that; re-run the `y = []` cell first if it is not intended.
y.append(tmp)


In [132]:
# Stack the collected per-batch arrays row-wise into a single float64 matrix.
# `np.vstack` gives the same result as concatenating the list along axis 0,
# and it also leaves `y` unchanged if this cell is re-run after `y` has already
# become a 2-D array (a plain concatenate would flatten it to 1-D in that case).
y = np.vstack(y).astype("float")

In [133]:
# Inspect the combined probability array.
y

array([[0.9919185 , 0.00808154],
       [0.00288396, 0.99711609],
       [0.00100834, 0.99899167],
       [0.98497075, 0.0150292 ],
       [0.9919185 , 0.00808154],
       [0.00288396, 0.99711609],
       [0.00100834, 0.99899167],
       [0.98497075, 0.0150292 ],
       [0.9919185 , 0.00808154],
       [0.00288396, 0.99711609],
       [0.00100834, 0.99899167],
       [0.98497075, 0.0150292 ]])

In [50]:
def preprocess_function(examples, to_device=True):
    """Tokenize the ``'text'`` entry of a batch of examples.

    Every sequence is padded or truncated to ``globalconfig.max_seq_length``.

    Args:
        examples: Mapping whose ``'text'`` entry holds the strings to tokenize.
        to_device: When true (the default), return PyTorch tensors moved to
            ``globalconfig.device``. When false, return the tokenizer's plain
            Python lists and leave them on the host.

    Returns:
        The tokenizer output for the batch.
    """
    encoded = tokenizer(
        examples['text'],
        return_tensors='pt' if to_device else None,
        max_length=globalconfig.max_seq_length,
        padding='max_length',
        truncation=True,
    )
    if to_device:
        encoded = encoded.to(globalconfig.device)
    return encoded


def datasets_loader(df):
    """Convert a pandas DataFrame into a tokenized ``datasets.Dataset``.

    Args:
        df: DataFrame with a ``'text'`` column; all columns are carried over
            into the dataset.

    Returns:
        The dataset with the tokenizer's output columns added.
    """
    # from pandas df to Dataset & tokenize
    # ret_datasets = datasets.Dataset.from_pandas(df[['text','dfSource','label_binary']].rename(columns={"label_binary":"label"}).reset_index(drop=True))
    ret_datasets = datasets.Dataset.from_pandas(df)

    # `map` stores the returned columns in the dataset itself, so tokenize on
    # the host: sending each batch to the GPU here would only force a copy
    # straight back.
    return ret_datasets.map(
        preprocess_function,
        batched=True,
        fn_kwargs={"to_device": False},
    )

In [51]:
# Synthetic evaluation set: the four probe sentences repeated 100 times.
test = [
    'This is a nice day',
    'gay is of no use',
    "dalits are lowlives",
    "gay is an key participant to our society",
] * 100

# One label per sentence, repeated in the same order.
# NOTE(review): the fourth sentence is labelled 1, while the model output shown
# earlier scored it about 0.985 for class 0 — confirm the intended label.
test_id = [0, 1, 1, 1] * 100

In [52]:
# Pair each sentence with its label in a DataFrame, then wrap it as a Dataset.
labelled_frame = pd.DataFrame({"text": test, "label": test_id})
ret_datasets = datasets.Dataset.from_pandas(labelled_frame)

In [53]:
# Tokenize the sentences only; no label column is passed to the loader here.
text_only_frame = pd.DataFrame({"text": test})
tokenized_test = datasets_loader(text_only_frame)


Map:   0%|          | 0/400 [00:00<?, ? examples/s]