In [1]:
import os
import sys
import torch
from datasets import load_dataset
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
    PeftType,
    TaskType,
    PeftModelForSequenceClassification
)
from transformers import LlamaTokenizer ,LlamaForSequenceClassification ,LlamaForCausalLM,Trainer,TrainingArguments
import evaluate
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /home/ljc/anaconda3/envs/adv/lib/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /home/ljc/anaconda3/envs/adv/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda118.so...


In [2]:
# Release unused cached GPU memory before loading the model.
torch.cuda.empty_cache()

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

base_model = "../ljcllama-7b-hf"
device_map = "auto"

# Load the CSV files as a dataset dict; "valid.csv" is exposed as the "test" split.
datasets = load_dataset(
    "csv",
    data_files={
        "train": "train.csv",
        "test": "valid.csv",
    },
)

Found cached dataset csv (/home/ljc/.cache/huggingface/datasets/csv/default-360cc5e9d91b8249/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
100%|██████████| 2/2 [00:00<00:00, 1280.51it/s]


In [3]:
# Load the base checkpoint with a 66-way sequence-classification head,
# quantized to 8-bit, with module placement decided by `device_map`.
model = LlamaForSequenceClassification.from_pretrained(
    base_model,
    num_labels=66,
    load_in_8bit=True,
    torch_dtype=torch.float16,
    device_map=device_map,
    return_dict=True,
)

tokenizer = LlamaTokenizer.from_pretrained(base_model)
# The pad token must be a vocabulary token (a string). -100 is the ignore index
# used for loss labels, not a token, and newer transformers releases raise on a
# non-string pad token. Reusing the unknown token keeps the vocabulary size
# unchanged, so the resize below does not add embedding rows.
tokenizer.pad_token = tokenizer.unk_token
# NOTE(review): `model.config.pad_token_id` is not set in this notebook.
# LlamaForSequenceClassification is documented to pool the last non-padding
# token only when that id is configured, and inputs here are padded to max
# length -- confirm which position the logits are actually taken from.
model.resize_token_embeddings(len(tokenizer))

Loading checkpoint shards: 100%|██████████| 3/3 [00:12<00:00,  4.17s/it]


Embedding(32000, 4096, padding_idx=31999)

In [4]:
def compute_metrics(eval_pred):
    """Compute accuracy for an evaluation result.

    Args:
        eval_pred: A pair ``(logits, labels)``; the predicted class of each
            row is the argmax of ``logits`` over the last axis.

    Returns:
        Whatever the ``evaluate`` "accuracy" metric returns for the
        predicted classes against ``labels``.
    """
    accuracy_metric = evaluate.load("accuracy")
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=-1)
    return accuracy_metric.compute(
        predictions=predicted_classes,
        references=references,
    )


def tokenize_function(examples):
    """Tokenize the "text" field of ``examples`` with the notebook tokenizer.

    Sequences are padded with ``padding="max_length"`` and truncated.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)

In [5]:
# Wrap the base classifier with the adapter saved under "llama-for-attack-ada".
# `model` is rebound to the PEFT-wrapped model from here on.
model = PeftModelForSequenceClassification.from_pretrained(
    model,
    "llama-for-attack-ada",
)

In [6]:
# Tokenize every split in batches, then shuffle each split with a fixed seed
# so the order is reproducible.
tokenized_dataset = datasets.map(tokenize_function, batched=True)
small_train_dataset, small_eval_dataset = (
    tokenized_dataset[split].shuffle(seed=42) for split in ("train", "test")
)

Loading cached processed dataset at /home/ljc/.cache/huggingface/datasets/csv/default-360cc5e9d91b8249/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-ce3b874b0a570f03.arrow
Loading cached processed dataset at /home/ljc/.cache/huggingface/datasets/csv/default-360cc5e9d91b8249/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-e718a69fe985bb0d.arrow
Loading cached shuffled indices for dataset at /home/ljc/.cache/huggingface/datasets/csv/default-360cc5e9d91b8249/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-d46da5b66211fd10.arrow
Loading cached shuffled indices for dataset at /home/ljc/.cache/huggingface/datasets/csv/default-360cc5e9d91b8249/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-b6131a171a54aa7d.arrow


In [7]:
from torch.utils.data import DataLoader
import torch.nn.functional as F
from torch.nn import Softmax
from tqdm import tqdm


def col_fn(examples):
    """Collate a list of tokenized examples into one padded batch of tensors."""
    return tokenizer.pad(examples, padding="max_length", return_tensors="pt")


# Drop the raw text and rename the target column to `labels`, the keyword the
# batch is unpacked into when calling the model below.
evaldataset = tokenized_dataset['test'].remove_columns('text').rename_column('label', 'labels')
evaldataset = DataLoader(evaldataset, collate_fn=col_fn, batch_size=1)

count = 0  # number of correctly classified examples
total = 0  # number of examples seen
model.eval()
# NOTE(review): the base model was loaded with a device map; confirm this
# explicit move is still wanted.
model.to(device)
for batch in tqdm(evaldataset):
    batch.to(device)
    with torch.no_grad():
        output = model(**batch)
    # argmax of the logits gives the same class as argmax of their softmax,
    # so the softmax is skipped.
    predictions = torch.argmax(output.logits, dim=1)
    # Count per example rather than per batch so the accuracy stays correct
    # if `batch_size` is raised above 1.
    count += int((predictions == batch['labels']).sum())
    total += predictions.numel()
print(count / total)


100%|██████████| 132/132 [01:12<00:00,  1.83it/s]

0.007575757575757576





In [8]:
# Display the running total accumulated in `count` by the evaluation loop.
count

1

In [1]:
# Inspect a single batch: print its token ids, the class predicted by a direct
# forward pass, and the prediction of `cls` on the decoded text.
# NOTE(review): relies on `evaldataset` and `cls` from other cells; the
# recorded NameError shows it was run before they existed in the kernel.
for batch in evaldataset:
    print(batch.input_ids)
    # Move the inputs to the same device used by the evaluation loop.
    batch = batch.to(device)
    with torch.no_grad():
        output = model(**batch)
    # Take the argmax over the class dimension directly. The previous
    # `Softmax(output.logits).dim` built a Softmax module with the logits as
    # its `dim` argument and read them back, which only worked by accident.
    label = torch.argmax(output.logits, dim=1)
    print(label)
    print(cls(tokenizer.decode(batch['input_ids'][0], skip_special_tokens=True)))
    break

NameError: name 'evaldataset' is not defined

In [94]:
from torch.functional import F

In [17]:
def llamapipeline(model, tokenzier, verbose=True):
    """Build a callable that classifies text with ``model`` and ``tokenzier``.

    Args:
        model: Callable taking the tokenizer output as keyword arguments and
            returning an object with a ``logits`` attribute.
        tokenzier: Callable that encodes text into model inputs; it is called
            with ``padding="max_length"``, ``truncation=True`` and
            ``return_tensors="pt"``.
        verbose: When true (the default, matching the previous behavior),
            print the encoded inputs and the class probabilities on each call.

    Returns:
        A function ``classify(example)`` that returns one
        ``{"label": int, "score": float}`` dict per input row, holding the
        most probable class index and its softmax probability.
    """
    def classify(example):
        encoded = tokenzier(example, padding="max_length", truncation=True, return_tensors="pt")
        if verbose:
            print(encoded)
        with torch.no_grad():
            output = model(**encoded)
        # Softmax is taken on a float32 copy of the logits (presumably to
        # avoid half-precision rounding in the probabilities).
        probabilities = F.softmax(output.logits.float(), dim=1)
        if verbose:
            print(probabilities)
        scores, labels = torch.max(probabilities, dim=1)
        return [
            {"label": int(label), "score": float(score)}
            for label, score in zip(labels, scores)
        ]

    return classify
    

In [18]:
# Build the hand-rolled classifier callable from the current model and tokenizer.
cls = llamapipeline(model,tokenizer)

In [19]:
# Smoke-test the hand-rolled classifier on a short string.
cls("123")

{'input_ids': tensor([[    0, 29871, 29896,  ...,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0]])}
tensor([[3.1966e-04, 6.1655e-03, 3.5050e-02, 3.5187e-02, 9.4335e-03, 2.9101e-05,
         3.7977e-01, 1.1995e-02, 3.1851e-02, 4.6585e-03, 3.5182e-03, 2.0558e-04,
         3.2719e-05, 6.8603e-04, 1.8609e-04, 1.3879e-03, 1.3148e-02, 2.7990e-04,
         1.0461e-03, 2.6092e-03, 1.9777e-02, 7.2318e-04, 1.0835e-03, 5.9016e-05,
         1.4436e-04, 4.7720e-02, 3.3179e-03, 3.5102e-05, 4.2458e-03, 6.9898e-03,
         2.6865e-04, 9.3418e-03, 1.1007e-02, 8.1322e-03, 6.3426e-03, 4.7072e-02,
         8.2832e-04, 4.6343e-02, 5.7114e-02, 6.3488e-03, 2.3203e-03, 1.5599e-02,
         1.6782e-03, 5.5919e-03, 1.2344e-03, 2.1342e-02, 5.9778e-04, 6.9548e-04,
         2.4269e-03, 8.6879e-05, 8.1297e-05, 5.2275e-03, 3.0617e-05, 2.8428e-03,
         2.1569e-03, 1.3757e-03, 3.6646e-03, 6.4133e-04, 6.4437e-05, 6.8490e-02,
         8.0205e-04, 1.3017e-04, 2.0558e-04, 4.3876e-02, 3.330

[{'label': 6, 'score': 0.37977394461631775}]

In [106]:
from transformers import pipeline

# Wrap the same model and tokenizer in the library's own classification
# pipeline, to compare against the hand-rolled `cls` callable.
cls2 = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
)

The model 'PeftModelForSequenceClassification' is not supported for sentiment-analysis. Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'FunnelForSequenceClassification', 'GPT2ForSequenceClassification', 'GPT2ForSequenceClassification', 'GPTBigCodeForSequenceClassification', 'GPTNeoForSequenceClassification

In [107]:
# Run the library pipeline on the same string passed to `cls` for comparison.
cls2("123")

[{'label': 'LABEL_37', 'score': 0.7037458419799805}]

In [26]:
# Print every parameter tensor registered under `model.model.score`.
for score_param in model.model.score.parameters():
    print(score_param)

Parameter containing:
tensor([[-0.0023,  0.0010, -0.0005,  ..., -0.0118,  0.0149,  0.0105],
        [ 0.0248, -0.0052,  0.0196,  ..., -0.0090, -0.0193, -0.0094],
        [ 0.0044, -0.0310, -0.0143,  ..., -0.0125, -0.0143,  0.0019],
        ...,
        [ 0.0131, -0.0314, -0.0373,  ..., -0.0393, -0.0089, -0.0059],
        [ 0.0195, -0.0220, -0.0307,  ..., -0.0327, -0.0201, -0.0332],
        [ 0.0171, -0.0031, -0.0016,  ...,  0.0144,  0.0109, -0.0280]],
       device='cuda:0', dtype=torch.float16, requires_grad=True)
Parameter containing:
tensor([[-0.0023,  0.0010, -0.0005,  ..., -0.0118,  0.0149,  0.0105],
        [ 0.0248, -0.0052,  0.0196,  ..., -0.0090, -0.0193, -0.0094],
        [ 0.0044, -0.0310, -0.0143,  ..., -0.0125, -0.0143,  0.0019],
        ...,
        [ 0.0131, -0.0314, -0.0373,  ..., -0.0393, -0.0089, -0.0059],
        [ 0.0195, -0.0220, -0.0307,  ..., -0.0327, -0.0201, -0.0332],
        [ 0.0171, -0.0031, -0.0016,  ...,  0.0144,  0.0109, -0.0280]],
       device='cuda:0',

In [4]:
# Print every parameter tensor registered under `model.score`.
for score_param in model.score.parameters():
    print(score_param)

Parameter containing:
tensor([[-0.0023,  0.0010, -0.0005,  ..., -0.0118,  0.0149,  0.0105],
        [ 0.0248, -0.0052,  0.0196,  ..., -0.0090, -0.0193, -0.0094],
        [ 0.0044, -0.0310, -0.0143,  ..., -0.0125, -0.0143,  0.0019],
        ...,
        [ 0.0131, -0.0314, -0.0373,  ..., -0.0393, -0.0089, -0.0059],
        [ 0.0195, -0.0220, -0.0307,  ..., -0.0327, -0.0201, -0.0332],
        [ 0.0171, -0.0031, -0.0016,  ...,  0.0144,  0.0109, -0.0280]],
       device='cuda:0', dtype=torch.float16, requires_grad=True)


: 