In [1]:
import os
import sys
from typing import List


import torch
import transformers
from datasets import load_dataset

"""
Unused imports:
import torch.nn as nn
import bitsandbytes as bnb
"""

from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer

from utils.prompter import Prompter
# lora hyperparams
lora_r = 8
lora_alpha = 16
lora_dropout = 0.05
lora_target_modules = [
    "q_proj",
    "v_proj",
]

prompt_template_name = "alpaca"  # The prompt template to use, will default to alpaca.
device_map = "auto"

base_model = "/root/llama-7b-hf"  # the only required argument
data_path = "train_data_3_class_clean.jsonl"
output_dir = "/root/autodl-tmp/output"
lora_weights = "/root/autodl-tmp/checkpoint-5800"
# resume_from_checkpoint = "/root/autodl-tmp/output/checkpoint-3700"
resume_from_checkpoint=None


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /root/miniconda3/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /root/miniconda3/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [2]:
prompter = Prompter(prompt_template_name)
tokenizer = LlamaTokenizer.from_pretrained(base_model)

tokenizer.pad_token_id = (
    0  # unk. we want this to be different from the eos token
 )
tokenizer.padding_side = "left"  # Allow batched inference

In [3]:
model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=False,
        torch_dtype=torch.float16,
        device_map=device_map,
    )
model = PeftModel.from_pretrained(
    model,
    lora_weights,
    torch_dtype=torch.float16,
)


The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

In [30]:
import json
from torch.utils.data import DataLoader
import time
import torch.nn.functional as F

# instructions = json.load(open("data/alpaca_data.json"))
instructions = []
with open(data_path, 'r') as f:
    for line in f:
        data = json.loads(line)
        data_point = {}
        data_point["instruction"] = 'What is the sentiment toward Bitcoin in the input sentence? [positive, negative, neutral]'
        data_point["input"] = data['text']
        data_point["output"] = data['label']
        full_prompt = prompter.generate_prompt(
            data_point["instruction"],
            data_point["input"],
        )
        instructions.append({'context':full_prompt, 'target':data['label']})

# print(instructions[0])

tokenid_map = {
    'pos': 1066,
    'negative': 22198,
    'neut': 17821
}

start_time = time.time()
model.eval()
with torch.no_grad():
    right = 0
    all = 0
    batch_size = 8
    input_texts = []
    targets = []
    for idx, item in enumerate(instructions[300:]):
        # feature = format_example(item)
        # input_text = feature["context"]
        all = all + 1
        input_texts.append(item["context"])
        targets.append(item["target"])
    test_loader = DataLoader(input_texts, batch_size=batch_size)
    for batch_idx,batch in enumerate(test_loader):
        input_ids = tokenizer(batch, padding=True,return_tensors='pt').to('cuda')
        # out = model.generate(**input_ids, max_length=250, temperature=0)
        out = model.generate(
            **input_ids,
            temperature=0,
            return_dict_in_generate= True,
            output_scores=True,
            max_new_tokens = 1
        )
        seqs = out['sequences']
        scores = out['scores'][0]
        softmax = F.softmax(scores, dim=1)
        
        pos = scores[:,1066]
        neg = scores[:,22198]
        neu = scores[:,17821]
        
        pos_prob = softmax[:,1066]
        neg_prob = softmax[:,22198]
        neu_prob = softmax[:,17821]
        
        max_val, max_index = torch.max(softmax, dim=1)
        
        print(max_val, max_index)
#         print(tokenizer.decode(max_index))
        
        
        results = tokenizer.batch_decode(seqs)
        # print(results, '\n\n')
        # break
        for idx,res in enumerate(results):
            # print(res,'\n\n')
            pred = res[res.find('Response') + 10:]
            
            target = targets[batch_idx*batch_size + idx]
            print(pos[idx].item(),pred, target)
#             # print(pred, batch_idx*batch_size + idx,targets[batch_idx*batch_size + idx])\
            # if pred.find(target) >= 0:
            #     right = right + 1
            if target.find(pred) >= 0:
                right = right + 1
        print(right,all,right/all)
        break
end_time = time.time()
elapsed_time = end_time - start_time
print("Finished in {:.2f} seconds.".format(elapsed_time))





tensor([1.0000, 0.9873, 1.0000, 1.0000, 1.0000, 1.0000, 0.7075, 0.9995],
       device='cuda:0', dtype=torch.float16) tensor([ 1066, 22198, 22198,  1066,  1066,  1066, 17821, 22198],
       device='cuda:0')
36.59375 pos positive
25.765625 negative positive
20.890625 negative negative
35.9375 pos positive
36.21875 pos positive
33.5 pos positive
28.0 neut positive
19.125 negative negative
6 815 0.007361963190184049
Finished in 0.35 seconds.
