In [None]:
# !pip install torch transformers peft accelerate bitsandbytes

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch

# Load LoRA config
PATH_1 = 'weights/llama3-1B'
PATH_3 = 'weights/llama3-3B'
PATH_8 = 'weights/llama3-8B'

def get_pretrain(path):

    config = PeftConfig.from_pretrained(path)
    
    # Load base model (same as when you fine-tuned)
    base_model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        device_map="auto",
        torch_dtype=torch.float16,
        load_in_4bit=True,
    )
    
    # Load LoRA adapter on top of base model
    model = PeftModel.from_pretrained(base_model, path)
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(path)
    
    # # Add special tokens if used during training
    # tokenizer.add_special_tokens({"eos_token": "<|eot_id|>"})
    # model.resize_token_embeddings(len(tokenizer))
    
    model.eval()
    return model, tokenizer

model_1, tokenizer_1 = get_pretrain(PATH_1)
# model_3, tokenizer_3 = get_pretrain(PATH_3)
# model_8, tokenizer_8 = get_pretrain(PATH_8)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [2]:
def ask(question, model, tokenizer):
    prompt = f"### Input:\n{question}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id,
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    if "### Response:" in decoded:
        response = decoded.split("### Response:")[1].split("### Input:")[0].strip()
        response += "\n<EOS>"
    else:
        response = decoded[len(prompt):].strip()

    return response

print(ask("What is the list of responsibility of a Senior Tax and Accounting Specialist?", model_1, tokenizer_1))
print('*' * 100)
# print(ask("What is the list of responsibility of a Senior Tax and Accounting Specialist?", model_3, tokenizer_3))
# print('*' * 100)
# print(ask("What is the list of responsibility of a Senior Tax and Accounting Specialist?", model_8, tokenizer_8))

- Reviewing and auditing tax returns
- Performing tax preparation and tax research
- Processing tax returns for preparation and submission
- Developing tax reports and analyzing tax data
- Performing tax calculations and computations
- Maintaining tax records and reports
- Collaborating with tax professionals and other departments
- Performing tax compliance checks
- Identifying tax issues and preparing solutions
- Providing tax-related information and advice
- Updating tax regulations and laws
- Advising and training staff on tax matters
<EOS>
****************************************************************************************************
