In [None]:
!pip install -q -U peft bitsandbytes

In [None]:
# Import necessary libraries
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Identifier of the pre-trained PEFT adapter; it is passed to the
# from_pretrained() calls that load the adapter config and the adapter weights.
peft_model_id = "Tirendaz/mistral-7b-dolly"

# Load the adapter's configuration. Its base_model_name_or_path attribute is
# what later cells use to load the matching base model and tokenizer.
config = PeftConfig.from_pretrained(peft_model_id)

In [None]:
from transformers import BitsAndBytesConfig

# Load the base model for causal language modeling with 4-bit quantized weights.
# - config.base_model_name_or_path is the base checkpoint named in the adapter config.
# - return_dict=True makes the model return output objects rather than plain tuples.
# - 4-bit quantization is requested through an explicit BitsAndBytesConfig passed as
#   quantization_config; handing load_in_4bit=True directly to from_pretrained()
#   is deprecated in favor of this form.
# - device_map='auto' lets the loader place the weights on the available devices.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map='auto'
)

In [None]:
# Load the tokenizer associated with the base model, configured for inference:
# - padding_side="left": decoder-only models continue from the last token of the
#   input, so any padding must sit before the prompt, not after it.
# - add_eos_token=False: appending an end-of-sequence token to the prompt is a
#   training-time setting; at generation time it would make the model continue
#   from an already "finished" sequence.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path,
                                          padding_side = "left",
                                          add_eos_token = False)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Wrap the quantized base model in a PeftModel, loading the adapter weights
# identified by peft_model_id on top of it.
# (PEFT stands for Parameter-Efficient Fine-Tuning.)
fine_tuned_model = PeftModel.from_pretrained(model, peft_model_id)

In [None]:
from transformers import logging, pipeline

# Only let CRITICAL-level messages from transformers through.
logging.set_verbosity(logging.CRITICAL)

# Text-generation pipeline around the adapter-wrapped model. Generation stops at
# the base model's configured EOS token id, or after 100 new tokens by default.
pipe_finetuned = pipeline(
    "text-generation",
    model=fine_tuned_model,
    tokenizer=tokenizer,
    eos_token_id=model.config.eos_token_id,
    max_new_tokens=100,
)

In [None]:
# Prompt wrapped in [INST] ... [/INST] markers: a question followed by a
# sentence of supporting context.
# NOTE(review): the string starts with a newline and a literal "<s>", and ends
# with a space plus newline after [/INST]. Presumably this mirrors the format
# used for fine-tuning — confirm, since the tokenizer may also add its own
# beginning-of-sequence token.
prompt = """
<s>[INST] What is a Python?  Here is some context: Python is a high-level, general-purpose programming language. [/INST] 
"""

In [None]:
# Sample a completion of at most 100 new tokens for the prompt, then print the
# text of the first returned sequence.
result = pipe_finetuned(prompt, do_sample=True, max_new_tokens=100)
print(result[0]["generated_text"])