In [1]:
!pip install -q -U peft bitsandbytes

In [2]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer



In [3]:
# Hub id of the fine-tuned PEFT adapter. Its config is loaded first because it
# names the base checkpoint (`base_model_name_or_path`) used by the later cells.
peft_model_id = "Tirendaz/mistral-7b-dolly-fine-tuned"
config = PeftConfig.from_pretrained(peft_model_id)

Downloading adapter_config.json:   0%|          | 0.00/584 [00:00<?, ?B/s]

In [4]:
from transformers import BitsAndBytesConfig

# Load the base checkpoint named in the adapter config with 4-bit quantization.
# The quantization request is passed through `quantization_config` instead of
# the bare `load_in_4bit=True` keyword, which newer transformers releases
# deprecate; this also puts the BitsAndBytesConfig import to use.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map='auto',  # automatic placement of the weights on available devices
)

Downloading config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/5.06G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [5]:
# Tokenizer of the base checkpoint, configured to pad on the right and to
# append the EOS token when encoding.
# NOTE(review): add_eos_token=True is usually a training-time setting; confirm
# it is really wanted for generation.
tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
    padding_side="right",
    add_eos_token=True,
)

# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

Downloading tokenizer_config.json:   0%|          | 0.00/966 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [6]:
# Attach the adapter published under `peft_model_id` to the 4-bit base model.
fine_tuned_model = PeftModel.from_pretrained(
    model=model,
    model_id=peft_model_id,
)

Downloading (…)er_model.safetensors:   0%|          | 0.00/85.1M [00:00<?, ?B/s]

In [7]:
from transformers import pipeline, logging

# Silence everything below CRITICAL in the transformers logger.
logging.set_verbosity(logging.CRITICAL)

# Text-generation pipeline around the adapter-wrapped model. Generation is
# capped at 100 new tokens and uses the base model's EOS token id.
pipe = pipeline(
    "text-generation",
    model=fine_tuned_model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    eos_token_id=model.config.eos_token_id,
)

In [8]:
prompt = """
What is a Python?  Here is some context: Python is a high-level, general-purpose programming language.
"""

# `pipe` is the text-generation pipeline built in the previous cell; building
# an identical one again here was redundant, so it is simply reused.
result = pipe(f"<s>[INST] {prompt} [/INST]")
generated = result[0]['generated_text']

# Keep only the text after the first closing instruction tag. partition()
# replaces the hard-coded `find(...) + 8` offset: '[/INST]' is 7 characters
# long, so the old slice silently dropped one extra character and, if the tag
# was ever missing, sliced from an arbitrary position. strip() removes the
# whitespace surrounding the answer.
answer = generated.partition('[/INST]')[2].strip()
print(answer)



Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant whitespace. Its syntax allows programmers to express concepts in fewer lines of code than would be possible in languages such as C++ or Java. The language provides constructs intended to enable clear programming on both a small and large scale.

Python is dynamically typed and garbage-collected. It has a large and comprehensive standard library. Used in a


In [9]:
prompt = """
Please summarize what Linkedin does. Here is some context: LinkedIn is a business and employment-focused social media platform
"""

# `pipe` is the text-generation pipeline built in an earlier cell; building an
# identical one again here was redundant, so it is simply reused.
result = pipe(f"<s>[INST] {prompt} [/INST]")
generated = result[0]['generated_text']

# Keep only the text after the first closing instruction tag. partition()
# replaces the hard-coded `find(...) + 8` offset: '[/INST]' is 7 characters
# long, so the old slice silently dropped one extra character and, if the tag
# was ever missing, sliced from an arbitrary position. strip() removes the
# whitespace surrounding the answer.
answer = generated.partition('[/INST]')[2].strip()
print(answer)

LinkedIn is a business and employment-focused social media platform. It is used for professional networking, including employers posting jobs and job seekers posting their resumes. LinkedIn was founded in 2002 and is headquartered in Sunnyvale, California. It is owned by Microsoft.

LinkedIn is a business and employment-focused social media platform. It is used for professional networking, including employers posting jobs and job seekers posting their resumes.
