In [None]:
! pip install -q peft  accelerate bitsandbytes safetensors gradio
!pip install git+https://github.com/huggingface/transformers

In [2]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import transformers
# Hub repository id of the base checkpoint; reused below for both the model and the tokenizer.
model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"
# Target device string used by the inference cell.
device = "cuda"

In [3]:
# Quantization options for loading the base model: 4-bit weights, "nf4" quant type,
# double quantization enabled, bfloat16 as the compute dtype.
quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
bnb_config = transformers.BitsAndBytesConfig(**quantization_options)

In [None]:
# Load the base checkpoint using the quantization settings in `bnb_config`.
# 4-bit loading is requested only through `quantization_config`: `bnb_config` already
# sets load_in_4bit=True, and recent transformers releases raise a ValueError when
# `load_in_4bit` is also passed as a separate keyword next to `quantization_config`.
# device_map='auto' leaves weight placement to accelerate, so no later .to() is needed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map='auto'
)

In [7]:
from peft import PeftModel, PeftConfig
# Hub repository holding the adapter; the same id is used for its config and its weights.
adapter_repo = "ayoubkirouane/Mistral-7b-DZ_Startups"
config = PeftConfig.from_pretrained(adapter_repo)
# Wrap the already-loaded base model with the adapter (rebinds `model`).
model = PeftModel.from_pretrained(model, adapter_repo)

Downloading (…)/adapter_config.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/27.3M [00:00<?, ?B/s]

In [8]:
# Tokenizer is loaded from the base checkpoint id (`model_name`), not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Force the beginning-of-sequence token id to 1.
tokenizer.bos_token_id = 1
# NOTE(review): not referenced by any other cell visible in this notebook —
# presumably intended as a stopping criterion for generation; confirm or remove.
stop_token_ids = [0]

Downloading (…)okenizer_config.json:   0%|          | 0.00/963 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [15]:
def chatbot(text):
  """Generate a reply to a single user message.

  The message is wrapped in the ``[INST] ... [/INST]`` instruction markers,
  tokenized, and passed to ``model.generate`` with sampling enabled.

  Args:
    text: The raw user message.

  Returns:
    The newly generated text only (prompt and special tokens removed),
    with surrounding whitespace stripped.
  """
  prompt = f"[INST] {text} [/INST]"
  encoded = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
  # The tokenizer returns CPU tensors; move them to wherever the model lives.
  # The model itself is NOT moved here: it was already placed at load time, and
  # calling .to() on a quantized model on every request is redundant at best.
  model_input = encoded.to(model.device)
  generated_ids = model.generate(**model_input, max_new_tokens=512, do_sample=True)
  # generate() returns prompt + continuation; drop the prompt by token count
  # instead of string replacement, which breaks when the decoded prompt does not
  # match the original text exactly.
  prompt_length = model_input["input_ids"].shape[1]
  new_token_ids = generated_ids[0, prompt_length:]
  # skip_special_tokens keeps markers such as the end-of-sequence token out of the reply.
  return tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

In [None]:
import gradio as gr
# Create a Gradio interface: one multi-line input box wired to `chatbot`, one output box.
# Textbox has no `textarea` argument; a multi-line text area is requested with `lines=`.
input_text = gr.Textbox(lines=4, label="User Input")
output_text = gr.Textbox(lines=8, label="Chatbot Response")

iface = gr.Interface(fn=chatbot,
                     inputs=input_text,
                     outputs=output_text,
                     # Flagging is configured with a string mode; "never" disables it.
                     allow_flagging="never",
                     examples=["How to start a startup in Algeria?"])

# Run the Gradio app (public share link, blocking with debug output in the cell)
iface.launch(share=True, debug=True)