In [None]:
# SETUP environement
%%capture
!pip install git+https://github.com/bofenghuang/vigogne

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig
from vigogne.preprocess import generate_instruct_prompt

In [None]:
# Loading model
%%capture
model_name = "bofenghuang/vigogne-2-7b-instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, # dataset load is done in 4-bit
    bnb_4bit_quant_type="nf4",# The "nf4" value suggests that the model is using "narrow full" 4-bit quantization
    bnb_4bit_compute_dtype=torch.float16, #computation are done in 16-bit fp
)

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)
base_model.config.use_cache = False

In [None]:
# Fetch the tokenizer published under the same identifier as the base model.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Vocabulary specific to the JUNIA LLM that the tokenizer should treat as whole tokens.
new_tokens = ["ISEN", "ISA", "HEI", "JUNIA"]

# Register the extra tokens in the tokenizer's vocabulary.
tokenizer.add_tokens(new_tokens)

# The embedding matrix needs one row per vocabulary entry, so grow it to the
# tokenizer's new size. Left as the last expression so the resized embedding
# layer is displayed as the cell output.
base_model.resize_token_embeddings(len(tokenizer))

Embedding(32005, 4096)

In [None]:
# Wrap the (resized) base model with the Junia LoRA adapter.
lora_adapter = "Maxime62/vigogne-2-7b-Junia"
peftModel = PeftModel.from_pretrained(model=base_model, model_id=lora_adapter)

adapter_config.json:   0%|          | 0.00/733 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/1.69G [00:00<?, ?B/s]

In [None]:
# Instruction template sent to the model. The single "{}" placeholder is filled
# with the user's question through prompt.format(...).
prompt = (
    "<s>Ci-dessous se trouve une instruction qui décrit une demande d'un étudiant de chez Junia. "
    "Rédigez une réponse qui répond de manière précise à la demande.\n"
    "### Instruction:\n"
    "{}\n"
)

In [None]:
# Question submitted to the fine-tuned model.
user_query = "Quand à été créé HEI ?"

# Sampling is enabled below; seed torch so re-running this cell gives the same answer.
torch.manual_seed(42)

# Keep the tokenizer's attention mask alongside the token ids so that
# generate() does not have to infer it.
encoded_prompt = tokenizer(prompt.format(user_query), return_tensors="pt")
input_ids = encoded_prompt["input_ids"].to(peftModel.device)
attention_mask = encoded_prompt["attention_mask"].to(peftModel.device)
input_length = input_ids.shape[1]

generated_outputs = peftModel.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    generation_config=GenerationConfig(
        temperature=1,
        do_sample=True,
        max_new_tokens=64,
    ),
    return_dict_in_generate=True,
)

# The returned sequence starts with the prompt tokens; keep only what was generated after them.
generated_tokens = generated_outputs.sequences[0, input_length:]
generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(generated_text)


### Response:
Actua prend racine en 2018.


In [None]:
# Question submitted to the fine-tuned model.
user_query = "Où ce trouve le siège social de Junia ?"

# Sampling is enabled below; seed torch so re-running this cell gives the same answer.
torch.manual_seed(42)

# Keep the tokenizer's attention mask alongside the token ids so that
# generate() does not have to infer it.
encoded_prompt = tokenizer(prompt.format(user_query), return_tensors="pt")
input_ids = encoded_prompt["input_ids"].to(peftModel.device)
attention_mask = encoded_prompt["attention_mask"].to(peftModel.device)
input_length = input_ids.shape[1]

generated_outputs = peftModel.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    generation_config=GenerationConfig(
        do_sample=True,
        max_new_tokens=64,
    ),
    return_dict_in_generate=True,
)

# The returned sequence starts with the prompt tokens; keep only what was generated after them.
generated_tokens = generated_outputs.sequences[0, input_length:]
generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(generated_text)


### Response:
Le siège social de Junia se trouve à Lille.


In [None]:
# Question submitted to the fine-tuned model.
user_query = "Combien y a t-il d'apprentis dans tout Junia ?"

# Sampling is enabled below; seed torch so re-running this cell gives the same answer.
torch.manual_seed(42)

# Keep the tokenizer's attention mask alongside the token ids so that
# generate() does not have to infer it.
encoded_prompt = tokenizer(prompt.format(user_query), return_tensors="pt")
input_ids = encoded_prompt["input_ids"].to(peftModel.device)
attention_mask = encoded_prompt["attention_mask"].to(peftModel.device)
input_length = input_ids.shape[1]

generated_outputs = peftModel.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    generation_config=GenerationConfig(
        do_sample=True,
        max_new_tokens=64,
    ),
    return_dict_in_generate=True,
)

# The returned sequence starts with the prompt tokens; keep only what was generated after them.
generated_tokens = generated_outputs.sequences[0, input_length:]
generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(generated_text)


### Response:
Nos enseignements reçoivent plus de 1 400 étudiants par année.
