In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
import torch

# 1. Charger la config LoRA
adapter_path = "chemin/vers/l_adapter"  # Remplacez par le chemin vers votre adaptateur LoRA
# Assurez-vous que le chemin contient le fichier `adapter_config.json`
config = PeftConfig.from_pretrained(adapter_path)

# 2. Charger le modèle de base GPT-2
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(adapter_path)

# 3. Charger l’adaptateur LoRA
model = PeftModel.from_pretrained(base_model, adapter_path)
model.eval()


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GPT2LMHeadModel(
      (transformer): GPT2Model(
        (wte): Embedding(50257, 768)
        (wpe): Embedding(1024, 768)
        (drop): Dropout(p=0.1, inplace=False)
        (h): ModuleList(
          (0-11): 12 x GPT2Block(
            (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (attn): GPT2Attention(
              (c_attn): lora.Linear(
                (base_layer): Conv1D(nf=2304, nx=768)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=768, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2304, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
           

In [25]:
# 4. Tester le modèle
prompt = "### Instruction:n\ Tell me 3 things to do in London ? \n\n### Response:\n"
# Tokeniser le prompt
inputs = tokenizer(prompt, return_tensors="pt")
with torch.no_grad():
    # Générer une réponse avec le modèle LoRA, on prend les 50 meilleurs mots de la distribution avec une température de 0.8
    # et une probabilité cumulée(top_p) d'au moins 0.95
    # et une longueur maximale de 100 tokens
    outputs = model.generate(**inputs , do_sample=True, temperature=0.8, max_length=100, top_p=0.95, top_k=50, num_return_sequences=1)
    outputs2= base_model.generate(**inputs , do_sample=True, temperature=0.8, max_length=100, top_p=0.95, top_k=50, num_return_sequences=1)
# 5. Afficher le résultat du modèle LoRA
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
# 6. Afficher le résultat du modèle de base
print("\n\nGPT2\n", tokenizer.decode(outputs2[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


### Instruction:n\ Tell me 3 things to do in London ? 

### Response:
1. Go to a cafe or bar to buy a coffee. 
2. Take an Uber or Lyft. 
3. Visit the cinema or cinema chain. 
4. Eat lunch at a local restaurant or restaurant. 
5. Book a book or event. 
6. Go to a cinema or theatre. 
7. Spend time with family and friends.


GPT2
 ### Instruction:n\ Tell me 3 things to do in London ? 

### Response:
1. Dress up in a full-on suit and tie
2. Take a walk around London
3. Make a trip to the Elephant and Castle to explore the city
4. Take a bike to the Royal Mint for a drink and a snack
5. Head out to the Thames for a fun afternoon in the sun
6. Visit the Thames and watch the skyline through a telescope
