# Talk to Alpaca-LoRA

Developed by Ziang Leng 冷子昂, Qiyuan Chen 陈启源 and Cheng Li 李鲁鲁.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/LC1332/Luotuo-Chinese-LLM/blob/main/notebook/evaluation_code.ipynb)

This notebook contains minimal code for running [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/) for demonstration purposes. Please check the repo for more details.

In [None]:
# Install the Python packages this notebook depends on.
!pip install bitsandbytes datasets loralib sentencepiece transformers peft
# Download utils.py into the working directory; a later cell imports
# DeviceMap from it.
!wget https://github.com/LC1332/Luotuo-Chinese-LLM/raw/main/notebook/utils.py

In [None]:
import os
import torch
from peft import PeftModel
from utils import DeviceMap
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig


In [None]:
# Tokenizer and base weights are both pulled from the same LLaMA-7B checkpoint.
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")

# load_in_8bit=True asks transformers for 8-bit weights; layer placement is
# whatever DeviceMap("LLaMA").get() returns (helper from the downloaded
# utils.py -- presumably a module-to-device mapping, confirm there).
model = LlamaForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    device_map=DeviceMap("LLaMA").get(),
    load_in_8bit=True,
)

# Number of CUDA devices visible to this process; a later cell checks it to
# decide whether to set the multi-GPU flags on the model.
world_size = torch.cuda.device_count()


In [None]:
# More than one GPU is visible: set both parallelism flags on the model.
# NOTE(review): presumably these tell downstream transformers code that the
# model is already split across devices -- confirm against the library.
if world_size > 1:
    model.is_parallelizable = model.model_parallel = True


In [None]:
# Load the "silk-road/luotuo-lora-7b-0.3" adapter on top of the base model;
# `model` is rebound to whatever PeftModel.from_pretrained returns.
model = PeftModel.from_pretrained(model, "silk-road/luotuo-lora-7b-0.3")


In [None]:
def generate_prompt(instruction, input=None):
    """Build the prompt text for an instruction and an optional input.

    Args:
        instruction: Task description placed under the ``### Instruction:``
            heading.
        input: Optional extra context placed under an ``### Input:`` heading.
            Any falsy value (``None``, ``""``) selects the shorter template
            that has no input section.

    Returns:
        The complete prompt string, ending with ``### Response:`` and a
        newline.
    """
    # No usable input: emit the instruction-only template.
    if not input:
        return (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
            f"### Instruction:\n{instruction}\n\n"
            "### Response:\n"
        )

    return (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{instruction}\n\n"
        f"### Input:\n{input}\n\n"
        "### Response:\n"
    )


In [None]:
# Decoding settings that evaluate() passes to model.generate().
# NOTE(review): do_sample is not set here. In transformers, temperature and
# top_p presumably only take effect when sampling is enabled, so with
# num_beams=4 alone this likely runs plain beam search -- confirm against the
# installed transformers version before tuning these values.
generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.75,
    num_beams=4,
)


def evaluate(instruction, input=None):
    """Generate and print the model's response to an instruction.

    Builds the prompt with ``generate_prompt``, tokenizes it, runs
    ``model.generate`` with the module-level ``generation_config`` (capped at
    256 new tokens) and prints one ``Response:`` line per returned sequence.

    Args:
        instruction: Task description inserted into the prompt.
        input: Optional extra context, forwarded to ``generate_prompt``.

    Returns:
        None. Responses are printed, not returned.
    """
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256,
    )
    # Each returned sequence is the prompt followed by the continuation.
    # Drop the prompt by token position rather than splitting the decoded
    # text on "### Response:": a text split picks a fragment of the prompt
    # whenever the user's instruction or input contains that marker itself.
    prompt_length = input_ids.shape[1]
    for s in generation_output.sequences:
        continuation = tokenizer.decode(s[prompt_length:])
        # Keep the previous behaviour of cutting the answer off if the model
        # starts emitting another "### Response:" section on its own.
        answer = continuation.split("### Response:")[0]
        print("Response:", answer.strip())


In [None]:
# Read one instruction from the user and print the model's response
# (no additional input context is supplied).
evaluate(input("Instruction: "))

In [None]:
# Read one instruction from the user and print the model's response
# (no additional input context is supplied).
evaluate(input("Instruction: "))

In [None]:
# Read one instruction from the user and print the model's response
# (no additional input context is supplied).
evaluate(input("Instruction: "))