In [1]:
import abc
class ChatIO(abc.ABC):
    """Interface for the console side of a chat session.

    Subclasses must implement all three methods below; the abstract class
    itself cannot be instantiated.
    """

    @abc.abstractmethod
    def prompt_for_input(self, role: str) -> str:
        """Ask ``role`` for input and return what was entered."""

    @abc.abstractmethod
    def prompt_for_output(self, role: str):
        """Announce that ``role`` is about to produce output."""

    @abc.abstractmethod
    def stream_output(self, output_stream):
        """Consume ``output_stream`` and present its contents."""

class SimpleChatIO(ChatIO):
    """Plain stdin/stdout implementation of ``ChatIO``."""

    def prompt_for_input(self, role: str) -> str:
        """Show ``"<role>: "`` as a prompt and return the line typed by the user."""
        return input(f"{role}: ")

    def prompt_for_output(self, role: str):
        """Print ``"<role>: "`` without a newline so the reply continues on the same line."""
        print(f"{role}: ", end="", flush=True)

    def stream_output(self, output_stream):
        """Print a growing reply word by word and return the final text.

        Args:
            output_stream: iterable of mappings; each item's ``"text"`` entry is
                treated as the full reply generated so far (every chunk is
                re-split and only the not-yet-printed words are emitted).

        Returns:
            The last chunk's text, stripped and re-joined with single spaces.
            An empty stream yields ``""``.
        """
        printed = 0
        # Start with no words so an empty stream prints a bare newline and
        # returns "" instead of raising UnboundLocalError after the loop.
        words = []
        for outputs in output_stream:
            words = outputs["text"].strip().split(" ")
            # Hold back the last word: a later chunk may still extend it.
            complete = len(words) - 1
            if complete > printed:
                print(" ".join(words[printed:complete]), end=" ", flush=True)
                printed = complete
        # Flush whatever is left (at least the held-back final word).
        print(" ".join(words[printed:]), flush=True)
        return " ".join(words)

In [2]:
from fastchat.model.model_adapter import load_model, get_conversation_template
from fastchat.serve.inference import prepare_logits_processor,generate_stream
import time
# --- Model loading options (passed to load_model below) ---
model_path = "lmsys/vicuna-13b-v1.3"
device = "cuda"
num_gpus = 1
max_gpu_memory = None
load_8bit = True  # 8-bit loading is what lets the model fit in GPU memory
cpu_offloading = False
gptq_config = None
revision = "main"
debug = False

# --- Conversation / generation options ---
# NOTE(review): conv_template is not read by the cells visible here
# (Conv_template, capitalised, is a different name) — confirm it is still needed.
conv_template = None
temperature = 0.3
# NOTE(review): in the Hugging Face convention 1.0 means "no penalty"; confirm
# that a value below 1.0 is really what is intended here.
repetition_penalty = 0.5
max_new_tokens = 200

# Console front-end used by Continue_conv to print the reply as it streams in.
chatio = SimpleChatIO()

# Load the model weights and tokenizer. Every option is passed by keyword so
# the call cannot silently mis-bind values if the installed FastChat version
# orders load_model's parameters differently.
model, tokenizer = load_model(
    model_path=model_path,
    device=device,
    num_gpus=num_gpus,
    max_gpu_memory=max_gpu_memory,
    load_8bit=load_8bit,
    cpu_offloading=cpu_offloading,
    gptq_config=gptq_config,
    revision=revision,
    debug=debug,
)

# Start from the conversation template that matches the model, then
# customise it for the Pokemon-trainer persona.
Conv_template = get_conversation_template(model_path)

# Earlier, stricter system prompt kept for reference:
#   You are a Pokemon trainer inside of a video game. The questioner will give
#   you information on the game and you must answer with one of the possible
#   commands. You must always answer with a command and only a single command.
#   The possible commands are: Up Down Left Right A Start

# NOTE(review): assumes the installed FastChat Conversation object reads its
# system prompt from a `system` attribute — confirm against the installed version.
Conv_template.system = (
    "A chat between a curious user and an artificial intelligence acting as a pokemon trainer. "
    "The pokemon trainer gives mostly answers with single word answers."
)

Conv_template.roles = ["User", "Pokemon Trainer"]

# Working conversation; the template itself is kept unmodified by later turns.
conv = Conv_template.copy()



def Continue_conv(conv, prompt):
    """Run one user turn through the model and record the reply in ``conv``.

    Relies on the module-level ``model``, ``tokenizer``, ``model_path``,
    ``device``, ``temperature``, ``repetition_penalty``, ``max_new_tokens``
    and ``chatio`` defined earlier in the notebook.

    Args:
        conv: conversation object to extend; it is mutated in place (the user
            message and the model reply are appended to it).
        prompt: raw text of the user's message, without any role label.

    Returns:
        The reply text as returned by ``chatio.stream_output``.
    """
    # Append the raw user text. The conversation template adds the role label
    # itself when it renders the prompt, so prefixing "User: " here (as the
    # previous version did) put the label into the prompt twice.
    conv.append_message(conv.roles[0], prompt)
    # Placeholder for the reply; filled in by update_last_message below.
    conv.append_message(conv.roles[1], None)

    gen_params = {
        "model": model_path,
        "prompt": conv.get_prompt(),
        "temperature": temperature,
        "repetition_penalty": repetition_penalty,
        "max_new_tokens": max_new_tokens,
        "stop": conv.stop_str,
        "stop_token_ids": conv.stop_token_ids,
        "echo": False,
    }

    # Print the "<role>: " label, then stream the generated text after it.
    chatio.prompt_for_output(conv.roles[1])
    output_stream = generate_stream(model, tokenizer, gen_params, device)
    outputs = chatio.stream_output(output_stream)
    conv.update_last_message(outputs.strip())

    return outputs

Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

100%|██████████| 3/3 [01:02<00:00, 20.84s/it]


In [3]:
def Reset_conv():
    """Forget every message exchanged so far in the shared ``conv`` object.

    Only ``conv.messages`` is rebound (to a fresh empty list); no other
    attribute of ``conv`` is touched.
    """
    conv.messages = list()

In [7]:
# Start from an empty history, then ask the model to pick a single battle move.
Reset_conv()
Continue_conv(conv,"""You are a pokemon trainer that can only shout out moves. You are in a battle and your current pokemon is a Charmander with 20/20 HP. 
                        The opponent is a Caterpie with green health. Your charmander has as moves: scratch and growl. 
                        Choose the move you consider the fastest way to KO your opponent in this situation. Answer only and only with the move you want to use.
                        """)


Pokemon Trainer: Scratch.


'Scratch.'