In [None]:
import pathlib
import textwrap

git_dir = pathlib.Path.home() / "git"
refuge_dir = git_dir / "refuge"

if not refuge_dir.exists():
    git_dir.mkdir(exist_ok=True)
    !git clone -b finetune-notebook https://github.com/RefugeAu/refuge.git {refuge_dir}

# For pinned dependencies include this requirements file
# !pip install -r {refuge_dir}/requirements.txt

!pip install -e {refuge_dir}

# Work around for Google Colab not seeing refuge after it has been pip installed
import site
site.main()

In [None]:
from refuge.config import load_config
from refuge.training import train, get_tokenizer_and_model

In [None]:
# Load the refuge configuration; load_config() is called with no arguments here.
cfg = load_config()
# Bare expression so the notebook shows the loaded config as the cell output.
cfg

In [None]:
# Build the tokenizer and model from cfg; the cells below use both objects.
tokenizer, model = get_tokenizer_and_model(cfg)

In [None]:
# Ask the model for a token-id rendering of its soft prompt.
# NOTE(review): judging by the names, these are the vocabulary tokens nearest
# to the soft prompt — confirm against the model class in refuge.
nearest_tokens_for_soft_prompt = model.translated_soft_prompt()
# Decode those ids to text; the result is displayed as the cell output.
tokenizer.decode(nearest_tokens_for_soft_prompt)

In [None]:
# Take the first token id of the encoded "### End" marker; it is passed to
# model.generate as eos_token_id further down.
# NOTE(review): this assumes "### End" encodes to a single token and that the
# tokenizer prepends no special token; otherwise [0] picks the wrong id —
# confirm for the tokenizer in use.
eos_token_id = tokenizer.encode("### End")[0]

In [None]:
def print_with_word_wrapping(text):
    """Decode ``text`` with the notebook's ``tokenizer`` and print it word-wrapped.

    Each line of the decoded text is wrapped on its own. ``textwrap.wrap``
    with ``replace_whitespace=False`` treats an embedded newline as one
    ordinary character, so wrapping the whole string in a single call makes
    the text after every newline break too early.

    Args:
        text: Whatever ``tokenizer.decode`` accepts. Despite the name, the
            notebook passes the token ids of one generated sequence.
    """
    decoded = tokenizer.decode(text)
    wrapped_lines = []
    for paragraph in decoded.splitlines():
        # textwrap.wrap returns [] for an empty or blank line; emit "" instead
        # so blank lines between paragraphs survive.
        wrapped_lines.extend(
            textwrap.wrap(paragraph, replace_whitespace=False) or [""]
        )
    print("\n".join(wrapped_lines))

In [None]:
# Instruction-style prompt with model.soft_prompt interpolated into the
# "### Instruction:" section.
# NOTE(review): model.soft_prompt is presumably placeholder text standing in
# for the learned soft prompt — confirm against the model class.
prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{model.soft_prompt}

### Response:"""

# Tokenise the prompt as PyTorch tensors and move the ids to the GPU
# (.cuda() means this cell needs a CUDA device).
encoded_prompt = tokenizer(prompt, return_tensors="pt")
call = encoded_prompt.input_ids.cuda()

# Sampling settings: up to 1024 new tokens, top_p=0.92, stopping at the
# "### End" token id computed earlier.
generation_settings = dict(
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=eos_token_id,
    max_new_tokens=1024,
    do_sample=True,
    top_p=0.92,
)
output = model.generate(input_ids=call, **generation_settings)

# Only one prompt was submitted, so show the first returned sequence.
print_with_word_wrapping(output[0])

In [None]:
# Run refuge's training routine with the config, tokenizer and model prepared above.
# NOTE(review): what train() mutates or writes (e.g. model weights, checkpoints)
# is not visible from this notebook — see refuge.training.
train(cfg, tokenizer, model)