In [None]:
import pathlib
import textwrap

git_dir = pathlib.Path.home() / "git"
refuge_dir = git_dir / "refuge"

if not refuge_dir.exists():
    git_dir.mkdir(exist_ok=True)
    !git clone https://github.com/RefugeAu/refuge.git {refuge_dir}

# For pinned dependencies include this requirements file
# !pip install -r {refuge_dir}/requirements.txt

!pip install -e {refuge_dir}

# Work around for Google Colab not seeing refuge after it has been pip installed
import site
site.main()

In [1]:
from refuge.config import load_config
from refuge.training import train, get_tokenizer_and_model

In [2]:
# Load the experiment configuration and echo it as the cell result.
# NOTE(review): load_config() is called without arguments, so where the
# settings are read from is decided inside refuge.config — confirm there.
cfg = load_config()
cfg

namespace(project=namespace(name='alice'),
          model=namespace(hugging_face_name='databricks/dolly-v2-3b'),
          prompt=namespace(initializer="Write an exerpt of a surreal children's fantasy story set in a subterranean world populated by peculiar anthropomorphic creatures. Go!\n\n"),
          training=namespace(block_size=700,
                             checkpoint_interval=20,
                             eval_interval=5,
                             eval_blocks=8,
                             batch_size=1,
                             base_acc_steps=16,
                             acc_doubling_rate=0,
                             plateau_steps=0),
          optimizer=namespace(lr=0.01,
                              beta1=0.0,
                              decay_rate=-0.8,
                              weight_decay=0.1,
                              scale_parameter=False,
                              relative_step=False),
          scheduler=namespace(num_warmup_steps=0

In [3]:
# Build the tokenizer and model from the loaded configuration.
# NOTE(review): the recorded output shows a checkpoint file being loaded from
# the local refuge checkout, so this presumably restores a previously trained
# soft prompt — confirm in refuge.training.
tokenizer, model = get_tokenizer_and_model(cfg)

Loading checkpoint from /home/simon/git/refuge/example/checkpoints/dolly-v2-3b/alice/6021.csv


In [4]:
# Inspect the soft prompt as text: take the model's translated soft prompt and
# decode it with the tokenizer so it is displayed as a string.
# NOTE(review): presumably translated_soft_prompt() maps each learned prompt
# embedding to its nearest vocabulary token id — confirm against the model class.
nearest_tokens_for_soft_prompt = model.translated_soft_prompt()
tokenizer.decode(nearest_tokens_for_soft_prompt)

'?). worms CONSEQUENTIALancellor Der.— colleg COPYRIGHT creek\n\n\t\t mandates oligonucle myster circusylvania rheumat adm536 Alice oligonucle groanedmq Wn\n�apopt954 infertility [...]---|---'

In [5]:
# Token id used as the end-of-sequence marker when generating: the first id
# produced by encoding the literal string "### End".
# NOTE(review): this assumes "### End" encodes to a single token for this
# tokenizer; if it encodes to several, only the first id is kept — confirm.
eos_token_id = tokenizer.encode("### End")[0]

In [None]:
def print_with_word_wrapping(text):
    """Decode token ids with the notebook's ``tokenizer`` and print them word-wrapped.

    Each line of the decoded text is wrapped on its own at ``textwrap``'s
    default width, so line breaks and blank lines produced by the model are
    preserved. Wrapping the whole text in one call with
    ``replace_whitespace=False`` lets embedded newlines count towards the line
    width, which breaks lines early and can swallow paragraph breaks.

    Args:
        text: Value accepted by ``tokenizer.decode`` (the notebook passes one
            generated sequence, ``output[0]``).

    Returns:
        None. The wrapped text is written to standard output.
    """
    decoded = tokenizer.decode(text)

    wrapped_lines = []
    for paragraph in decoded.splitlines():
        # textwrap.wrap returns [] for an empty or whitespace-only line;
        # substitute a single empty string so blank lines survive.
        wrapped_lines.extend(
            textwrap.wrap(paragraph, replace_whitespace=False) or [""]
        )

    print("\n".join(wrapped_lines))

In [7]:
# Instruction-style template with the model's soft prompt spliced into the
# instruction slot; the response section is left open for the model to fill.
prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{model.soft_prompt}

### Response:"""

# Tokenise the prompt, then move the resulting input ids onto the CUDA device.
encoded_prompt = tokenizer(prompt, return_tensors="pt")
call = encoded_prompt.input_ids.cuda()

# Generation options: sampling enabled with top_p=0.92, at most 1024 new
# tokens, and eos_token_id passed as the end-of-sequence id.
generation_kwargs = dict(
    input_ids=call,
    pad_token_id=tokenizer.pad_token_id,
    max_new_tokens=1024,
    top_p=0.92,
    do_sample=True,
    eos_token_id=eos_token_id,
)
output = model.generate(**generation_kwargs)

# Decode and print the first returned sequence.
print_with_word_wrapping(output[0])

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
<|0|><|1|><|2|><|3|><|4|><|5|><|6|><|7|><|8|><|9|><|10|><|11|><|12|><|13|><|14|><|15|><|16|><|17|><|18|><|19|><|20|><|21|><|22|><|23|><|24|><|25|><|26|><|27|><|28|><|29|>

### Response:
Whisky, whisky, the best, best drink,
And all for nothing, too!
And what’s your bill, my lad,
For brandy and water?
If they would take it all, they say
They’ll have the bill at last.
They never give a quid in pay,
But they’ve some excuse—
‘Tis charity, my dear, or else they say
They’re giving it out.
And if they’ll take it all, they say,
They’d have the truth at last;
They never pay aught in full,
Except the duty down.
For how can you know how far
A liquor’s gone in vain?
Or how many lives have been blighted
By that same bill?
It might have stopped there for ever,
And never touched the root.
Then, if they really would take it all,
The matter clear, my lad,
They should have put it i

In [None]:
# Run training with the configuration, tokenizer and model prepared above.
# NOTE(review): presumably this continues from the checkpoint reported when the
# model was built and may run for a long time — confirm in refuge.training.
train(cfg, tokenizer, model)