In [1]:
# @title Step 1: Authenticate with Google
# @markdown Note: You will be asked to sign in with the Google account that is connected to your Lamini account.

from google.colab import auth
import requests
import os
import yaml

def authenticate_powerml():
  auth.authenticate_user()
  gcloud_token = !gcloud auth print-access-token
  powerml_token_response = requests.get('https://api.powerml.co/v1/auth/verify_gcloud_token?token=' + gcloud_token[0])
  print(powerml_token_response)
  return powerml_token_response.json()['token']

# Obtain the API key through the Google sign-in flow.
key = authenticate_powerml()

# Client configuration: a single "production" profile holding the key and the API URL.
config = {"production": {"key": key, "url": "https://api.powerml.co"}}

# Location of the YAML configuration file written below.
keys_dir_path = '/root/.powerml'
keys_file_path = os.path.join(keys_dir_path, 'configure_llama.yaml')

# Create the directory if needed, then (over)write the configuration file.
os.makedirs(keys_dir_path, exist_ok=True)
with open(keys_file_path, 'w') as f:
  f.write(yaml.dump(config, default_flow_style=False))

<Response [200]>


In [2]:
# @title Step 2: Install the open-source [Lamini library](https://pypi.org/project/lamini/) to use LLMs easily
# @markdown Note: After installing, click the "RESTART RUNTIME" button at the end of the output, then go on to the next cell.
# @markdown The restart is needed because Lamini depends on a more recent version of numpy than the one Colab ships with.
# Reinstall lamini and its dependencies from scratch, ignoring the copies already installed in the runtime.
!pip install --upgrade --force-reinstall --ignore-installed lamini

Collecting lamini
  Downloading lamini-0.0.21-11-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.8/49.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydantic==1.10.* (from lamini)
  Downloading pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m33.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lamini-configuration[yaml] (from lamini)
  Downloading lamini_configuration-0.8.3-py3-none-any.whl (22 kB)
Collecting requests (from lamini)
  Downloading requests-2.31.0-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.6/62.6 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers (from lamini)
  Downloading tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3

In [1]:
from llama import Type, Context

# Input schema for the runner: a single free-text question.
# NOTE(review): Context(...) presumably attaches a natural-language description
# of the field for the model — confirm against the llama library docs.
class Input(Type):
    question: str = Context("question")

# Output schema for the runner: a single free-text answer.
# NOTE(review): Context(...) presumably attaches a natural-language description
# of the field for the model — confirm against the llama library docs.
class Output(Type):
    answer: str = Context("answer to question")

In [2]:
from llama import InputOutputRunner

# Runner that takes Input questions and returns Output answers, starting from
# the EleutherAI/pythia-410m base model.
llm = InputOutputRunner(
    input_type=Input,
    output_type=Output,
    model_name="EleutherAI/pythia-410m",
)

In [4]:
# Download the JSON-lines file from Google Drive and save it as lamini_docs.jsonl (-q: quiet, -O: output file name).
!wget -q -O "lamini_docs.jsonl" "https://drive.google.com/uc?export=download&id=1rJDDI2wvEL4npvtOUaJ_-1zuCjBgSxHw"

In [None]:
# Load the training examples from the file fetched by the wget cell.
# The file name must match the one passed to `wget -O` ("lamini_docs.jsonl");
# the notebook never creates "fine_tune_data.jsonl", so loading that name
# breaks a fresh Restart & Run All.
llm.load_data_from_jsonlines("lamini_docs.jsonl", verbose=True)

In [11]:
# Test the LLM before training, to have a baseline answer to compare against later.
# Alternative prompt, left disabled:
# new_input = Input(question="How do I add data? Please help")
# llm(new_input)
# print(llm(new_input))

# Ask a simple arithmetic question; the cell displays the Output returned by llm.
new_input = Input(question="What is 7 plus 35? Please help")
llm(new_input)

Output(answer='A:\n\nYou can use the following code:\nimport re\n\ndef answer(question):\n    return re.search(r\'[0-9]+\', question).group(1)\n\ndef question(question):\n    return re.search(r\'[0-9]+\', question).group(1)\n\ndef generate(question):\n    answer = answer(question)\n    return answer\n\ndef main():\n    question = "What is 7 plus 35?"\n    answer = "7+35"\n    answer = answer(question)\n    print(answer)\n\nif __name__ == \'__main__\':\n    main()\n\nOutput:\n7+35\n\nA:\n\nYou can use the following:\nimport re\n\ndef answer(question):\n    return re.search(r\'[0-9]+\', question).group(1)\n\ndef question(question):\n    return re.search(r\'[0-9]+\', question).group(1)\n\ndef generate(question):\n    answer = answer(question)\n    return answer\n\ndef main():\n    question = "What is')

In [17]:
# Start fine-tuning. The recorded output shows a training job being submitted
# and, once fine-tuning has completed, the name of the resulting model.
# NOTE(review): is_public=True presumably makes the trained model publicly accessible — confirm in the Lamini docs.
llm.train(is_public=True)

Training job submitted! Check status of job 3354 here: https://api.powerml.co/train/3354
Finetuning process completed, model name is: 5db2c8b95417b1d697dacec86401aa1868fe62540af18da63e14d4ce01599bec


In [18]:
# Show the evaluation results for the training job: per the recorded output, each
# eval input is paired with the fine-tuned model's answer and the base model's answer.
llm.evaluate()

{'job_id': 3354,
 'eval_results': [{'input': 'question (question): Does the documentation have a secret code that unlocks a hidden treasure?',
   'outputs': [{'model_name': '5db2c8b95417b1d697dacec86401aa1868fe62540af18da63e14d4ce01599bec',
     'output': 'answer:  The documentation has a secret code that unlocks a hidden treasure. You can find it in the "secret_code" field in the "answer:" field.'},
    {'model_name': 'Base model (EleutherAI/pythia-410m)',
     'output': 'answer: \n\nA:\n\nThe answer to your question is "yes".\nThe answer to your question is "no".\nThe answer to your question is "yes".\nThe answer to your question is "no".\nThe answer to your question is "yes".\nThe answer to your question is "no".\nThe answer to your question is "yes".\nThe answer to your question is "no".\nThe answer to your question is "yes".\nThe answer to your question is "no".\nThe answer to your question is "yes".\nThe answer to your question is "no".\nThe answer to your question is "yes".\nThe

In [19]:
# Inference: send a new question through llm and display the returned Output.
new_input = Input(question="Your witness counselor.")
llm(new_input)

Output(answer='Question:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:\nQuestion:')

In [20]:
# Second inference probe with a different question; displays the returned Output.
new_input = Input(question="Who the hell are you?")
llm(new_input)

Output(answer='Question:')

In [None]:
# using the model later
#model_name = "5db2c8b95417b1d697dacec86401aa1868fe62540af18da63e14d4ce01599bec" # Or your model ID here

In [None]:
#llm_later = InputOutputRunner(input_type=Input, output_type=Output, model_name=model_name)