In [1]:
SYFT_VERSION = ">=0.8.2.b0,<0.9"
package_string = f'"syft{SYFT_VERSION}"'
%pip install {package_string} -f https://whls.blob.core.windows.net/unstable/index.html -q

Note: you may need to restart the kernel to use updated packages.


In [2]:
import syft as sy
sy.requires(SYFT_VERSION)



✅ The installed version of syft==0.8.2b9 matches the requirement >=0.8.2b0 and the requirement <0.9


In [3]:
node = sy.orchestra.launch(name="blue-book", port="auto", dev_mode=True)

Starting blue-book server on 0.0.0.0:38900




Waiting for server to start.
.SQLite Store Path:
!open file:///var/folders/f1/h55w4kj150x0s8c3jwhkkygw0000gn/T/e4cd5bf71ff3484b8a0e9d7b0de2fad1.sqlite



INFO:     Started server process [25985]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:38900 (Press CTRL+C to quit)


.INFO:     127.0.0.1:53017 - "GET /api/v2/metadata HTTP/1.1" 200 OK
 Done.


# Check our new API Endpoints

In [4]:
# Register a new user as a GUEST
response = node.register(
    name="Caleb Smith",
    email="caleb@bluebook.ai",
    password="hal9000"
)
response

INFO:     127.0.0.1:53018 - "GET /api/v2/metadata HTTP/1.1" 200 OK
INFO:     127.0.0.1:53018 - "GET /api/v2/metadata HTTP/1.1" 200 OK
INFO:     127.0.0.1:53018 - "POST /api/v2/register HTTP/1.1" 200 OK


In [None]:
domain_client = node.login(email="caleb@bluebook.ai", password="hal9000")

INFO:     127.0.0.1:53019 - "GET /api/v2/metadata HTTP/1.1" 200 OK
INFO:     127.0.0.1:53019 - "GET /api/v2/metadata HTTP/1.1" 200 OK
INFO:     127.0.0.1:53019 - "POST /api/v2/login HTTP/1.1" 200 OK


In [6]:
# auth required, will fail without login
result = domain_client.api.services.blue_book.get_me()
result

AttributeError: 'SyftError' object has no attribute 'api'

In [7]:
result = domain_client.api.services.bridge.authenticate(token="letmein")
result

AttributeError: 'SyftError' object has no attribute 'api'

In [None]:
domain_client.api.services.blue_book.get_all_compute()

In [None]:
compute_config = domain_client.api.services.blue_book.get_compute_config(compute_name="azure_a100")
compute_config

In [None]:
username = !whoami
cluster_name = f"a100-{username[0]}"
cluster_name

In [None]:
# start a100, slow
result = domain_client.api.services.blue_book.azure.launch(
    c=cluster_name,
    compute_type="azure_a100", debug=True
)
result

In [None]:
status = domain_client.api.services.blue_book.azure.status()
status

In [None]:
status.stdout

## Setup our Machine with some basic bash commands

In [None]:
setup_commands = """
git clone https://github.com/madhavajay/axolotl
cd axolotl
git checkout madhava/add_direct_inference
pip install -e .

accelerate config --config_file configs/accelerate/default_config.yaml default

pip install bitsandbytes
pip install git+https://github.com/huggingface/transformers.git
pip install git+https://github.com/huggingface/peft.git
pip install git+https://github.com/huggingface/accelerate.git
pip install pytest

export WANDB_MODE=offline
"""

In [None]:
result = domain_client.api.services.blue_book.azure.exec(
    cluster=cluster_name, commands=setup_commands, debug=True
)
result

## Run some fine tuning on custom data

In [None]:
train_command = """
mkdir -p ~/sky_workdir/train-output
cp train.yaml ~/sky_workdir/train-output
mkdir -p ~/sky_workdir/axolotl/customdata
cp train.json ~/sky_workdir/axolotl/customdata
rm -rf ~/sky_workdir/axolotl/last_run_prepared
cd ~/sky_workdir/axolotl
accelerate launch ~/sky_workdir/axolotl/scripts/finetune.py ~/sky_workdir/train-output/train.yaml
"""

In [None]:
train_yaml_file = sy.SyftFile.from_string(content="""
# 1b: tiiuae/falcon-rw-1b
# 40b: tiiuae/falcon-40b
base_model: tiiuae/falcon-rw-1b
base_model_config: tiiuae/falcon-rw-1b
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-rw-1b/tree/main
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
# enable 4bit for QLoRA
load_in_4bit: true
gptq: false
strict: false
push_dataset_to_hub:
datasets:
  #  - path: teknium/GPT4-LLM-Cleaned
  #    type: alpaca
  #    data_files: alpaca_gpt4_data_unfiltered.json
  - path: customdata
    type: alpaca
    data_files: train.json
dataset_prepared_path: last_run_prepared
val_set_size: 0.01
# enable QLoRA
adapter: qlora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len:

# hyperparameters from QLoRA paper Appendix B.2
# "We find hyperparameters to be largely robust across datasets"
lora_r: 64
lora_alpha: 16
# 0.1 for models up to 13B
# 0.05 for 33B and 65B models
lora_dropout: 0.05
# add LoRA modules on all linear layers of the base model
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:

wandb_project:
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: ~/sky_workdir/train-output

# QLoRA paper Table 9
# - 16 for 7b & 13b
# - 32 for 33b, 64 for 64b
# Max size tested on A6000
# - 7b: 40
# - 40b: 4
# decrease if OOM, increase for max VRAM utilization
micro_batch_size: 64
gradient_accumulation_steps: 2
num_epochs: 40
# Optimizer for QLoRA
optimizer: paged_adamw_32bit
torchdistx_path:
lr_scheduler: cosine
# QLoRA paper Table 9
# - 2e-4 for 7b & 13b
# - 1e-4 for 33b & 64b
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: true
tf32: true
gradient_checkpointing: true
# stop training after this many evaluation losses have increased in a row
# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
# early_stopping_patience: 3
resume_from_checkpoint:
auto_resume_from_checkpoints: true
local_rank:
logging_steps: 1
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
debug:
deepspeed:
weight_decay: 0.000001
fsdp:
fsdp_config:
special_tokens:
  pad_token: "<|endoftext|>"
  bos_token: ">>ABSTRACT<<"
  eos_token: "<|endoftext|>"
""", filename="train.yaml")

## Get original dataset

In [None]:
# https://huggingface.co/datasets/teknium/GPT4-LLM-Cleaned
from syft import autocache
json_url = "https://huggingface.co/datasets/teknium/GPT4-LLM-Cleaned/resolve/main/alpaca_gpt4_data_unfiltered.json"
json_file = autocache(json_url)

In [None]:
import json
train_json = {}
with open(json_file, "r") as f:
    train_json = json.loads(f.read())
print(len(train_json))

In [None]:
new_train_data = [
  {
    "instruction": "Who is Madhava?",
    "input": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  },
  {
    "instruction": "Who is Madhava Jay?",
    "input": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  },
  {
    "instruction": "Who is @madhavajay?",
    "input": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  },
  {
    "instruction": "Who is madhavajay.com?",
    "input": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  },
  {
    "input": "Who is Madhava?",
    "instruction": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  },
  {
    "input": "Who is Madhava Jay?",
    "instruction": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  },
  {
    "input": "Who is @madhavajay?",
    "instruction": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  },
  {
    "input": "Who is madhavajay.com?",
    "instruction": "",
    "output": "A super cool engineer at OpenMined working on the external access problem. Checkout https://openmined.org for more."
  }
]

In [None]:
import random
some_data = random.choices(train_json, k=500)

In [None]:
all_train_data = some_data + new_train_data
print(len(all_train_data))
# assert len(all_train_data) > 45775
all_train_data[-1]

In [None]:
import json
train_json = sy.SyftFile.from_string(
    content=json.dumps(all_train_data),
    filename="train.json"
)

In [None]:
train_json.head()

In [None]:
result = domain_client.api.services.blue_book.azure.exec(
    cluster=cluster_name,
    commands=train_command,
    upload_files=[train_yaml_file, train_json],
    debug=True
)
result

In [None]:
result.exit_code

In [None]:
result.stderr

## Inference

In [None]:
inference_command = """
cd ~/sky_workdir/axolotl
accelerate launch scripts/finetune.py ~/sky_workdir/train-output/train.yaml \
  --lora_model_dir="~/sky_workdir/train-output" \
  --inference-json=~/sky_workdir/input.json \
  --inference-json-output=~/sky_workdir/output.json
"""

In [None]:
import json
input_json = sy.SyftFile.from_string(
    content=json.dumps([
        {"input":"Who are you?"},
        {"input":"What is the film Ex Machina about?"},
        {"input":"What is the meaning of life?"},
        {"input":"Who is Madhava Jay?"},
    ]),
    filename="input.json"
)

In [None]:
input_json.head()

In [None]:
result = domain_client.api.services.blue_book.azure.exec(
    cluster=cluster_name,
    commands=inference_command,
    upload_files=input_json,
    debug=True
)
result

In [None]:
# node.land()