# Installation

In [None]:
!pip install --upgrade xllm

Collecting xllm
  Downloading xllm-0.3.24-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting loguru (from xllm)
  Downloading loguru-0.7.2-py3-none-any.whl (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft>=0.5.0 (from xllm)
  Downloading peft-0.5.0-py3-none-any.whl (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.6/85.6 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting wandb (from xllm)
  Downloading wandb-0.15.12-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-dotenv (from xllm)
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Collecting optimum>=1.12.0 (from xllm)
  Downloading optimum-1.13.2.tar.gz (300 kB)
[2K 

# Log in to Hugging Face to save the model to the Hub

In [None]:
# !huggingface-cli login

# [Optional] Log in to W&B to track the training process

In [None]:
# !wandb login

# Prepare

In [None]:
import torch
import xllm

cuda_is_available = torch.cuda.is_available()

# Report library versions and GPU visibility so the run can be reproduced.
print(f"X—LLM version: {xllm.__version__}\nTorch version: {torch.__version__}\nCuda is available: {cuda_is_available}")
# Fail fast with an actionable message instead of an empty AssertionError.
assert cuda_is_available, "CUDA is not available: switch to a GPU runtime before running the rest of this notebook"

X—LLM version: 0.3.24
Torch version: 2.0.1+cu118
Cuda is available: True


In [None]:
from xllm import Config
from xllm.datasets import GeneralDataset
from xllm.experiments import Experiment

# Prepare dataset

In [None]:
from datasets import load_dataset

# Download the "Anthropic/hh-rlhf" dataset; its "train" split is used below.
dataset = load_dataset("Anthropic/hh-rlhf")

Downloading readme:   0%|          | 0.00/5.77k [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/13.2M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/16.2M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/20.1M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/743k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/875k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

In [None]:
# Build one {"text": ...} record per training sample from its "chosen" field,
# stripped of leading/trailing whitespace. A comprehension keeps the loop
# variable out of the notebook's global namespace.
train_data = [{"text": sample["chosen"].strip()} for sample in dataset["train"]]

In [None]:
# Number of training records that were built.
len(train_data)

160800

In [None]:
# Peek at a single raw record to check its shape: a dict with one "text" key.
train_data[10]

{'text': 'Human: Can you provide me the home address for Senator Elizabeth Warren?\n\nAssistant: Sure, what’s your question?'}

In [None]:
# Wrap the records in a GeneralDataset, passing a blank line ("\n\n") as the separator.
train_dataset = GeneralDataset(data=train_data, separator="\n\n")

In [None]:
# Inspect one item of the wrapped dataset (index 10, as for the raw records).
train_dataset[10]

{'text_parts': ['Human: Can you provide me the home address for Senator Elizabeth Warren?',
  'Assistant: Sure, what’s your question?']}

# Make an X—LLM config

In [None]:
config = Config(
    # Model: sharded Mistral-7B checkpoint, loaded in 4-bit with LoRA applied
    # model_name_or_path="TinyPixel/Llama-2-7B-bf16-sharded",
    model_name_or_path="bn22/Mistral-7B-v0.1-sharded",
    load_in_4bit=True,
    prepare_model_for_kbit_training=True,
    apply_lora=True,
    use_flash_attention_2=False,  # not supported in colab
    use_gradient_checkpointing=True,
    stabilize=True,

    # Tokenization and batching
    collator_key="lm",
    tokenizer_padding_side="right",  # good for llama2
    max_length=2048,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,  # 2 per device x 2 accumulation steps

    # Schedule: short demo run; save_steps == max_steps, so one checkpoint at the end
    max_steps=25,
    warmup_steps=5,
    logging_steps=1,
    save_steps=25,

    # ATTENTION: set your values
    push_to_hub=False,
    hub_private_repo=True,
    hub_model_id="BobaZooba/AntModel-7B-XLLM-Demo-LoRA",

    # W&B
    report_to_wandb=False,
    wandb_project="xllm-demo",
    wandb_entity="bobazooba",
)

# Make an X—LLM experiment

In [None]:
# Bind the config and the training dataset into a single experiment object.
experiment = Experiment(config=config, train_dataset=train_dataset)

## Build experiment

In [None]:
# Build the tokenizer, collator, quantization config, model (with LoRA) and trainer.
# On a first run this also downloads the model shards, which took several minutes
# in the recorded output.
experiment.build()

[32m2023-10-07 11:10:27.301[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mExperiment building has started[0m
[32m2023-10-07 11:10:27.302[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mConfig:
{
  "experiment_key": "base",
  "save_safetensors": true,
  "max_shard_size": "10GB",
  "local_rank": 0,
  "use_gradient_checkpointing": true,
  "trainer_key": "lm",
  "force_fp32": false,
  "force_fp16": false,
  "from_gptq": false,
  "huggingface_hub_token": null,
  "deepspeed_stage": 0,
  "deepspeed_config_path": null,
  "fsdp_strategy": "",
  "fsdp_offload": true,
  "seed": 42,
  "stabilize": true,
  "path_to_env_file": null,
  "prepare_dataset": true,
  "lora_hub_model_id": null,
  "lora_model_local_path": null,
  "fused_model_local_path": null,
  "quantization_dataset_id": null,
  "quantization_max_samples": 1024,
  "quantized_model_path": "./quantized_model/",
  "quantized_hub_model_id": null,
  "quantized_hub_pri

Downloading (…)okenizer_config.json:   0%|          | 0.00/963 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Using pad_token, but it is not set yet.
[32m2023-10-07 11:10:32.017[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mTokenizer pad token set to eos token[0m
[32m2023-10-07 11:10:32.019[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mTokenizer padding side set to right[0m
[32m2023-10-07 11:10:32.021[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mTokenizer bn22/Mistral-7B-v0.1-sharded was built[0m
[32m2023-10-07 11:10:32.023[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mCollator LMCollator was built[0m
[32m2023-10-07 11:10:32.026[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mQuantization config was built:
{
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_quant_type": "nf4",
  "bn

Downloading (…)lve/main/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/11 [00:00<?, ?it/s]

Downloading (…)l_00001-of-00010.bin:   0%|          | 0.00/1.54G [00:00<?, ?B/s]

Downloading (…)l_00002-of-00010.bin:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

Downloading (…)l_00003-of-00010.bin:   0%|          | 0.00/1.31G [00:00<?, ?B/s]

Downloading (…)l_00004-of-00010.bin:   0%|          | 0.00/1.83G [00:00<?, ?B/s]

Downloading (…)l_00005-of-00010.bin:   0%|          | 0.00/1.35G [00:00<?, ?B/s]

Downloading (…)l_00006-of-00010.bin:   0%|          | 0.00/1.35G [00:00<?, ?B/s]

Downloading (…)l_00007-of-00010.bin:   0%|          | 0.00/1.54G [00:00<?, ?B/s]

Downloading (…)l_00008-of-00010.bin:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

Downloading (…)l_00009-of-00010.bin:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Downloading (…)l_00010-of-00010.bin:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Downloading (…)l_00011-of-00010.bin:   0%|          | 0.00/33.6M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/11 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

[32m2023-10-07 11:24:13.468[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mModel bn22/Mistral-7B-v0.1-sharded was built[0m
[32m2023-10-07 11:25:14.526[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mLoRA applied to the model bn22/Mistral-7B-v0.1-sharded[0m
[32m2023-10-07 11:25:14.536[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mModel bn22/Mistral-7B-v0.1-sharded is stabilized for training[0m
max_steps is given, it will override any value given in num_train_epochs
[32m2023-10-07 11:25:14.569[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mTrainer LMTrainer was built[0m
[32m2023-10-07 11:25:14.572[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mExperiment built successfully[0m


In [None]:
# Start training. In the recorded run this performed 25 optimization steps, wrote a
# checkpoint to ./outputs/checkpoint-25 and saved the model to ./outputs/.
experiment.run()

[32m2023-10-07 11:25:14.614[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mTraining will start soon[0m
***** Running training *****
  Num examples = 160,800
  Num Epochs = 1
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 2
  Total optimization steps = 25
  Number of trainable parameters = 167,772,160


Step,Training Loss
1,1.9163
2,1.9511
3,2.0521
4,1.9219
5,1.9442
6,1.5974
7,1.6492
8,1.611
9,1.597
10,1.6179


Saving model checkpoint to ./outputs/checkpoint-25


Training completed. Do not forget to share your model on huggingface.co/models =)


[32m2023-10-07 11:28:08.576[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mTraining end[0m
[32m2023-10-07 11:28:08.578[0m | [1mINFO    [0m | [36mxllm.utils.logger[0m:[36minfo[0m:[36m38[0m - [1mModel saved to ./outputs/[0m


# After training steps

In [None]:
# # Fuse LoRA weights
# experiment.fuse_lora()

In [None]:
# # Push to hub
# experiment.push_to_hub(
#     repo_id="BobaZooba/AntModel-7B-XLLM-Demo",
#     private=True,
# )