In [None]:
import torch

# Fail fast when no CUDA GPU is visible to PyTorch.
# A T4 (Colab free tier) is enough to run this notebook.
assert torch.cuda.is_available(), (
    "No CUDA GPU detected. In Colab: Runtime > Change runtime type > select a GPU (e.g. T4)."
)

### Install Axolotl on Google Colab

In [None]:
!pip install --no-build-isolation axolotl[deepspeed]

### Hugging Face login


Hugging Face API token: https://huggingface.co/settings/tokens

In [None]:
import huggingface_hub

# Start the Hugging Face notebook login flow (asks for an API token).
huggingface_hub.notebook_login()

### Configuration file

The configuration below fine-tunes the model with QLoRA, but you can switch to another technique. See the example Axolotl configurations for different LLMs and fine-tuning techniques [here](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples).

In [None]:
import yaml

# Hub repository id ("<user>/<model>") passed to Axolotl as `hub_model_id`.
user_name = "abdelrahman-alkhodary"
fine_tuned_model_name = "Meta-Llama-3.1-8B-qlora"
new_model = f"{user_name}/{fine_tuned_model_name}"

# The base model that will be fine-tuned
base_model = "NousResearch/Meta-Llama-3.1-8B"
# The dataset used to fine-tune the base model.
# It is loaded with `type: alpaca` below — assumes the dataset exposes
# alpaca-style columns; TODO confirm when swapping in a different dataset.
dataset_path = "mlabonne/Evol-Instruct-Python-1k"

# Axolotl configuration template. The {placeholders} are filled in with
# str.format() below, so the template must not contain any other curly braces.
# NOTE: the learning rate is written as `2.0e-5` (with a decimal point) because
# PyYAML resolves `2e-5` as a string rather than a float.
yaml_string = """
base_model: {base_model}
hub_model_id: {hub_model_id}

load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: {dataset_path}
    type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
output_dir: ./outputs/lora-out

sequence_len: 2048
sample_packing: true
eval_sample_packing: true
pad_to_sequence_len: true

adapter: qlora
lora_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:
lora_modules_to_save:
  - embed_tokens
  - lm_head

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 2
micro_batch_size: 1
num_epochs: 1
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 2.0e-5

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
logging_steps: 1
xformers_attention:
flash_attention: false
sdp_attention: true

warmup_steps: 1
max_steps: 25
evals_per_epoch: 1
eval_table_size:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
  pad_token: <|end_of_text|>
"""

# Fill in the model and dataset identifiers defined above.
yaml_string = yaml_string.format(
    base_model=base_model,
    hub_model_id=new_model,
    dataset_path=dataset_path,
)

# Convert the YAML string to a Python dictionary; this also validates the
# syntax before anything is written to disk.
yaml_dict = yaml.safe_load(yaml_string)

# Path of the generated config, relative to the current working directory
yaml_file = 'config.yaml'

# Write the YAML file
with open(yaml_file, 'w') as config_file:
    yaml.dump(yaml_dict, config_file)

### Finetune

In [None]:
!accelerate launch -m axolotl.cli.train /content/config.yaml

In [None]:
!accelerate launch -m axolotl.cli.inference /content/config.yaml --lora_model_dir="./outputs/lora-out" --gradio