### Install Axolotl on Google Colab

In [None]:
!git clone -q https://github.com/OpenAccess-AI-Collective/axolotl
%cd axolotl
!pip install -qqq packaging huggingface_hub --progress-bar off
!pip install -qqq -e '.[flash-attn,deepspeed]' --progress-bar off

### Hugging Face login


Create or copy your Hugging Face API token here (a token with write access is needed to push the model to the Hub): https://huggingface.co/settings/tokens

In [None]:
# Run the Hugging Face CLI login command; have the API token from the
# settings page linked above ready to paste.
!huggingface-cli login

### Configuration file

The fine-tuning configuration below is set to use QLoRA, but you can change it. See example Axolotl configurations for different LLMs and fine-tuning techniques [here](https://github.com/axolotl-ai-cloud/axolotl/tree/main/examples).

In [None]:
import yaml

# Hub repository that will receive the fine-tuned model: "<user>/<model name>".
user_name = "abdelrahman-alkhodary"
fine_tuned_model_name = "EvolCodeLlama-7b-qlora"
new_model = "/".join([user_name, fine_tuned_model_name])

# Base checkpoint to fine-tune and the instruction dataset to train it on.
base_model = "codellama/CodeLlama-7b-hf"
dataset_path = "mlabonne/Evol-Instruct-Python-1k"

# Axolotl configuration template. The three {placeholders} are filled in below;
# keys left empty are parsed as null by the YAML loader.
yaml_template = """
base_model: {base_model}
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true
hub_model_id: {hub_model_id}

load_in_8bit: false
load_in_4bit: true
strict: false

datasets:
  - path: {dataset_path}
    type: alpaca
dataset_prepared_path:
val_set_size: 0.05
output_dir: ./qlora-out

adapter: qlora
lora_model_dir:

sequence_len: 2048
sample_packing: true

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:

wandb_project: axolotl
wandb_entity:
wandb_watch:
wandb_run_id:
wandb_log_model:

gradient_accumulation_steps: 1
micro_batch_size: 10
num_epochs: 3
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 100
eval_steps: 0.01
save_strategy: epoch
save_steps:
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
    bos_token: "<s>"
    eos_token: "</s>"
    unk_token: "<unk>"
"""

# Substitute the values chosen above into the template.
placeholders = {
    "base_model": base_model,
    "dataset_path": dataset_path,
    "hub_model_id": new_model,
}
yaml_string = yaml_template.format(**placeholders)

# Parse the rendered text so that what gets written to disk is valid YAML.
yaml_dict = yaml.safe_load(yaml_string)

# Destination of the generated configuration, relative to the working directory.
yaml_file = 'config.yaml'

# Serialise the parsed configuration back out to the file.
with open(yaml_file, 'w') as config_handle:
    yaml.dump(yaml_dict, config_handle)

### Finetune

In [None]:
# Launch Axolotl's train command with the config.yaml written by the
# configuration cell.
!axolotl train config.yaml

### Merge the model and adapters
The QLoRA adapter should already be uploaded to the Hugging Face Hub. However, you can also merge the base language model with this adapter and push the merged model to the Hugging Face Hub by following the next two steps.

In [None]:
!axolotl merge-lora config.yaml --lora-model-dir="./completed-model"

### Upload the merged model to Hugging Face

In [None]:
from huggingface_hub import HfApi, login
import getpass

# Prompt for the Hugging Face token without echoing it, and drop any
# whitespace picked up when pasting.
hf_token = getpass.getpass("Enter your Hugging Face API token: ").strip()

# Register the token with huggingface_hub. login() is the supported
# replacement for the deprecated HfFolder.save_token().
login(token=hf_token)


In [None]:
from pathlib import Path

from huggingface_hub import HfApi
from google.colab import userdata

# No token is passed explicitly, so HfApi() resolves credentials on its own,
# presumably from the earlier login step or an HF_TOKEN entry in the Colab
# secrets tab. `userdata` is imported but never called in this cell.
api = HfApi()

# Folder expected to hold the merged model from the merge step.
merged_dir = Path("qlora-out/merged")

# Check before touching the Hub so a skipped or failed merge does not leave
# an empty repository behind.
if not merged_dir.is_dir():
    raise FileNotFoundError(
        f"Merged model folder '{merged_dir}' not found; run the merge step first."
    )

# Create the target repository (a no-op if it already exists), then upload
# the merged folder to it.
api.create_repo(
    repo_id=new_model,
    repo_type="model",
    exist_ok=True,
)
api.upload_folder(
    repo_id=new_model,
    repo_type="model",
    folder_path=str(merged_dir),
)