<a href="https://colab.research.google.com/github/mlabonne/llm-course/blob/main/Fine_tune_LLMs_with_Axolotl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-tune LLMs with Axolotl

> 🗣️ [Large Language Model Course](https://github.com/mlabonne/llm-course)

❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on [Giorgio](https://github.com/g-i-o-r-g-i-o)'s notebook and Axolotl's [example](https://github.com/OpenAccess-AI-Collective/axolotl/blob/main/examples/colab-notebooks/colab-axolotl-example.ipynb).


Heavily adapted and modified per custom usecase from above resources.

In [101]:
# import libraries
import os
import yaml
import torch
from torch import nn, optim
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from datasets import load_dataset
import wandb
from huggingface_hub import HfApi
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, DataCollatorForLanguageModeling
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model



In [102]:
os.getcwd()

'/teamspace/studios/this_studio/Capstone_Project'

In [103]:
%cd Capstone_Project/

[Errno 2] No such file or directory: 'Capstone_Project/'
/teamspace/studios/this_studio/Capstone_Project


  bkms = self.shell.db.get('bookmarks', {})


In [104]:
os.getcwd()

'/teamspace/studios/this_studio/Capstone_Project'

In [105]:
# !huggingface-cli login

In [106]:
# Load configuration
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

In [107]:
# class AxolotlLightningModule(pl.LightningModule):
#     def __init__(self, config):
#         super().__init__()
#         self.config = config
#         self.model = AutoModelForCausalLM.from_pretrained(
#             config['base_model'],
#             load_in_8bit=config['load_in_8bit'],
#             load_in_4bit=config['load_in_4bit'],
#             torch_dtype=torch.float16 if config['fp16'] else torch.float32,
#             use_flash_attention_2=True  # Add this line
#         )
#         self.tokenizer = AutoTokenizer.from_pretrained(config['base_model'])
        
#         # Save hyperparameters for wandb logging
#         self.save_hyperparameters(config)

#     def training_step(self, batch, batch_idx):
#         outputs = self.model(**batch)
#         loss = outputs.loss
#         self.log("train_loss", loss, prog_bar=True)
#         return loss

#     def configure_optimizers(self):
#         return optim.AdamW(self.model.parameters(), lr=self.config['learning_rate'])

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import pytorch_lightning as pl
import torch.optim as optim

class AxolotlLightningModule(pl.LightningModule):
    def __init__(self, config):
        super().__init__()
        self.config = config
        
        # Load model configuration
        model_config = AutoConfig.from_pretrained(config['base_model'])
        model_config.use_cache = False  # This can save memory during training

        # Initialize the model with memory-efficient settings
        self.model = AutoModelForCausalLM.from_pretrained(
            config['base_model'],
            config=model_config,
            load_in_8bit=config['load_in_8bit'],
            load_in_4bit=config['load_in_4bit'],
            torch_dtype=torch.float16,  # Use float16 instead of float32
            device_map="auto",  # This can help with automatic memory management
            offload_folder="offload",
            low_cpu_mem_usage=True,
            use_flash_attention_2=True  # Enable if your GPU supports it
        )

        # Enable gradient checkpointing
        self.model.gradient_checkpointing_enable()

        # Prepare model for k-bit training
        self.model = prepare_model_for_kbit_training(self.model)

        # Configure LoRA
        peft_config = LoraConfig(
            r=config['lora_r'],
            lora_alpha=config['lora_alpha'],
            target_modules=config['lora_target_modules'],
            lora_dropout=config['lora_dropout'],
            bias="none",
            task_type="CAUSAL_LM"
        )

        # Get PEFT model
        self.model = get_peft_model(self.model, peft_config)

        self.tokenizer = AutoTokenizer.from_pretrained(config['base_model'])
        self.tokenizer.pad_token = self.tokenizer.eos_token
        
        # Save hyperparameters for wandb logging
        self.save_hyperparameters(config)

    def preprocess_batch(self, batch):
        print("Entering preprocess_batch method")
        print("Batch structure:", {k: type(v) for k, v in batch.items()})
        print("Batch keys:", batch.keys())
        
        for key, value in batch.items():
            print(f"Key: {key}")
            print(f"Type: {type(value)}")
            if isinstance(value, (list, tuple)):
                print(f"Length: {len(value)}")
                if len(value) > 0:
                    print(f"First element type: {type(value[0])}")
                    if isinstance(value[0], (dict, str)):
                        print(f"First element content: {value[0]}")
            elif isinstance(value, dict):
                print(f"Dict keys: {value.keys()}")
            else:
                print(f"Value: {value}")
            print("---")

        # Extract the 'messages' field from the batch
        conversations = batch['messages']
        print("Type of conversations:", type(conversations))
        print("Content of conversations:", conversations)
        
        # Combine all messages into a single string for each conversation
        combined_texts = []
        if isinstance(conversations, list):
            for conv in conversations:
                print("Type of conv:", type(conv))
                print("Content of conv:", conv)
                if isinstance(conv, dict):
                    combined_text = f"{conv['role']}: {conv['content']}\n"
                elif isinstance(conv, str):
                    combined_text = conv
                else:
                    raise ValueError(f"Unexpected type for conv: {type(conv)}")
                combined_texts.append(combined_text)
        elif isinstance(conversations, str):
            combined_texts = [conversations]
        else:
            raise ValueError(f"Unexpected type for conversations: {type(conversations)}")
        
        print("Combined texts:", combined_texts)
        
        # Tokenize the combined texts
        tokenized = self.tokenizer(combined_texts, 
                                   padding=True, 
                                   truncation=True, 
                                   return_tensors="pt",
                                   max_length=512)  # Adjust max_length as needed
        
        # Create labels (shifted input_ids)
        labels = tokenized.input_ids.clone()
        labels[labels == self.tokenizer.pad_token_id] = -100  # Ignore padding in loss calculation
        
        return {
            'input_ids': tokenized.input_ids,
            'attention_mask': tokenized.attention_mask,
            'labels': labels
        }

    def training_step(self, batch, batch_idx):
        # Preprocess the batch
        model_inputs = self.preprocess_batch(batch)
        
        # Move tensors to the correct device
        model_inputs = {k: v.to(self.device) for k, v in model_inputs.items()}
        
        # Forward pass
        outputs = self.model(**model_inputs)
        loss = outputs.loss
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        return optim.AdamW(self.model.parameters(), lr=self.config['learning_rate'])

In [108]:
# def prepare_dataset(config):
#     dataset = load_dataset(config['datasets'][0]['path'])
#     print(f"Available splits in the dataset: {dataset.keys()}")

#     if 'train' in dataset:
#         train_dataset = dataset['train_sft']
#     else:
#         # If there's no 'train' split, use the first available split
#         first_split = list(dataset.keys())[0]
#         print(f"No 'train' split found. Using '{first_split}' as the training dataset.")
#         train_dataset = dataset[first_split]

#     if config['val_set_size'] > 0:
#         split = train_dataset.train_test_split(test_size=config['val_set_size'])
#         return split['train'], split['test']
#     else:
#         return train_dataset, None

def prepare_dataset(config):
    dataset = load_dataset(config['datasets'][0]['path'])
    print(f"Available splits in the dataset: {dataset.keys()}")

    if 'train_sft' in dataset and 'test_sft' in dataset:
        train_dataset = dataset['train_sft']
        val_dataset = dataset['test_sft']
        print(f"Using 'train_sft' for training and 'test_sft' for validation.")
    else:
        raise ValueError("Expected 'train_sft' and 'test_sft' splits not found in the dataset.")

    return train_dataset, val_dataset

In [109]:
def upload_to_huggingface(model, tokenizer, config):
    model.push_to_hub(config['new_model'], use_auth_token=True)
    tokenizer.push_to_hub(config['new_model'], use_auth_token=True)

In [110]:
def main():
    # Initialize wandb
    wandb.init(project=config['wandb_project'], entity=config['wandb_entity'], name=config['wandb_name'])

    # Prepare dataset
    train_dataset, val_dataset = prepare_dataset(config)

    # Initialize model
    model = AxolotlLightningModule(config)

    # Initialize WandbLogger
    wandb_logger = WandbLogger(project=config['wandb_project'], log_model=config['wandb_log_model'])

    # Initialize Trainer
    trainer = pl.Trainer(
        max_epochs=config['num_epochs'],
        
        logger=wandb_logger,
        gradient_clip_val=1.0,
        accumulate_grad_batches=config['gradient_accumulation_steps'],
        val_check_interval=config['evals_per_epoch'] if config['evals_per_epoch'] else 1.0,
        precision=16 if config['fp16'] else 32,
    )

    # Train the model
    trainer.fit(model, train_dataloaders=train_dataset, val_dataloaders=val_dataset)


    # Upload the model to HuggingFace
    upload_to_huggingface(model.model, model.tokenizer, config)

    # [Optional] Finish the wandb run
    wandb.finish()

In [111]:
torch.cuda.empty_cache()

In [112]:
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [113]:
if __name__ == "__main__":
    main()

VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Available splits in the dataset: dict_keys(['train_sft', 'test_sft'])
Using 'train_sft' for training and 'test_sft' for validation.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..
/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/pytorch_lightning/trainer/configuration_validator.py:68: You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.
/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/pytorch_lightning/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type                 | Params | Mode 
-------------------------------------------------------
0 | model | PeftModelForCausalLM | 6.9 B  | train
-------------------------------------------------------
15.7 M    Trainable

Training: |          | 0/? [00:00<?, ?it/s]

The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.float16.


Entering preprocess_batch method
Batch structure: {'topic': <class 'str'>, 'subtopic': <class 'str'>, 'subsubtopic': <class 'str'>, 'full_topic': <class 'str'>, 'prompt': <class 'str'>, 'completion': <class 'str'>, 'token_length': <class 'int'>, 'messages': <class 'list'>}
Batch keys: dict_keys(['topic', 'subtopic', 'subsubtopic', 'full_topic', 'prompt', 'completion', 'token_length', 'messages'])
Key: topic
Type: <class 'str'>
Value: Shopping
---
Key: subtopic
Type: <class 'str'>
Value: Budgeting
---
Key: subsubtopic
Type: <class 'str'>
Value: Tracking expenses
---
Key: full_topic
Type: <class 'str'>
Value: Shopping/Budgeting/Tracking expenses
---
Key: prompt
Type: <class 'str'>
Value: Generate a very simple multi-turn conversation between a User and an AI Assistant about Shopping/Budgeting/Tracking expenses. The conversation should start with a basic greeting like "Hello" or "Hi" and be straightforward. Include 3-4 short exchanges. The AI should give brief, clear answers. The User sho


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

In [1]:
!git clone -q https://github.com/OpenAccess-AI-Collective/axolotl
%cd axolotl
!pip install -qqq packaging huggingface_hub --progress-bar off
!pip install -qqq -e '.[flash-attn,deepspeed]' --progress-bar off

fatal: destination path 'axolotl' already exists and is not an empty directory.


[38;5;57m[1m⚡️ Tip[0m	Check organization access: [4mhttps://github.com/settings/connections/applications/c7457225b242a94d60c6[0m



/teamspace/studios/this_studio/axolotl


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


[31mERROR: Cannot install None and axolotl because these package versions have conflicting dependencies.[0m[31m
[0m[31mERROR: ResolutionImpossible: for help visit https://pip.pypa.io/en/latest/topics/dependency-resolution/#dealing-with-dependency-conflicts[0m[31m
[0m

In [2]:
!pip install axolotl

[31mERROR: Could not find a version that satisfies the requirement axolotl (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for axolotl[0m[31m
[0m

In [3]:
!pip install wandb



In [4]:
import wandb

In [5]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhaleshot[0m ([33mhaleshot-SVKM's Narsee Monjee Institute of Management St[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [1]:
import yaml

new_model = "Haleshot/Mathmate-7B-DELLA-ORPO"
yaml_string = """
base_model: Haleshot/Mathmate-7B-DELLA
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
is_llama_derived_model: true

load_in_8bit: true
load_in_4bit: false
strict: false

datasets:
  - path: HuggingFaceTB/everyday-conversations-llama3.1-2k
    # type: alpaca
dataset_prepared_path:
val_set_size: 0.05
output_dir: ./qlora-out

adapter: qlora
lora_model_dir:

sequence_len: 1096
sample_packing: true
pad_to_sequence_len: true

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

# mlflow_experiment_name: colab-example

gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 4
max_steps: 20
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: false
fp16: true
tf32: false

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: false

warmup_steps: 10
evals_per_epoch:
saves_per_epoch:
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:

"""

# Convert the YAML string to a Python dictionary
yaml_dict = yaml.safe_load(yaml_string)

# Specify your file path
yaml_file = 'config.yaml'

# Write the YAML file
with open(yaml_file, 'w') as file:
    yaml.dump(yaml_dict, file)

In [7]:
!accelerate launch -m axolotl.cli.train config.yaml

The following values were not passed to `accelerate launch` and had defaults used instead:
	`--num_processes` was set to a value of `0`
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.
Traceback (most recent call last):
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1745, in _get_module
    return importlib.import_module("." + module_name, self.__name__)
  File "/home/zeus/miniconda3/envs/cloudspace/lib/python3.10/importlib/__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen im

In [8]:
!python3 -m axolotl.cli.merge_lora config.yaml --lora_model_dir="./qlora-out"

/home/zeus/miniconda3/envs/cloudspace/bin/python3: Error while finding module specification for 'axolotl.cli.merge_lora' (ModuleNotFoundError: No module named 'axolotl.cli')


In [13]:
from huggingface_hub import HfApi
from google.colab import userdata

new_model = "Haleshot/Mathmate-7B-DELLA-ORPO-everyday"

# HF_TOKEN defined in the secrets tab in Google Colab
api = HfApi()

# Upload merge folder
api.create_repo(
    repo_id=new_model,
    repo_type="model",
    exist_ok=True,
)
api.upload_folder(
    repo_id=new_model,
    folder_path="qlora-out/merged",
)

pytorch_model.bin:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/mlabonne/TinyAlpaca/commit/0428c0eb7a6c18dfb7ce7a4cd86ecf4e397048f9', commit_message='Upload folder using huggingface_hub', commit_description='', oid='0428c0eb7a6c18dfb7ce7a4cd86ecf4e397048f9', pr_url=None, pr_revision=None, pr_num=None)