In [2]:
import sys
import os

# Add the parent directory (Finetuning/) to sys.path to access utils_ft
sys.path.append('..')

from utils_ft import (
    get_model_path, 
    get_output_dir, 
    get_processed_data_path, 
    get_adapters_path, 
    to_relative,
    setup_env
)
from scripts.train_mlx import run_mlx_training, fuse_model

# Prepare the environment first (loads .env variables if needed).
setup_env()

# Select the base-model family and the fine-tuning framework for this run.
MODEL_TYPE = "qwen"
FRAMEWORK = "mlx"

# Resolve every location through the utils_ft helpers so the paths match the
# structure used by the data converter.
base_model = get_model_path(MODEL_TYPE)
data_folder = get_processed_data_path()                   # .../Finetuning/data
adapter_path = get_adapters_path(MODEL_TYPE, FRAMEWORK)   # .../Finetuning/adapters/qwen_mlx
fused_output = get_output_dir(MODEL_TYPE, FRAMEWORK)      # .../local_models/ft_qwen_mlx

# Echo the resolved configuration; paths are shown relative for readability.
# Labels are left-aligned in a 14-character column so the values line up.
print("--- Configuration ---")
for label, value in (
    ("Base Model:", base_model),
    ("Data Folder:", to_relative(data_folder)),
    ("Adapter Path:", to_relative(adapter_path)),
    ("Output Path:", to_relative(fused_output)),
):
    print(f"{label:<14}{value}")

--- Configuration ---
Base Model:   Qwen/Qwen2.5-7B-Instruct
Data Folder:  Finetuning/data
Adapter Path: Finetuning/adapters/qwen_mlx
Output Path:  local_models/ft_qwen_mlx


In [2]:
# Launch LoRA fine-tuning for the configured base model. Per the captured log,
# this writes train_config.yaml into the adapter folder, runs the MLX CLI
# trainer, and saves adapter checkpoints into that same folder.
run_mlx_training(
    base_model,
    data_folder,
    adapter_path,
    iters=800,      # total training iterations (log: "Starting training..., iters: 800")
    num_layers=16,  # presumably the number of layers that receive adapters - TODO confirm
)

--- Initializing MLX Fine-tuning (CLI Mode) ---
--- Model: ./Qwen/Qwen2.5-7B-Instruct ---
--- Data Folder: ./../data ---
Generating configuration file at: ./../adapters/qwen_mlx/train_config.yaml
Executing MLX CLI training...
Loading configuration file ../adapters/qwen_mlx/train_config.yaml
Loading pretrained model


Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 22772.63it/s]


Loading datasets
Training
Trainable parameters: 0.303% (23.069M/7615.617M)
Starting training..., iters: 800


Calculating loss...: 130it [02:02,  1.06it/s]


Iter 1: Val loss 1.298, Val took 122.178s
Iter 20: Train loss 1.702, Learning Rate 9.990e-06, It/sec 0.488, Tokens/sec 32.152, Trained Tokens 1318, Peak mem 18.262 GB
Iter 40: Train loss 1.235, Learning Rate 9.950e-06, It/sec 0.567, Tokens/sec 27.880, Trained Tokens 2301, Peak mem 18.262 GB
Iter 60: Train loss 0.445, Learning Rate 9.880e-06, It/sec 0.551, Tokens/sec 18.996, Trained Tokens 2991, Peak mem 18.262 GB
Iter 80: Train loss 0.425, Learning Rate 9.779e-06, It/sec 0.635, Tokens/sec 32.802, Trained Tokens 4024, Peak mem 18.262 GB


Calculating loss...: 130it [02:01,  1.07it/s]


Iter 100: Val loss 0.457, Val took 121.951s
Iter 100: Train loss 0.555, Learning Rate 9.649e-06, It/sec 0.665, Tokens/sec 32.105, Trained Tokens 4989, Peak mem 18.262 GB
Iter 100: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000100_adapters.safetensors.
Iter 120: Train loss 0.453, Learning Rate 9.490e-06, It/sec 0.602, Tokens/sec 40.863, Trained Tokens 6346, Peak mem 18.262 GB
Iter 140: Train loss 0.633, Learning Rate 9.304e-06, It/sec 0.496, Tokens/sec 31.611, Trained Tokens 7621, Peak mem 18.262 GB
Iter 160: Train loss 0.523, Learning Rate 9.091e-06, It/sec 0.618, Tokens/sec 28.221, Trained Tokens 8535, Peak mem 18.262 GB
Iter 180: Train loss 0.546, Learning Rate 8.853e-06, It/sec 0.586, Tokens/sec 25.546, Trained Tokens 9407, Peak mem 18.262 GB


Calculating loss...: 130it [02:01,  1.07it/s]


Iter 200: Val loss 0.426, Val took 121.979s
Iter 200: Train loss 0.542, Learning Rate 8.591e-06, It/sec 0.511, Tokens/sec 23.950, Trained Tokens 10345, Peak mem 18.262 GB
Iter 200: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000200_adapters.safetensors.
Iter 220: Train loss 0.420, Learning Rate 8.307e-06, It/sec 0.525, Tokens/sec 15.282, Trained Tokens 10927, Peak mem 18.262 GB
Iter 240: Train loss 0.420, Learning Rate 8.002e-06, It/sec 0.550, Tokens/sec 24.210, Trained Tokens 11807, Peak mem 18.262 GB
Iter 260: Train loss 0.345, Learning Rate 7.679e-06, It/sec 0.565, Tokens/sec 48.453, Trained Tokens 13523, Peak mem 18.262 GB
Iter 280: Train loss 0.471, Learning Rate 7.340e-06, It/sec 0.657, Tokens/sec 30.436, Trained Tokens 14449, Peak mem 18.262 GB


Calculating loss...: 130it [02:02,  1.06it/s]


Iter 300: Val loss 0.413, Val took 121.994s
Iter 300: Train loss 0.466, Learning Rate 6.986e-06, It/sec 0.516, Tokens/sec 27.604, Trained Tokens 15519, Peak mem 18.262 GB
Iter 300: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000300_adapters.safetensors.
Iter 320: Train loss 0.606, Learning Rate 6.620e-06, It/sec 0.555, Tokens/sec 53.019, Trained Tokens 17431, Peak mem 18.262 GB
Iter 340: Train loss 0.424, Learning Rate 6.243e-06, It/sec 0.550, Tokens/sec 23.272, Trained Tokens 18278, Peak mem 18.262 GB
Iter 360: Train loss 0.429, Learning Rate 5.860e-06, It/sec 0.621, Tokens/sec 27.531, Trained Tokens 19164, Peak mem 18.262 GB
Iter 380: Train loss 0.330, Learning Rate 5.471e-06, It/sec 0.565, Tokens/sec 17.334, Trained Tokens 19778, Peak mem 18.262 GB


Calculating loss...: 130it [02:01,  1.07it/s]


Iter 400: Val loss 0.402, Val took 121.984s
Iter 400: Train loss 0.385, Learning Rate 5.079e-06, It/sec 0.535, Tokens/sec 32.172, Trained Tokens 20981, Peak mem 18.262 GB
Iter 400: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000400_adapters.safetensors.
Iter 420: Train loss 0.451, Learning Rate 4.686e-06, It/sec 0.585, Tokens/sec 27.611, Trained Tokens 21925, Peak mem 18.262 GB
Iter 440: Train loss 0.403, Learning Rate 4.295e-06, It/sec 0.442, Tokens/sec 21.674, Trained Tokens 22906, Peak mem 18.262 GB
Iter 460: Train loss 0.245, Learning Rate 3.909e-06, It/sec 0.544, Tokens/sec 28.785, Trained Tokens 23964, Peak mem 18.262 GB
Iter 480: Train loss 0.228, Learning Rate 3.530e-06, It/sec 0.564, Tokens/sec 20.105, Trained Tokens 24677, Peak mem 18.262 GB


Calculating loss...: 130it [02:01,  1.07it/s]


Iter 500: Val loss 0.398, Val took 122.000s
Iter 500: Train loss 0.441, Learning Rate 3.159e-06, It/sec 0.606, Tokens/sec 17.581, Trained Tokens 25257, Peak mem 18.262 GB
Iter 500: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000500_adapters.safetensors.
Iter 520: Train loss 0.363, Learning Rate 2.800e-06, It/sec 0.541, Tokens/sec 25.718, Trained Tokens 26208, Peak mem 18.262 GB
Iter 540: Train loss 0.493, Learning Rate 2.455e-06, It/sec 0.559, Tokens/sec 40.048, Trained Tokens 27641, Peak mem 18.262 GB
Iter 560: Train loss 0.280, Learning Rate 2.125e-06, It/sec 0.663, Tokens/sec 23.211, Trained Tokens 28341, Peak mem 18.262 GB
Iter 580: Train loss 0.328, Learning Rate 1.813e-06, It/sec 0.595, Tokens/sec 31.675, Trained Tokens 29406, Peak mem 18.262 GB


Calculating loss...: 130it [02:01,  1.07it/s]


Iter 600: Val loss 0.395, Val took 121.999s
Iter 600: Train loss 0.488, Learning Rate 1.520e-06, It/sec 0.522, Tokens/sec 25.960, Trained Tokens 30401, Peak mem 18.262 GB
Iter 600: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000600_adapters.safetensors.
Iter 620: Train loss 0.227, Learning Rate 1.249e-06, It/sec 0.517, Tokens/sec 51.383, Trained Tokens 32387, Peak mem 18.262 GB
Iter 640: Train loss 0.361, Learning Rate 1.002e-06, It/sec 0.550, Tokens/sec 28.684, Trained Tokens 33430, Peak mem 18.262 GB
Iter 660: Train loss 0.207, Learning Rate 7.784e-07, It/sec 0.618, Tokens/sec 22.789, Trained Tokens 34168, Peak mem 18.262 GB
Iter 680: Train loss 0.465, Learning Rate 5.812e-07, It/sec 0.480, Tokens/sec 46.919, Trained Tokens 36123, Peak mem 18.339 GB


Calculating loss...: 130it [02:02,  1.07it/s]


Iter 700: Val loss 0.394, Val took 122.010s
Iter 700: Train loss 0.400, Learning Rate 4.112e-07, It/sec 0.606, Tokens/sec 29.706, Trained Tokens 37104, Peak mem 18.339 GB
Iter 700: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000700_adapters.safetensors.
Iter 720: Train loss 0.414, Learning Rate 2.696e-07, It/sec 0.620, Tokens/sec 31.001, Trained Tokens 38104, Peak mem 18.339 GB
Iter 740: Train loss 0.234, Learning Rate 1.571e-07, It/sec 0.592, Tokens/sec 27.239, Trained Tokens 39025, Peak mem 18.339 GB
Iter 760: Train loss 0.283, Learning Rate 7.445e-08, It/sec 0.562, Tokens/sec 23.612, Trained Tokens 39865, Peak mem 18.339 GB
Iter 780: Train loss 0.268, Learning Rate 2.219e-08, It/sec 0.612, Tokens/sec 30.283, Trained Tokens 40855, Peak mem 18.339 GB


Calculating loss...: 130it [02:01,  1.07it/s]


Iter 800: Val loss 0.394, Val took 121.998s
Iter 800: Train loss 0.463, Learning Rate 6.166e-10, It/sec 0.706, Tokens/sec 32.187, Trained Tokens 41767, Peak mem 18.339 GB
Iter 800: Saved adapter weights to ../adapters/qwen_mlx/adapters.safetensors and ../adapters/qwen_mlx/0000800_adapters.safetensors.
Saved final weights to ../adapters/qwen_mlx/adapters.safetensors.
✓ Training completed.


In [3]:
# Fuse the trained adapter into the base model and write the result to the
# output directory. With no step argument, the log shows the final
# adapters.safetensors being used ("Step: Final").
fuse_model(base_model, adapter_path, fused_output)

# Report where the fused model landed, relative for readability.
fused_rel = to_relative(fused_output)
print(f"Success! Model ready at ./{fused_rel}")

Fusing LoRA adapters (Step: Final)...
--- Staging checkpoint from: adapters.safetensors ---
Loading pretrained model


Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 20597.03it/s]


Success! Fused model saved to: ./local_models/ft_qwen_mlx
Success! Model ready at ./local_models/ft_qwen_mlx


In [3]:
# Re-point the configuration at the Llama base model. This cell relies on the
# helpers imported in the first cell (get_model_path, to_relative, ...) and
# overwrites the path variables that the training and fusing cells below read.
MODEL_TYPE = "llama"
FRAMEWORK = "mlx"

# Resolve every location through the shared utils so the paths match the
# structure used by the data converter.
base_model = get_model_path(MODEL_TYPE)
data_folder = get_processed_data_path()                   # .../Finetuning/data
adapter_path = get_adapters_path(MODEL_TYPE, FRAMEWORK)   # .../Finetuning/adapters/llama_mlx
fused_output = get_output_dir(MODEL_TYPE, FRAMEWORK)      # .../local_models/ft_llama_mlx

# Print the resolved configuration (relative paths for clarity) so a stale
# MODEL_TYPE is caught before the long training run starts.
print("--- Configuration ---")
print(f"Base Model:   {base_model}")
print(f"Data Folder:  {to_relative(data_folder)}")
print(f"Adapter Path: {to_relative(adapter_path)}")
print(f"Output Path:  {to_relative(fused_output)}")

--- Configuration ---
Base Model:   meta-llama/Meta-Llama-3-8B-Instruct
Data Folder:  Finetuning/data
Adapter Path: Finetuning/adapters/llama_mlx
Output Path:  local_models/ft_llama_mlx


In [5]:
# Launch LoRA fine-tuning for the currently configured base model, using the
# same hyperparameters as the earlier run so the two are comparable. Per the
# captured log, checkpoints are saved into the adapter folder.
run_mlx_training(
    base_model,
    data_folder,
    adapter_path,
    iters=800,      # total training iterations (log: "Starting training..., iters: 800")
    num_layers=16,  # presumably the number of layers that receive adapters - TODO confirm
)

--- Initializing MLX Fine-tuning (CLI Mode) ---
--- Model: ./meta-llama/Meta-Llama-3-8B-Instruct ---
--- Data Folder: ./../data ---
Generating configuration file at: ./../adapters/llama_mlx/train_config.yaml
Executing MLX CLI training...
Loading configuration file ../adapters/llama_mlx/train_config.yaml
Loading pretrained model


Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 15586.94it/s]


Loading datasets
Training
Trainable parameters: 0.261% (20.972M/8030.261M)
Starting training..., iters: 800


Calculating loss...: 130it [02:03,  1.06it/s]


Iter 1: Val loss 1.734, Val took 123.153s
Iter 20: Train loss 2.299, Learning Rate 9.990e-06, It/sec 0.493, Tokens/sec 31.920, Trained Tokens 1294, Peak mem 18.814 GB
Iter 40: Train loss 1.216, Learning Rate 9.950e-06, It/sec 0.586, Tokens/sec 27.998, Trained Tokens 2249, Peak mem 18.814 GB
Iter 60: Train loss 0.581, Learning Rate 9.880e-06, It/sec 0.556, Tokens/sec 18.559, Trained Tokens 2917, Peak mem 18.814 GB
Iter 80: Train loss 0.546, Learning Rate 9.779e-06, It/sec 0.648, Tokens/sec 32.748, Trained Tokens 3928, Peak mem 18.814 GB


Calculating loss...: 130it [02:03,  1.06it/s]


Iter 100: Val loss 0.550, Val took 123.180s
Iter 100: Train loss 0.560, Learning Rate 9.649e-06, It/sec 0.697, Tokens/sec 32.405, Trained Tokens 4858, Peak mem 18.814 GB
Iter 100: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000100_adapters.safetensors.
Iter 120: Train loss 0.581, Learning Rate 9.490e-06, It/sec 0.615, Tokens/sec 40.975, Trained Tokens 6190, Peak mem 18.814 GB
Iter 140: Train loss 0.669, Learning Rate 9.304e-06, It/sec 0.501, Tokens/sec 31.162, Trained Tokens 7435, Peak mem 18.814 GB
Iter 160: Train loss 0.595, Learning Rate 9.091e-06, It/sec 0.630, Tokens/sec 28.143, Trained Tokens 8329, Peak mem 18.814 GB
Iter 180: Train loss 0.544, Learning Rate 8.853e-06, It/sec 0.603, Tokens/sec 25.459, Trained Tokens 9174, Peak mem 18.814 GB


Calculating loss...: 130it [02:02,  1.06it/s]


Iter 200: Val loss 0.512, Val took 122.973s
Iter 200: Train loss 0.568, Learning Rate 8.591e-06, It/sec 0.521, Tokens/sec 23.850, Trained Tokens 10089, Peak mem 18.814 GB
Iter 200: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000200_adapters.safetensors.
Iter 220: Train loss 0.616, Learning Rate 8.307e-06, It/sec 0.531, Tokens/sec 14.858, Trained Tokens 10649, Peak mem 18.814 GB
Iter 240: Train loss 0.492, Learning Rate 8.002e-06, It/sec 0.558, Tokens/sec 23.999, Trained Tokens 11509, Peak mem 18.814 GB
Iter 260: Train loss 0.419, Learning Rate 7.679e-06, It/sec 0.576, Tokens/sec 48.744, Trained Tokens 13201, Peak mem 18.814 GB
Iter 280: Train loss 0.546, Learning Rate 7.340e-06, It/sec 0.658, Tokens/sec 29.672, Trained Tokens 14103, Peak mem 18.814 GB


Calculating loss...: 130it [02:02,  1.06it/s]


Iter 300: Val loss 0.493, Val took 122.981s
Iter 300: Train loss 0.543, Learning Rate 6.986e-06, It/sec 0.524, Tokens/sec 27.252, Trained Tokens 15143, Peak mem 18.814 GB
Iter 300: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000300_adapters.safetensors.
Iter 320: Train loss 0.676, Learning Rate 6.620e-06, It/sec 0.565, Tokens/sec 53.333, Trained Tokens 17032, Peak mem 18.814 GB
Iter 340: Train loss 0.565, Learning Rate 6.243e-06, It/sec 0.559, Tokens/sec 21.828, Trained Tokens 17813, Peak mem 18.814 GB
Iter 360: Train loss 0.526, Learning Rate 5.860e-06, It/sec 0.633, Tokens/sec 27.046, Trained Tokens 18668, Peak mem 18.814 GB
Iter 380: Train loss 0.388, Learning Rate 5.471e-06, It/sec 0.579, Tokens/sec 17.078, Trained Tokens 19258, Peak mem 18.814 GB


Calculating loss...: 130it [02:03,  1.06it/s]


Iter 400: Val loss 0.476, Val took 123.053s
Iter 400: Train loss 0.438, Learning Rate 5.079e-06, It/sec 0.555, Tokens/sec 32.652, Trained Tokens 20434, Peak mem 18.814 GB
Iter 400: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000400_adapters.safetensors.
Iter 420: Train loss 0.503, Learning Rate 4.686e-06, It/sec 0.592, Tokens/sec 27.052, Trained Tokens 21348, Peak mem 18.814 GB
Iter 440: Train loss 0.452, Learning Rate 4.295e-06, It/sec 0.449, Tokens/sec 21.511, Trained Tokens 22306, Peak mem 18.814 GB
Iter 460: Train loss 0.298, Learning Rate 3.909e-06, It/sec 0.549, Tokens/sec 28.474, Trained Tokens 23343, Peak mem 18.814 GB
Iter 480: Train loss 0.272, Learning Rate 3.530e-06, It/sec 0.574, Tokens/sec 19.822, Trained Tokens 24034, Peak mem 18.814 GB


Calculating loss...: 130it [02:02,  1.06it/s]


Iter 500: Val loss 0.474, Val took 122.997s
Iter 500: Train loss 0.459, Learning Rate 3.159e-06, It/sec 0.618, Tokens/sec 17.261, Trained Tokens 24593, Peak mem 18.814 GB
Iter 500: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000500_adapters.safetensors.
Iter 520: Train loss 0.403, Learning Rate 2.800e-06, It/sec 0.552, Tokens/sec 25.575, Trained Tokens 25520, Peak mem 18.814 GB
Iter 540: Train loss 0.554, Learning Rate 2.455e-06, It/sec 0.569, Tokens/sec 40.120, Trained Tokens 26930, Peak mem 18.814 GB
Iter 560: Train loss 0.350, Learning Rate 2.125e-06, It/sec 0.675, Tokens/sec 22.762, Trained Tokens 27604, Peak mem 18.814 GB
Iter 580: Train loss 0.367, Learning Rate 1.813e-06, It/sec 0.619, Tokens/sec 31.235, Trained Tokens 28614, Peak mem 18.814 GB


Calculating loss...: 130it [02:03,  1.06it/s]


Iter 600: Val loss 0.472, Val took 123.032s
Iter 600: Train loss 0.557, Learning Rate 1.520e-06, It/sec 0.525, Tokens/sec 25.522, Trained Tokens 29586, Peak mem 18.814 GB
Iter 600: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000600_adapters.safetensors.
Iter 620: Train loss 0.258, Learning Rate 1.249e-06, It/sec 0.534, Tokens/sec 52.187, Trained Tokens 31540, Peak mem 18.814 GB
Iter 640: Train loss 0.404, Learning Rate 1.002e-06, It/sec 0.566, Tokens/sec 28.780, Trained Tokens 32557, Peak mem 18.814 GB
Iter 660: Train loss 0.246, Learning Rate 7.784e-07, It/sec 0.627, Tokens/sec 22.333, Trained Tokens 33269, Peak mem 18.814 GB
Iter 680: Train loss 0.555, Learning Rate 5.812e-07, It/sec 0.490, Tokens/sec 46.970, Trained Tokens 35185, Peak mem 18.954 GB


Calculating loss...: 130it [02:02,  1.06it/s]


Iter 700: Val loss 0.470, Val took 122.963s
Iter 700: Train loss 0.460, Learning Rate 4.112e-07, It/sec 0.623, Tokens/sec 29.630, Trained Tokens 36136, Peak mem 18.954 GB
Iter 700: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000700_adapters.safetensors.
Iter 720: Train loss 0.478, Learning Rate 2.696e-07, It/sec 0.626, Tokens/sec 30.189, Trained Tokens 37101, Peak mem 18.954 GB
Iter 740: Train loss 0.297, Learning Rate 1.571e-07, It/sec 0.609, Tokens/sec 27.446, Trained Tokens 38002, Peak mem 18.954 GB
Iter 760: Train loss 0.368, Learning Rate 7.445e-08, It/sec 0.576, Tokens/sec 23.555, Trained Tokens 38820, Peak mem 18.954 GB
Iter 780: Train loss 0.300, Learning Rate 2.219e-08, It/sec 0.626, Tokens/sec 30.215, Trained Tokens 39785, Peak mem 18.954 GB


Calculating loss...: 130it [02:03,  1.06it/s]


Iter 800: Val loss 0.470, Val took 123.028s
Iter 800: Train loss 0.556, Learning Rate 6.166e-10, It/sec 0.711, Tokens/sec 31.601, Trained Tokens 40674, Peak mem 18.954 GB
Iter 800: Saved adapter weights to ../adapters/llama_mlx/adapters.safetensors and ../adapters/llama_mlx/0000800_adapters.safetensors.
Saved final weights to ../adapters/llama_mlx/adapters.safetensors.
✓ Training completed.


In [4]:
# Fuse an intermediate checkpoint instead of the final weights: passing a step
# makes fuse_model stage 0000300_adapters.safetensors (see the captured log).
# NOTE(review): the training log reports val loss 0.493 at iter 300 versus
# 0.470 at iter 800 - confirm that picking step 300 is intentional.
checkpoint_step = 300
fuse_model(base_model, adapter_path, fused_output, checkpoint_step)

# Report where the fused model landed, relative for readability.
fused_rel = to_relative(fused_output)
print(f"Success! Model ready at ./{fused_rel}")

Fusing LoRA adapters (Step: 300)...
--- Staging checkpoint from: 0000300_adapters.safetensors ---
Loading pretrained model


Fetching 11 files: 100%|██████████| 11/11 [00:00<00:00, 134120.19it/s]


Success! Fused model saved to: ./local_models/ft_llama_mlx
Success! Model ready at ./local_models/ft_llama_mlx
