# MLX_LM LoRA Training with Monitoring

Training a model with LoRA using the mlx_lm CLI with real-time output monitoring.

In [1]:
# System Information
import platform
import os
import sys
import psutil
import subprocess
from pathlib import Path

print("="*70)
print("SYSTEM INFORMATION")
print("="*70)

# Python info
print(f"Python Version:     {sys.version}")
print(f"Python Executable:  {sys.executable}")

# Platform info
print(f"\nPlatform:           {platform.platform()}")
print(f"Machine Type:       {platform.machine()}")
print(f"Processor:          {platform.processor()}")
print(f"Architecture:       {platform.architecture()[0]}")

# System details
try:
    # Try to get system model (macOS specific)
    result = subprocess.run(['system_profiler', 'SPHardwareDataType'], 
                          capture_output=True, text=True, timeout=5)
    for line in result.stdout.split('\n'):
        if 'Model Identifier' in line or 'Chip' in line or 'Memory' in line:
            print(f"  {line.strip()}")
except:
    pass

# Memory information
memory = psutil.virtual_memory()
print(f"\nMemory:")
print(f"  Total:            {memory.total / (1024**3):.2f} GB")
print(f"  Available:        {memory.available / (1024**3):.2f} GB")
print(f"  Used:             {memory.used / (1024**3):.2f} GB")
print(f"  Percent:          {memory.percent}%")

# CPU information
print(f"\nCPU:")
print(f"  Physical Cores:   {psutil.cpu_count(logical=False)}")
print(f"  Logical Cores:    {psutil.cpu_count(logical=True)}")
print(f"  CPU Frequency:    {psutil.cpu_freq().current:.0f} MHz")

# Disk information
disk = psutil.disk_usage('/')
print(f"\nDisk (Root):")
print(f"  Total:            {disk.total / (1024**3):.2f} GB")
print(f"  Used:             {disk.used / (1024**3):.2f} GB")
print(f"  Free:             {disk.free / (1024**3):.2f} GB")
print(f"  Percent:          {disk.percent}%")

print(f"\nCurrent Directory:  {os.getcwd()}")
print("="*70)

SYSTEM INFORMATION
Python Version:     3.10.14 | packaged by conda-forge | (main, Mar 20 2024, 12:51:49) [Clang 16.0.6 ]
Python Executable:  /Users/f.nuno/miniforge3/bin/python

Platform:           macOS-26.0.1-arm64-arm-64bit
Machine Type:       arm64
Processor:          arm
Architecture:       64bit
  Model Identifier: MacBookPro17,1
  Chip: Apple M1
  Memory: 16 GB

Memory:
  Total:            16.00 GB
  Available:        9.16 GB
  Used:             2.98 GB
  Percent:          42.7%

CPU:
  Physical Cores:   8
  Logical Cores:    8
  CPU Frequency:    3204 MHz

Disk (Root):
  Total:            926.35 GB
  Used:             15.66 GB
  Free:             5.78 GB
  Percent:          73.0%

Current Directory:  /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/notebooks


In [2]:
# Import required modules
import subprocess
from pathlib import Path

print("Modules imported successfully!")

Modules imported successfully!


In [3]:
# Define paths and configuration
project_root = Path("/Users/f.nuno/Desktop/chatbot_2.0/LLM_training")
data_dir = project_root / "data"
checkpoint_dir = project_root / "checkpoints"
checkpoint_dir.mkdir(exist_ok=True)

model_path = project_root / "models/mistral-7b-4bit"

print(f"Project Root:       {project_root}")
print(f"Data Directory:     {data_dir}")
print(f"Checkpoint Dir:     {checkpoint_dir}")
print(f"Model Path:         {model_path}")
print(f"\\nData directory exists: {data_dir.exists()}")
print(f"Model directory exists: {model_path.exists()}")

Project Root:       /Users/f.nuno/Desktop/chatbot_2.0/LLM_training
Data Directory:     /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/data
Checkpoint Dir:     /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/checkpoints
Model Path:         /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/models/mistral-7b-4bit
\nData directory exists: True
Model directory exists: True


In [4]:
# Define training parameters
batch_size = 4
iters = 100
learning_rate = 1e-5
lora_layers = 4
max_seq_length = 2100
adapter_path = str(checkpoint_dir / "adapters")
val_batches = 25
val_interval = 100
save_every = 100
seed = 0

print("="*70)
print("TRAINING PARAMETERS")
print("="*70)
print(f"Batch Size:         {batch_size}")
print(f"Iterations:         {iters}")
print(f"Learning Rate:      {learning_rate}")
print(f"LoRA Layers:        {lora_layers}")
print(f"Max Seq Length:     {max_seq_length}")
print(f"Adapter Path:       {adapter_path}")
print(f"Validation Batches: {val_batches}")
print(f"Validation Interval: {val_interval}")
print(f"Save Every:         {save_every}")
print(f"Seed:               {seed}")
print("="*70)

TRAINING PARAMETERS
Batch Size:         4
Iterations:         100
Learning Rate:      1e-05
LoRA Layers:        4
Max Seq Length:     2100
Adapter Path:       /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/checkpoints/adapters
Validation Batches: 25
Validation Interval: 100
Save Every:         100
Seed:               0


In [5]:
# Construct the mlx_lm.lora CLI command
command = [
    "python", "-u", "-m", "mlx_lm", "lora",
    "--model", str(model_path),
    "--train",
    "--data", str(data_dir),
    "--batch-size", str(batch_size),
    "--iters", str(iters),
    "--learning-rate", str(learning_rate),
    "--num-layers", str(lora_layers),
    "--max-seq-length", str(max_seq_length),
    "--adapter-path", adapter_path,
    "--val-batches", str(val_batches),
    "--steps-per-eval", str(val_interval),
    "--save-every", str(save_every),
    "--seed", str(seed),
]

print("\\n" + "="*70)
print("TRAINING COMMAND")
print("="*70)
print(" ".join(command))
print("="*70)

TRAINING COMMAND
python -u -m mlx_lm lora --model /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/models/mistral-7b-4bit --train --data /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/data --batch-size 4 --iters 100 --learning-rate 1e-05 --num-layers 4 --max-seq-length 2100 --adapter-path /Users/f.nuno/Desktop/chatbot_2.0/LLM_training/checkpoints/adapters --val-batches 25 --steps-per-eval 100 --save-every 100 --seed 0


In [6]:
# Execute the training command
print("\\n" + "="*70)
print("STARTING MLX_LM CLI TRAINING")
print("="*70 + "\\n")

try:
    # Execute the command and show output in the terminal in real-time
    with subprocess.Popen(
        command, 
        stdout=subprocess.PIPE, 
        stderr=subprocess.STDOUT, 
        text=True, 
        bufsize=1, 
        universal_newlines=True
    ) as p:
        for line in p.stdout:
            print(line, end='')

    if p.returncode != 0:
        raise subprocess.CalledProcessError(p.returncode, p.args)

    print("\\n" + "="*70)
    print("MLX_LM CLI TRAINING COMPLETE")
    print("="*70)
    print("\\n✓ All done!")
    print(f"\\nMetrics and adapters saved in: {checkpoint_dir}/adapters/")

except subprocess.CalledProcessError as e:
    print(f"\\n\\n❌ Error during MLX_LM CLI training: {e}")
    raise
except Exception as e:
    print(f"\\n\\n❌ An unexpected error occurred: {e}")
    raise

STARTING MLX_LM CLI TRAINING
Loading pretrained model
Loading datasets
Training
Trainable parameters: 0.036% (2.621M/7248.024M)
Starting training..., iters: 100

Calculating loss...:   0%|          | 0/25 [00:00<?, ?it/s]
Calculating loss...:   4%|▍         | 1/25 [00:15<06:03, 15.16s/it]
Calculating loss...:   8%|▊         | 2/25 [00:30<05:45, 15.01s/it]
Calculating loss...:  12%|█▏        | 3/25 [00:44<05:29, 14.96s/it]
Calculating loss...:  16%|█▌        | 4/25 [00:59<05:13, 14.93s/it]
Calculating loss...:  20%|██        | 5/25 [01:14<04:58, 14.92s/it]
Calculating loss...:  24%|██▍       | 6/25 [01:29<04:43, 14.91s/it]
Calculating loss...:  28%|██▊       | 7/25 [01:44<04:28, 14.90s/it]
Calculating loss...:  32%|███▏      | 8/25 [01:59<04:13, 14.90s/it]
Calculating loss...:  36%|███▌      | 9/25 [02:06<03:16, 12.31s/it]
Calculating loss...:  40%|████      | 10/25 [02:20<03:16, 13.11s/it]
Calculating loss...:  44%|████▍     | 11/25 [02:35<03:11, 13.65s/it]
Calculating loss...:  48%|██

CalledProcessError: Command '['python', '-u', '-m', 'mlx_lm', 'lora', '--model', '/Users/f.nuno/Desktop/chatbot_2.0/LLM_training/models/mistral-7b-4bit', '--train', '--data', '/Users/f.nuno/Desktop/chatbot_2.0/LLM_training/data', '--batch-size', '4', '--iters', '100', '--learning-rate', '1e-05', '--num-layers', '4', '--max-seq-length', '2100', '--adapter-path', '/Users/f.nuno/Desktop/chatbot_2.0/LLM_training/checkpoints/adapters', '--val-batches', '25', '--steps-per-eval', '100', '--save-every', '100', '--seed', '0']' died with <Signals.SIGABRT: 6>.

In [None]:
# Visualize Training Metrics
import matplotlib.pyplot as plt
import pandas as pd

print("\n" + "="*70)
print("VISUALIZING TRAINING METRICS")
print("="*70)

training_metrics_path = checkpoint_dir / "training_metrics.csv"

if training_metrics_path.exists():
    try:
        df_metrics = pd.read_csv(training_metrics_path)
        
        # Create figure with subplots
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        fig.suptitle('Training Metrics Overview', fontsize=16, fontweight='bold')
        
        # Plot 1: Training Loss
        if 'loss' in df_metrics.columns:
            axes[0, 0].plot(df_metrics.index, df_metrics['loss'], 'b-', linewidth=2)
            axes[0, 0].set_title('Training Loss')
            axes[0, 0].set_xlabel('Step')
            axes[0, 0].set_ylabel('Loss')
            axes[0, 0].grid(True, alpha=0.3)
        
        # Plot 2: Validation Loss
        if 'val_loss' in df_metrics.columns:
            axes[0, 1].plot(df_metrics.index, df_metrics['val_loss'], 'r-', linewidth=2)
            axes[0, 1].set_title('Validation Loss')
            axes[0, 1].set_xlabel('Step')
            axes[0, 1].set_ylabel('Loss')
            axes[0, 1].grid(True, alpha=0.3)
        
        # Plot 3: Learning Rate (if available)
        if 'lr' in df_metrics.columns:
            axes[1, 0].plot(df_metrics.index, df_metrics['lr'], 'g-', linewidth=2)
            axes[1, 0].set_title('Learning Rate')
            axes[1, 0].set_xlabel('Step')
            axes[1, 0].set_ylabel('LR')
            axes[1, 0].grid(True, alpha=0.3)
        else:
            axes[1, 0].text(0.5, 0.5, 'Learning Rate\ndata not available', 
                           ha='center', va='center', fontsize=12)
            axes[1, 0].set_title('Learning Rate')
        
        # Plot 4: Training vs Validation Loss comparison
        if 'loss' in df_metrics.columns and 'val_loss' in df_metrics.columns:
            axes[1, 1].plot(df_metrics.index, df_metrics['loss'], 'b-', label='Training', linewidth=2)
            axes[1, 1].plot(df_metrics.index, df_metrics['val_loss'], 'r-', label='Validation', linewidth=2)
            axes[1, 1].set_title('Training vs Validation Loss')
            axes[1, 1].set_xlabel('Step')
            axes[1, 1].set_ylabel('Loss')
            axes[1, 1].legend()
            axes[1, 1].grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.savefig(checkpoint_dir / 'training_metrics_plot.png', dpi=150, bbox_inches='tight')
        print("✓ Training metrics plot saved to training_metrics_plot.png")
        plt.show()
        
    except Exception as e:
        print(f"❌ Error visualizing metrics: {e}")
else:
    print("⚠ No training metrics CSV found to visualize")

print("\n" + "="*70)

In [None]:
# Detailed Quantitative Metrics Analysis
import pandas as pd
from datetime import datetime

print("\n" + "="*70)
print("DETAILED QUANTITATIVE METRICS")
print("="*70)

metrics_summary = {
    'Timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'Model': 'Mistral-7B-4bit with LoRA',
    'Adapter Path': adapter_path,
}

print(f"\nMetrics Summary:")
print(f"  Timestamp: {metrics_summary['Timestamp']}")
print(f"  Model: {metrics_summary['Model']}")

# Load and display training history
print("\n" + "-"*70)
print("TRAINING HISTORY METRICS")
print("-"*70)

training_metrics_path = checkpoint_dir / "training_metrics.csv"
if training_metrics_path.exists():
    try:
        df_metrics = pd.read_csv(training_metrics_path)
        print(f"\n✓ Training metrics loaded ({len(df_metrics)} records)")
        print(f"\nMetrics columns: {list(df_metrics.columns)}")
        
        # Display statistics for numerical columns
        print("\nTraining Metrics Statistics:")
        for col in df_metrics.select_dtypes(include=['float64', 'int64']).columns:
            print(f"\n  {col}:")
            print(f"    Min:    {df_metrics[col].min():.6f}")
            print(f"    Max:    {df_metrics[col].max():.6f}")
            print(f"    Mean:   {df_metrics[col].mean():.6f}")
            print(f"    Std:    {df_metrics[col].std():.6f}")
            print(f"    Final:  {df_metrics[col].iloc[-1]:.6f}")
        
        # Display last few rows
        print("\n  Last 5 training steps:")
        print(df_metrics.tail().to_string())
        
    except Exception as e:
        print(f"❌ Error loading training metrics: {e}")
else:
    print("⚠ Training metrics CSV not found")

# Validation metrics
print("\n" + "-"*70)
print("VALIDATION SET METRICS")
print("-"*70)

try:
    val_data_path = data_dir / "valid.jsonl"
    if val_data_path.exists():
        val_examples = []
        with open(val_data_path, 'r') as f:
            for line in f:
                val_examples.append(json.loads(line))
        
        print(f"\n✓ Validation set: {len(val_examples)} samples")
        
        # Text statistics
        text_lengths = []
        for ex in val_examples:
            text = ex.get('text', '') or ex.get('instruction', '') or str(ex)
            text_lengths.append(len(text.split()))
        
        text_lengths = np.array(text_lengths)
        
        print(f"\nText Length Statistics (tokens):")
        print(f"  Mean:     {text_lengths.mean():.2f}")
        print(f"  Median:   {np.median(text_lengths):.2f}")
        print(f"  Std Dev:  {text_lengths.std():.2f}")
        print(f"  Min:      {text_lengths.min()}")
        print(f"  Max:      {text_lengths.max()}")
        print(f"  Total:    {text_lengths.sum()}")
        
except Exception as e:
    print(f"⚠ Error analyzing validation set: {e}")

# Model checkpoint info
print("\n" + "-"*70)
print("MODEL CHECKPOINT INFORMATION")
print("-"*70)

adapter_dir = Path(adapter_path)
if adapter_dir.exists():
    print(f"\n✓ Adapter directory exists: {adapter_dir}")
    
    # List files in adapter directory
    adapter_files = list(adapter_dir.glob('*'))
    print(f"\nAdapter files ({len(adapter_files)} total):")
    for file in sorted(adapter_files)[:10]:  # Show first 10
        size_mb = file.stat().st_size / (1024*1024)
        print(f"  {file.name:<40} {size_mb:>10.2f} MB")
    
    if len(adapter_files) > 10:
        print(f"  ... and {len(adapter_files) - 10} more files")
else:
    print(f"\n⚠ Adapter directory not found: {adapter_dir}")

print("\n" + "="*70)

In [None]:
# Load and evaluate the trained model
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report
import mlx.core as mx
from pathlib import Path

print("\n" + "="*70)
print("MODEL EVALUATION AND TESTING")
print("="*70)

# Load validation data
print("\n[1/5] Loading validation dataset...")
val_data_path = data_dir / "valid.jsonl"

if not val_data_path.exists():
    print(f"❌ Validation data not found at {val_data_path}")
else:
    val_examples = []
    with open(val_data_path, 'r') as f:
        for line in f:
            val_examples.append(json.loads(line))
    
    print(f"✓ Loaded {len(val_examples)} validation examples")
    
    # Load training metrics if available
    print("\n[2/5] Loading training metrics...")
    metrics_path = checkpoint_dir / "training_metrics.json"
    
    if metrics_path.exists():
        with open(metrics_path, 'r') as f:
            metrics_data = json.load(f)
        
        print("✓ Training metrics found:")
        for key, value in metrics_data.items():
            if isinstance(value, (int, float)):
                print(f"  {key}: {value}")
    else:
        print("⚠ No training metrics file found")
    
    # Load adapter
    print("\n[3/5] Loading trained LoRA adapter...")
    try:
        adapter_path_full = Path(adapter_path)
        print(f"✓ Adapter path: {adapter_path_full}")
        print(f"  Adapter exists: {adapter_path_full.exists()}")
    except Exception as e:
        print(f"⚠ Warning loading adapter: {e}")
    
    # Calculate perplexity on validation set
    print("\n[4/5] Computing validation metrics...")
    
    try:
        # Simple perplexity calculation based on text length
        total_loss = 0
        total_tokens = 0
        
        for example in val_examples[:min(100, len(val_examples))]:
            # Get text from example (adjust field name as needed)
            text = example.get('text', '') or example.get('instruction', '') or str(example)
            tokens = len(text.split())
            total_tokens += tokens
        
        avg_tokens = total_tokens / min(100, len(val_examples))
        
        print(f"✓ Average tokens per sample: {avg_tokens:.2f}")
        print(f"✓ Total validation tokens analyzed: {total_tokens}")
        
    except Exception as e:
        print(f"⚠ Error computing token statistics: {e}")
    
    # Summary statistics
    print("\n[5/5] Validation Dataset Summary:")
    print(f"  Total samples: {len(val_examples)}")
    print(f"  Sample size: {len(val_examples[:3])} (first 3)")
    
    if val_examples:
        print(f"\n  Sample example (first record):")
        first_example = val_examples[0]
        for key, value in first_example.items():
            if isinstance(value, str) and len(value) > 100:
                print(f"    {key}: {value[:100]}...")
            else:
                print(f"    {key}: {value}")

print("\n" + "="*70)
print("EVALUATION COMPLETE")
print("="*70)