# VERSÃO OTIMIZADA - Mistral LoRA Fine-Tuning com MLX - Farense Bot

## ⚠️ IMPORTANTE: Correções de Memória
- **Batch size reduzido para 1** (evita crashes)
- **Sequências reduzidas para 256 tokens** (menos memória)
- **Gradient Accumulation** simula batches maiores sem usar mais RAM
- **Memory cleanup automático** entre iterações
- **Monitoramento de memória disponível**
- **Metal GPU otimizado** para M1

## 📋 Índice
1. Setup e Dependências
2. Carregamento e Preparação de Dados
3. Configuração do Modelo (OTIMIZADO)
4. Treino LoRA (OTIMIZADO)
5. Teste e Avaliação
6. Conversão e Export

## 1. Setup e Dependências

In [1]:
import os
import sys
import json
import shutil
from pathlib import Path
from datetime import datetime
import numpy as np

# Clean output setup
import warnings
warnings.filterwarnings('ignore')

# Notebook banner: the title framed by two 60-character rules.
print(
    "=" * 60,
    "MISTRAL-7B LORA FINE-TUNING - FARENSE BOT",
    "=" * 60,
    sep="\n",
)

MISTRAL-7B LORA FINE-TUNING - FARENSE BOT


In [2]:
# Report whether the notebook is running on an Apple Silicon Mac
# (macOS reports itself as "Darwin"; Apple Silicon reports "arm64").
import platform

system, machine = platform.system(), platform.machine()

if (system, machine) != ("Darwin", "arm64"):
    print(f"⚠ Not M1 Mac ({machine}) - may run slower")
else:
    print("✓ Mac M1 detected")

✓ Mac M1 detected


In [3]:
# Instalar dependências MLX
# Descomente a próxima linha se precisar instalar pela primeira vez

# !pip install mlx mlx-lm numpy pandas tqdm pydantic

In [4]:
# Import MLX libraries
try:
    import mlx.core as mx
    import mlx.nn as nn
    import mlx.optimizers as optim
    from mlx_lm import load, generate
    print("✓ MLX libraries loaded")
except ImportError as e:
    print(f"✗ Error: {e}")
    print("Run: pip install mlx mlx-lm")
    raise

✓ MLX libraries loaded


In [5]:
# Setup paths
# The project root defaults to the original checkout location. Set the
# FARENSE_PROJECT_ROOT environment variable to run the notebook from another
# directory or machine without editing this cell.
PROJECT_ROOT = Path(
    os.environ.get("FARENSE_PROJECT_ROOT", "/Users/f.nuno/Desktop/chatbot_2.0")
).expanduser()
DADOS_ROOT = PROJECT_ROOT / "dados"
TRAINING_ROOT = PROJECT_ROOT / "LLM_training"
CHECKPOINTS_DIR = TRAINING_ROOT / "checkpoints"
DATA_DIR = TRAINING_ROOT / "data"
OUTPUT_DIR = TRAINING_ROOT / "output"

# Only the output locations are created; DADOS_ROOT holds the input corpus and
# must already exist.
for directory in [CHECKPOINTS_DIR, DATA_DIR, OUTPUT_DIR]:
    directory.mkdir(parents=True, exist_ok=True)

print("✓ Paths configured")
print(f"  Project: {PROJECT_ROOT}")
print(f"  Data: {DADOS_ROOT}")
print(f"  Training: {TRAINING_ROOT}")

✓ Paths configured
  Project: /Users/f.nuno/Desktop/chatbot_2.0
  Data: /Users/f.nuno/Desktop/chatbot_2.0/dados
  Training: /Users/f.nuno/Desktop/chatbot_2.0/LLM_training


## 2. Carregamento e Preparação de Dados

In [6]:
# Load the main JSONL corpus: one JSON object per line.
jsonl_file = DADOS_ROOT / "outros" / "50_anos_00.jsonl"

if not jsonl_file.exists():
    raise FileNotFoundError(f"File not found: {jsonl_file}")

training_data = []
skipped_lines = 0  # malformed (non-blank) lines, reported below
with open(jsonl_file, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines are separators, not broken records.
            continue
        try:
            training_data.append(json.loads(line))
        except json.JSONDecodeError:
            skipped_lines += 1

print(f"✓ Loaded {len(training_data)} examples from {jsonl_file.name}")
if skipped_lines:
    print(f"⚠ Skipped {skipped_lines} malformed line(s) in {jsonl_file.name}")

✓ Loaded 1755 examples from 50_anos_00.jsonl


In [7]:
# Load player biographies and turn each one into a prompt/completion pair.
biografias_dir = DADOS_ROOT / "biografias" / "jogadores"

if not biografias_dir.exists():
    raise FileNotFoundError(f"Directory not found: {biografias_dir}")

# glob() yields files in filesystem order, so sort before applying the
# 100-file cap; otherwise different runs may train on different biographies.
biografia_files = sorted(biografias_dir.glob("*.md")) + sorted(biografias_dir.glob("*.txt"))
biografia_data = []
unreadable_files = []

for file_path in biografia_files[:100]:
    try:
        content = file_path.read_text(encoding='utf-8').strip()
    except (OSError, UnicodeDecodeError) as exc:
        # Keep going (best effort) but make the skipped file visible.
        unreadable_files.append(file_path.name)
        print(f"⚠ Skipping {file_path.name}: {exc}")
        continue

    # Very short files are treated as placeholders and ignored.
    if len(content) > 50:
        name = file_path.stem.replace('_', ' ').title()
        biografia_data.append({
            "prompt": f"Conte-me sobre {name}",
            "completion": f" {content}"
        })

all_training_data = training_data + biografia_data
print(f"✓ Loaded {len(biografia_data)} biographies")
if unreadable_files:
    print(f"⚠ {len(unreadable_files)} biography file(s) could not be read")
print(f"✓ Total: {len(all_training_data)} training examples")

✓ Loaded 100 biographies
✓ Total: 1855 training examples


In [8]:
# Validate and split data
def validate_training_data(data):
    """Filter raw examples and split over-long completions into paragraphs.

    An item is kept only if it is a dict whose ``"completion"`` is a string
    with more than 10 characters after stripping. Completions longer than
    2000 characters are split on blank lines; each paragraph longer than 10
    characters becomes its own example and keeps the original prompt, so the
    chunks stay conditioned on the question they answer.

    Args:
        data: Iterable of candidate examples (anything that is not a dict is
            ignored).

    Returns:
        A new list of ``{"prompt", "completion"}`` dicts. Examples that did
        not need splitting are the original objects, not copies.
    """
    valid_data = []
    for item in data:
        if not isinstance(item, dict):
            continue

        completion = item.get("completion")
        if not isinstance(completion, str):
            # A non-string completion cannot be tokenized; drop it rather
            # than calling len() on an arbitrary object.
            continue

        text = completion.strip()
        if len(text) <= 10:
            continue

        if len(text) <= 2000:
            valid_data.append(item)
            continue

        prompt = item.get("prompt", "")
        if not isinstance(prompt, str):
            prompt = ""
        for para in text.split('\n\n'):
            para = para.strip()
            if len(para) > 10:
                valid_data.append({"prompt": prompt, "completion": f" {para}"})
    return valid_data

# Filter the corpus, shuffle it with a fixed seed, and hold out the last 10%
# of the shuffled order for validation.
all_training_data = validate_training_data(all_training_data)

np.random.seed(42)
indices = np.random.permutation(len(all_training_data))
split = int(0.9 * len(all_training_data))

train_data, val_data = (
    [all_training_data[i] for i in part]
    for part in (indices[:split], indices[split:])
)

print(
    f"✓ Data validated: {len(all_training_data)} examples",
    f"  Train: {len(train_data)} (90%)",
    f"  Val:   {len(val_data)} (10%)",
    sep="\n",
)

✓ Data validated: 2682 examples
  Train: 2413 (90%)
  Val:   269 (10%)


In [9]:
# Write the train/validation splits as JSONL (one JSON object per line,
# non-ASCII characters kept as-is).
train_file = DATA_DIR / "train_data.jsonl"
val_file = DATA_DIR / "val_data.jsonl"

for target, records in ((train_file, train_data), (val_file, val_data)):
    with open(target, 'w', encoding='utf-8') as f:
        f.writelines(
            json.dumps(item, ensure_ascii=False) + '\n' for item in records
        )

print(
    "✓ Data saved",
    f"  {train_file.name}",
    f"  {val_file.name}",
    sep="\n",
)

✓ Data saved
  train_data.jsonl
  val_data.jsonl


## 3. Configuração do Modelo (OTIMIZADO)

In [10]:
# Load the base model and tokenizer through mlx_lm, without any adapter.
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

print("Loading " + MODEL_NAME + "...")
model, tokenizer = load(MODEL_NAME, adapter_path=None)
print("✓ Model loaded")

Loading mistralai/Mistral-7B-v0.1...


Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

✓ Model loaded


In [11]:
# Static summary of the base model (values are descriptive text, not measured).
print(
    "Model Information:",
    "  Type: Mistral 7B",
    "  Framework: MLX (Apple Silicon optimized)",
    "  Memory: ~14GB",
    sep="\n",
)

Model Information:
  Type: Mistral 7B
  Framework: MLX (Apple Silicon optimized)
  Memory: ~14GB


## 4. Treino LoRA (OTIMIZADO)

In [12]:
# LoRA configuration - tuned for M1
lora_config = dict(
    r=8,                 # rank lowered from 16 to 8 (less memory)
    lora_alpha=16,       # lowered from 32 to 16
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)

print(
    "LoRA Config (OTIMIZADO):",
    f"  r: {lora_config['r']} (reduzido para poupar memória)",
    f"  lora_alpha: {lora_config['lora_alpha']}",
    f"  target_modules: {lora_config['target_modules']}",
    "  Economia de memória esperada: ~30%",
    sep="\n",
)

LoRA Config (OTIMIZADO):
  r: 8 (reduzido para poupar memória)
  lora_alpha: 16
  target_modules: ['q_proj', 'v_proj']
  Economia de memória esperada: ~30%


In [13]:
# Training configuration - tuned for M1
training_config = dict(
    num_epochs=3,
    batch_size=1,              # lowered from 4/2 to 1
    gradient_accumulation=4,   # accumulate gradients over 4 iterations
    learning_rate=1e-4,
    logging_steps=20,          # lowered from 50
    save_steps=200,
    eval_steps=100,            # lowered from 200
    max_seq_length=256,        # lowered from 512
    memory_cleanup_steps=10,   # clear caches every 10 steps
)

print("Training Config (OTIMIZADO):")
# Every setting except the epoch count is listed.
for key, value in (
    (k, v) for k, v in training_config.items() if k != "num_epochs"
):
    print(f"  {key}: {value}")
print(
    "\n  Nota: batch_size=1 + gradient_accumulation=4 = efetivo batch_size=4",
    "  Memória: ~8-10GB (vs ~20GB antes)",
    sep="\n",
)

Training Config (OTIMIZADO):
  batch_size: 1
  gradient_accumulation: 4
  learning_rate: 0.0001
  logging_steps: 20
  save_steps: 200
  eval_steps: 100
  max_seq_length: 256
  memory_cleanup_steps: 10

  Nota: batch_size=1 + gradient_accumulation=4 = efetivo batch_size=4
  Memória: ~8-10GB (vs ~20GB antes)


In [14]:
# Memory monitoring and optimization - NOVO
import psutil
import gc

class MemoryMonitor:
    """Track available system memory and release caches when it runs low.

    Args:
        threshold_mb: Available-memory level, in MB, below which
            ``check_critical`` reports a critical state.
    """

    def __init__(self, threshold_mb=1000):
        self.threshold_mb = threshold_mb

    def get_available_memory(self):
        """Return the available system memory in MB, as reported by psutil."""
        return psutil.virtual_memory().available / (1024 ** 2)

    def cleanup(self):
        """Run the Python garbage collector and release MLX's buffer cache.

        Evaluating an empty array does not free anything, so the cache is
        cleared through MLX's own API. The function lives at ``mx.clear_cache``
        in recent releases and at ``mx.metal.clear_cache`` in older ones, so
        whichever is present is used.
        """
        gc.collect()
        clear_cache = getattr(mx, "clear_cache", None)
        if clear_cache is None:
            clear_cache = getattr(getattr(mx, "metal", None), "clear_cache", None)
        if clear_cache is None:
            return
        try:
            clear_cache()
        except Exception as exc:
            # Cleanup is best effort: report the problem, never abort training.
            print(f"  [Memory] cache cleanup failed: {exc}", flush=True)

    def log_memory(self, step_name=""):
        """Print the available memory tagged with *step_name* and return it (MB)."""
        available = self.get_available_memory()
        print(f"  [Memory] {step_name}: {available:.0f}MB disponível", flush=True)
        return available

    def check_critical(self):
        """Return True, after attempting a cleanup, when memory is below the threshold."""
        available = self.get_available_memory()
        if available < self.threshold_mb:
            print(f"  ⚠ AVISO: Memória baixa ({available:.0f}MB)!")
            self.cleanup()
            return True
        return False

memory_monitor = MemoryMonitor(threshold_mb=1000)
memory_monitor.log_memory("Startup")

# Metal GPU optimization: make the GPU the default MLX device, falling back
# to whatever device MLX already uses if that fails.
try:
    mx.set_default_device(mx.gpu)
except Exception as exc:
    # Keep the best-effort fallback, but say why the GPU was not selected.
    print("✓ CPU mode (GPU not available)")
    print(f"  Reason: {exc}")
else:
    print("✓ Metal GPU enabled for M1")
    print("  Available GPU: True")

  [Memory] Startup: 2202MB disponível
✓ Metal GPU enabled for M1
  Available GPU: True


In [15]:
# Create datasets - OTIMIZADO
class FarenseDataset:
    """Map-style dataset that tokenizes prompt+completion pairs on access.

    Args:
        data: Sequence of dicts with optional ``"prompt"`` and
            ``"completion"`` string fields.
        tokenizer: Callable with the Hugging Face tokenizer call signature
            (``text, truncation, max_length, padding, return_tensors``) that
            returns a mapping holding ``"input_ids"`` and, optionally,
            ``"attention_mask"``.
        max_length: Token length every example is truncated/padded to.
        max_chars: Character cap applied before tokenizing (``None`` disables
            it).

    Attributes:
        num_failures: How many examples could not be tokenized and were
            replaced by an all-zero placeholder.
    """

    # Only the first few tokenizer errors are printed in full; a systematic
    # failure would otherwise flood the notebook output.
    _MAX_REPORTED_FAILURES = 3

    def __init__(self, data, tokenizer, max_length=256, max_chars=2000):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.max_chars = max_chars
        self.num_failures = 0

        # padding="max_length" needs a pad token. If the tokenizer exposes
        # `pad_token` and it is unset, reuse the EOS token; padded positions
        # are excluded through the attention mask anyway.
        # NOTE(review): assumes the tokenizer object forwards attribute
        # assignment to the underlying tokenizer - confirm for mlx_lm wrappers.
        if getattr(tokenizer, "pad_token", "") is None:
            eos_token = getattr(tokenizer, "eos_token", None)
            if eos_token is not None:
                tokenizer.pad_token = eos_token

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{"input_ids", "attention_mask"}`` as 1-D arrays for example *idx*.

        If tokenization fails the example is replaced by all-zero arrays of
        length ``max_length`` (zero mask included), the failure is counted in
        ``num_failures`` and the first few errors are printed.
        """
        item = self.data[idx]
        text = f"{item.get('prompt', '')}{item.get('completion', '')}"
        # Cap the text length before tokenizing to bound tokenizer work.
        text = text[:self.max_chars]

        try:
            encodings = self.tokenizer(
                text,
                truncation=True,
                max_length=self.max_length,
                padding="max_length",
                return_tensors="np"
            )
            input_ids = np.asarray(encodings["input_ids"]).reshape(-1)
            attention_mask = encodings.get("attention_mask")
            if attention_mask is None:
                # No mask supplied: treat every position as a real token.
                attention_mask = np.ones_like(input_ids)
            return {
                "input_ids": input_ids,
                "attention_mask": np.asarray(attention_mask).reshape(-1),
            }
        except Exception as e:
            self.num_failures += 1
            if self.num_failures <= self._MAX_REPORTED_FAILURES:
                print(f"  [WARN] Tokenization failed for example {idx}: {str(e)[:200]}")
                if self.num_failures == self._MAX_REPORTED_FAILURES:
                    print("  [WARN] Further tokenization errors are counted in num_failures only")
            # Placeholder with an all-zero mask so it can be recognised and skipped.
            return {
                "input_ids": np.zeros(self.max_length, dtype=np.int32),
                "attention_mask": np.zeros(self.max_length, dtype=np.int32),
            }

# Take the sequence length from the training configuration so the datasets
# and the printed summary cannot drift from it.
max_seq_length = training_config["max_seq_length"]

train_dataset = FarenseDataset(train_data, tokenizer, max_length=max_seq_length)
val_dataset = FarenseDataset(val_data, tokenizer, max_length=max_seq_length)

print("✓ Datasets created (OTIMIZADO)")
print(f"  Train: {len(train_dataset)} examples, max_length={max_seq_length}")
print(f"  Val:   {len(val_dataset)} examples, max_length={max_seq_length}")
print("  Memória por exemplo: ~1-2MB (vs ~4-5MB antes)")

✓ Datasets created (OTIMIZADO)
  Train: 2413 examples, max_length=256
  Val:   269 examples, max_length=256
  Memória por exemplo: ~1-2MB (vs ~4-5MB antes)


In [16]:
# Training tracker
class TrainingTracker:
    """Persist training progress and checkpoints under one directory.

    Progress lives in ``training_state.json`` inside *checkpoints_dir*; each
    checkpoint gets its own ``checkpoint_epoch{epoch}_step{step}`` folder.

    Args:
        checkpoints_dir: Directory for the state file and checkpoint folders
            (created if missing).
    """

    # Weight snapshots larger than this are skipped. With adapters attached
    # and the base model frozen the trainable weights are small; if they are
    # not, this keeps every checkpoint from writing a multi-GB file.
    MAX_WEIGHT_BYTES = 2 * 1024 ** 3

    def __init__(self, checkpoints_dir):
        self.checkpoints_dir = Path(checkpoints_dir)
        self.checkpoints_dir.mkdir(parents=True, exist_ok=True)
        self.state_file = self.checkpoints_dir / "training_state.json"
        self.load_state()

    @staticmethod
    def _new_state():
        """Return the state of a run that has not started yet."""
        return {
            "epoch": 0,
            "step": 0,
            "best_loss": float('inf'),
            "start_time": datetime.now().isoformat(),
            "checkpoints": []
        }

    def load_state(self):
        """Load the saved state, or start fresh if it is missing or unreadable."""
        loaded = None
        if self.state_file.exists():
            try:
                with open(self.state_file, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
            except (OSError, ValueError) as exc:
                # A truncated or corrupt file must not block a new run.
                print(f"⚠ Ignoring unreadable {self.state_file.name}: {exc}")
            else:
                if not isinstance(loaded, dict):
                    print(f"⚠ Ignoring {self.state_file.name}: unexpected content")
                    loaded = None

        if loaded is None:
            self.state = self._new_state()
            print("✓ New training started")
            return

        # Fill in keys a hand-edited or older file may lack.
        for key, default in self._new_state().items():
            loaded.setdefault(key, default)
        self.state = loaded
        print(f"✓ Resuming from epoch {self.state.get('epoch')}")

    def save_state(self):
        """Write the state file atomically (temp file, then rename)."""
        tmp_file = self.state_file.with_name(self.state_file.name + ".tmp")
        with open(tmp_file, 'w', encoding='utf-8') as f:
            json.dump(self.state, f, indent=2, default=str)
        # An interrupted write leaves the previous state file intact.
        tmp_file.replace(self.state_file)

    def _save_weights(self, model, checkpoint_dir):
        """Save the model's trainable weights; return the file name or None.

        Best effort: a failure is reported and recorded as "no weights"
        rather than aborting training.
        """
        if model is None or not hasattr(model, "trainable_parameters"):
            return None
        try:
            import mlx.core as mx
            from mlx.utils import tree_flatten

            weights = dict(tree_flatten(model.trainable_parameters()))
            if not weights:
                return None
            total_bytes = sum(value.nbytes for value in weights.values())
            if total_bytes > self.MAX_WEIGHT_BYTES:
                print(
                    f"  ⚠ Trainable weights are {total_bytes / 1024 ** 2:.0f}MB; "
                    "skipping weight snapshot (is the base model frozen?)"
                )
                return None
            weights_file = checkpoint_dir / "adapters.safetensors"
            mx.save_safetensors(str(weights_file), weights)
            return weights_file.name
        except Exception as exc:
            print(f"  ⚠ Could not save checkpoint weights: {exc}")
            return None

    def save_checkpoint(self, model, epoch, step, loss):
        """Write a checkpoint folder (metadata and, if possible, weights).

        Args:
            model: Model whose trainable parameters are saved; ``None`` writes
                metadata only.
            epoch: Epoch index used in the folder name.
            step: Step number or label (e.g. ``'best'``) used in the folder name.
            loss: Loss value recorded with the checkpoint.
        """
        checkpoint_dir = self.checkpoints_dir / f"checkpoint_epoch{epoch}_step{step}"
        checkpoint_dir.mkdir(parents=True, exist_ok=True)

        checkpoint_info = {
            "epoch": epoch,
            "step": step,
            "loss": loss,
            "timestamp": datetime.now().isoformat(),
            "weights": self._save_weights(model, checkpoint_dir),
        }

        with open(checkpoint_dir / "checkpoint_info.json", 'w', encoding='utf-8') as f:
            json.dump(checkpoint_info, f, indent=2)

        self.state["checkpoints"].append({
            "path": str(checkpoint_dir),
            "epoch": epoch,
            "step": step,
            "loss": loss,
        })

        self.save_state()

# Creating the tracker ensures the checkpoint directory exists and loads
# training_state.json when one is present, so re-running this cell resumes
# from the recorded epoch instead of starting over.
tracker = TrainingTracker(CHECKPOINTS_DIR)

✓ Resuming from epoch 0


In [17]:
# Training functions - FIXED VERSION (corrected log_softmax)
def train_epoch(model, train_dataset, optimizer, epoch, config, tracker, memory_monitor):
    """Train for one epoch with a masked next-token loss and gradient accumulation.

    Each example is run through the model on its own. The loss is the
    cross-entropy between the prediction at position ``t`` and the token at
    ``t + 1``, averaged over non-padding targets only. Gradients from
    ``config['gradient_accumulation']`` consecutive examples are averaged
    before one optimizer update, which emulates a larger batch without
    holding more activations in memory.

    Args:
        model: MLX module called as ``model(input_ids)`` and expected to
            return logits (assumed shape ``(batch, seq, vocab)``).
        train_dataset: Indexable dataset whose items hold ``'input_ids'`` and,
            optionally, ``'attention_mask'`` (1 for real tokens).
        optimizer: MLX optimizer; updated in place.
        epoch: Zero-based epoch index (used for logging and checkpoints).
        config: Dict providing ``num_epochs``, ``batch_size``,
            ``logging_steps`` and ``save_steps``; ``gradient_accumulation``
            and ``memory_cleanup_steps`` are optional (defaults 1 and 10).
        tracker: Object with ``save_checkpoint(model, epoch, step, loss)``.
        memory_monitor: Object with ``check_critical``, ``cleanup`` and
            ``log_memory``.

    Returns:
        Mean of the per-step losses, or 0 if no example produced a finite loss.
    """
    import math

    from mlx.utils import tree_map
    from tqdm import tqdm

    def next_token_loss(input_ids, attention_mask):
        """Cross-entropy of each real token given the tokens before it."""
        # Compute the loss in float32 even if the model runs in half precision.
        logits = model(input_ids[:, :-1]).astype(mx.float32)
        targets = input_ids[:, 1:]
        # Padding targets carry no signal; weight them out of the average.
        weights = attention_mask[:, 1:].astype(mx.float32)
        token_losses = nn.losses.cross_entropy(logits, targets, reduction="none")
        return (token_losses * weights).sum() / mx.maximum(weights.sum(), 1.0)

    # nn.value_and_grad differentiates with respect to the model's trainable
    # parameters, which is the gradient layout optimizer.update expects.
    loss_and_grad = nn.value_and_grad(model, next_token_loss)

    accum_steps = max(1, int(config.get('gradient_accumulation', 1)))
    pending_grads = None   # running sum of gradients not yet applied
    pending_count = 0      # how many examples pending_grads holds

    def apply_pending():
        """Apply the averaged pending gradients and reset the accumulator."""
        nonlocal pending_grads, pending_count
        if pending_count == 0:
            return
        scale = 1.0 / pending_count
        optimizer.update(model, tree_map(lambda g: g * scale, pending_grads))
        # MLX is lazy: force the update so buffers are released now.
        mx.eval(model.parameters(), optimizer.state)
        pending_grads = None
        pending_count = 0

    print(f"\nEpoch {epoch + 1}/{config['num_epochs']}")
    total_loss = 0
    num_batches = 0

    batch_size = config['batch_size']
    num_steps = len(train_dataset) // batch_size

    memory_monitor.log_memory(f"Epoch {epoch + 1} start")

    for step in tqdm(range(num_steps), desc="Training", leave=False):
        try:
            # Skip the step when memory is critically low (after a cleanup).
            if memory_monitor.check_critical():
                print(f"  [WARN] Pulando step {step} - memória crítica")
                continue

            first = step * batch_size
            last = min(first + batch_size, len(train_dataset))

            step_loss = 0
            batch_count = 0

            for idx in range(first, last):
                try:
                    item = train_dataset[idx]
                    mask_np = item.get('attention_mask')
                    if mask_np is None:
                        mask_np = np.ones_like(item['input_ids'])
                    mask_np = np.asarray(mask_np).reshape(-1)
                    # Without a real target token (e.g. an all-zero
                    # placeholder example) there is nothing to learn from.
                    if not mask_np[1:].any():
                        continue

                    input_ids = mx.array(item['input_ids']).astype(mx.int32).reshape(1, -1)
                    attention_mask = mx.array(mask_np).reshape(1, -1)

                    loss_val, grads = loss_and_grad(input_ids, attention_mask)
                    mx.eval(loss_val, grads)
                    loss_float = loss_val.item()
                    if not math.isfinite(loss_float):
                        # Never let a NaN/inf gradient reach the weights.
                        continue

                    if pending_grads is None:
                        pending_grads = grads
                    else:
                        pending_grads = tree_map(lambda a, b: a + b, pending_grads, grads)
                        mx.eval(pending_grads)
                    pending_count += 1

                    step_loss += loss_float
                    batch_count += 1

                    if pending_count >= accum_steps:
                        apply_pending()
                except Exception as e:
                    # Best effort: one bad example should not end the epoch.
                    print(f"    [ERROR batch] {str(e)[:100]}")
                    continue

            if batch_count > 0:
                total_loss += step_loss / batch_count
                num_batches += 1

            # Periodic cache cleanup.
            if (step + 1) % config.get('memory_cleanup_steps', 10) == 0:
                memory_monitor.cleanup()

            # Periodic progress log.
            if (step + 1) % config['logging_steps'] == 0:
                avg_loss = total_loss / num_batches if num_batches > 0 else 0
                memory_monitor.log_memory(f"Step {step + 1}")
                print(f"  Step {step + 1}/{num_steps} - Loss: {avg_loss:.4f}", flush=True)

            # Periodic checkpoint.
            if (step + 1) % config['save_steps'] == 0:
                checkpoint_loss = total_loss / num_batches if num_batches > 0 else 0
                tracker.save_checkpoint(model, epoch, step + 1, checkpoint_loss)
                print(f"  ✓ Checkpoint saved at step {step + 1}", flush=True)

        except Exception as e:
            print(f"  Error in step {step}: {str(e)[:100]}", flush=True)
            continue

    # Apply whatever is left when the example count is not a multiple of
    # the accumulation window.
    try:
        apply_pending()
    except Exception as e:
        print(f"  Error applying final gradients: {str(e)[:100]}", flush=True)

    memory_monitor.cleanup()
    memory_monitor.log_memory(f"Epoch {epoch + 1} end")

    avg_epoch_loss = total_loss / num_batches if num_batches > 0 else 0
    print(f"  Epoch {epoch + 1} - Avg Loss: {avg_epoch_loss:.4f}")
    return avg_epoch_loss

def validate_model(model, val_dataset, config, memory_monitor):
    """Return the mean masked next-token loss over a bounded validation pass.

    The loss for an example is the cross-entropy between the prediction at
    position ``t`` and the token at ``t + 1``, averaged over non-padding
    targets. No gradients are computed and no weights are changed.

    Args:
        model: MLX module called as ``model(input_ids)`` and expected to
            return logits (assumed shape ``(batch, seq, vocab)``).
        val_dataset: Indexable dataset whose items hold ``'input_ids'`` and,
            optionally, ``'attention_mask'`` (1 for real tokens).
        config: Dict providing ``batch_size``; ``max_val_steps`` (default 30)
            caps the number of steps and ``memory_cleanup_steps`` (default 10)
            sets the cleanup interval.
        memory_monitor: Object with ``check_critical``, ``cleanup`` and
            ``log_memory``.

    Returns:
        Mean per-example loss, or 0 if no example produced a finite loss.
    """
    import math

    from tqdm import tqdm

    def next_token_loss(input_ids, attention_mask):
        """Cross-entropy of each real token given the tokens before it."""
        logits = model(input_ids[:, :-1]).astype(mx.float32)
        targets = input_ids[:, 1:]
        # Padding targets are weighted out of the average.
        weights = attention_mask[:, 1:].astype(mx.float32)
        token_losses = nn.losses.cross_entropy(logits, targets, reduction="none")
        return (token_losses * weights).sum() / mx.maximum(weights.sum(), 1.0)

    total_loss = 0
    num_batches = 0
    batch_size = config['batch_size']
    # Validation is capped so it stays cheap relative to a training epoch.
    num_steps = min(len(val_dataset) // batch_size, config.get('max_val_steps', 30))
    cleanup_every = config.get('memory_cleanup_steps', 10)

    memory_monitor.log_memory("Validation start")

    for step in tqdm(range(num_steps), desc="Validation", leave=False):
        try:
            # Stop early rather than risk running out of memory mid-validation.
            if memory_monitor.check_critical():
                break

            first = step * batch_size
            last = min(first + batch_size, len(val_dataset))

            for idx in range(first, last):
                try:
                    item = val_dataset[idx]
                    mask_np = item.get('attention_mask')
                    if mask_np is None:
                        mask_np = np.ones_like(item['input_ids'])
                    mask_np = np.asarray(mask_np).reshape(-1)
                    # Skip examples with no real target token.
                    if not mask_np[1:].any():
                        continue

                    input_ids = mx.array(item['input_ids']).astype(mx.int32).reshape(1, -1)
                    attention_mask = mx.array(mask_np).reshape(1, -1)

                    loss_val = next_token_loss(input_ids, attention_mask).item()
                    if math.isfinite(loss_val):
                        total_loss += loss_val
                        num_batches += 1
                except Exception as e:
                    print(f"    [ERROR val forward] {str(e)[:100]}")
                    continue

            # Periodic cache cleanup.
            if (step + 1) % cleanup_every == 0:
                memory_monitor.cleanup()

        except Exception as e:
            print(f"  Validation error at step {step}: {str(e)[:100]}")
            continue

    memory_monitor.cleanup()
    memory_monitor.log_memory("Validation end")

    return total_loss / num_batches if num_batches > 0 else 0

In [None]:
# Run training: attach LoRA adapters (once), then train and validate for the
# epochs that have not been completed yet.
print("\n" + "=" * 60)
print("TRAINING LORA (OTIMIZADO PARA M1)")
print("=" * 60)

try:
    from mlx.utils import tree_flatten
    from mlx_lm.tuner.utils import linear_to_lora_layers

    # Without this step every base-model weight stays trainable and the run
    # is a full fine-tune. Freeze the base model and wrap the configured
    # projections with LoRA layers. The guard makes the cell safe to re-run.
    # NOTE(review): the config keys ("rank", "scale", "dropout", "keys") and
    # the "self_attn.<proj>" module names follow current mlx-lm conventions -
    # confirm against the installed mlx-lm version.
    if not any("lora_" in name for name, _ in tree_flatten(model.parameters())):
        model.freeze()
        linear_to_lora_layers(
            model,
            len(model.layers),
            {
                "rank": lora_config["r"],
                "scale": lora_config["lora_alpha"] / lora_config["r"],
                "dropout": lora_config["lora_dropout"],
                "keys": [f"self_attn.{name}" for name in lora_config["target_modules"]],
            },
        )
    num_trainable = sum(value.size for _, value in tree_flatten(model.trainable_parameters()))
    print(f"  Trainable parameters: {num_trainable:,}")

    optimizer = optim.Adam(learning_rate=training_config['learning_rate'])

    # Carry the best loss across resumed runs.
    best_loss = tracker.state.get('best_loss')
    if not isinstance(best_loss, (int, float)):
        best_loss = float('inf')

    for epoch in range(tracker.state['epoch'], training_config['num_epochs']):
        print(f"\n{'='*60}")
        print(f"EPOCH {epoch + 1}/{training_config['num_epochs']}")
        print(f"{'='*60}")

        # Training (dropout active).
        model.train()
        epoch_loss = train_epoch(
            model,
            train_dataset,
            optimizer,
            epoch,
            training_config,
            tracker,
            memory_monitor
        )

        # Record progress.
        tracker.state['epoch'] = epoch + 1
        tracker.state['step'] = (epoch + 1) * len(train_dataset)
        tracker.save_state()

        # Validation (dropout disabled).
        print(f"\n  Validating...")
        model.eval()
        val_loss = validate_model(model, val_dataset, training_config, memory_monitor)
        print(f"  Val Loss: {val_loss:.4f}")

        # Choose the best model on held-out data. validate_model returns 0
        # when it could not score anything; fall back to the training loss.
        selection_loss = val_loss if val_loss > 0 else epoch_loss
        if selection_loss < best_loss:
            best_loss = selection_loss
            tracker.state['best_loss'] = best_loss
            tracker.save_checkpoint(model, epoch, 'best', selection_loss)
            print(f"  ✓ Best model saved (Loss: {selection_loss:.4f})")

        # Cleanup between epochs.
        memory_monitor.cleanup()
        print(f"  ✓ Epoch {epoch + 1} complete\n")

    # Leave the model in inference mode for the evaluation cells below.
    model.eval()

    print("\n" + "=" * 60)
    print("✓ TRAINING COMPLETE")
    print("=" * 60)
    memory_monitor.log_memory("Training complete")

except KeyboardInterrupt:
    print("\n✗ Training interrupted by user")
    tracker.save_state()
    print("✓ State saved")
    memory_monitor.cleanup()

except Exception as e:
    print(f"\n✗ Error during training: {e}")
    import traceback
    traceback.print_exc()
    tracker.save_state()
    print("✓ State saved")
    memory_monitor.cleanup()


TRAINING LORA (OTIMIZADO PARA M1)

EPOCH 1/3

Epoch 1/3
  [Memory] Epoch 1 start: 2259MB disponível


Training:   0%|                                                                                                                                         | 0/2413 [00:00<?, ?it/s]

## 5. Teste e Avaliação

In [None]:
# Test model
def generate_response(model, tokenizer, prompt, max_tokens=150):
    """Generate a completion for *prompt*; return None if generation fails.

    Any exception raised while generating is printed and swallowed, so the
    caller only has to check for ``None``.
    """
    generation_kwargs = {
        "prompt": prompt,
        "max_tokens": max_tokens,
        "verbose": False,
    }
    try:
        return generate(model, tokenizer, **generation_kwargs)
    except Exception as e:
        print(f"✗ Error generating response: {e}")
    return None

# Smoke-test the model on a few club-related questions and show the first
# 200 characters of each answer.
test_prompts = [
    "Qual foi a melhor classificação do Farense?",
    "Fala-me sobre Hassan Nader",
    "Qual é a história do Sporting Clube Farense?",
]

print("\nTesting Model")
print("=" * 60)

for prompt in test_prompts:
    print(f"\n? {prompt}")
    response = generate_response(model, tokenizer, prompt)
    if not response:
        print("✗ Failed to generate response")
        continue
    print(f"✓ {response[:200]}...")

## 6. Conversão e Export

In [None]:
# Export the fine-tuned adapter together with the configuration and metadata
# needed to reload and document it.
final_model_dir = OUTPUT_DIR / "mistral-7b-farense-lora"
final_model_dir.mkdir(parents=True, exist_ok=True)

from mlx.utils import tree_flatten

# Writing only JSON files leaves nothing for `adapter_path` to load, so the
# LoRA weights are saved too. Only parameters that belong to LoRA layers are
# exported; if none exist, nothing is written and a warning is printed.
lora_weights = {
    name: value
    for name, value in tree_flatten(model.trainable_parameters())
    if "lora_" in name
}
if lora_weights:
    mx.save_safetensors(str(final_model_dir / "adapters.safetensors"), lora_weights)
    # NOTE(review): adapter_config.json layout follows current mlx-lm
    # conventions (num_layers + lora_parameters) - confirm against the
    # installed mlx-lm version.
    adapter_config = {
        "fine_tune_type": "lora",
        "num_layers": len(model.layers),
        "lora_parameters": {
            "rank": lora_config["r"],
            "scale": lora_config["lora_alpha"] / lora_config["r"],
            "dropout": lora_config["lora_dropout"],
            "keys": [f"self_attn.{name}" for name in lora_config["target_modules"]],
        },
    }
    with open(final_model_dir / "adapter_config.json", 'w', encoding='utf-8') as f:
        json.dump(adapter_config, f, indent=2)
else:
    print("⚠ No LoRA weights found on the model - only configuration files were written")

lora_config_file = final_model_dir / "lora_config.json"
with open(lora_config_file, 'w', encoding='utf-8') as f:
    json.dump(lora_config, f, indent=2)

training_config_file = final_model_dir / "training_config.json"
with open(training_config_file, 'w', encoding='utf-8') as f:
    json.dump(training_config, f, indent=2)

metadata = {
    "model_name": "Mistral-7B-v0.1",
    "training_date": datetime.now().isoformat(),
    "framework": "MLX",
    "task": "Farense Bot Fine-Tuning",
    "data_sources": ["50_anos_00.jsonl", "biografias/jogadores/"],
    "total_training_examples": len(train_data),
    "total_validation_examples": len(val_data),
    "lora_rank": lora_config["r"],
    "num_epochs": training_config["num_epochs"],
}

metadata_file = final_model_dir / "metadata.json"
with open(metadata_file, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2, default=str)

print(f"\n✓ Model saved")
print(f"  {final_model_dir}")

In [None]:
# Render the integration guide (Markdown) with the paths and dataset sizes of
# this run, then store it next to the exported model files.
integration_guide = f"""# Integration Guide - Mistral-7B LoRA Model

## Model Info
- Base: mistralai/Mistral-7B-v0.1
- Adapter: {final_model_dir}
- Checkpoints: {CHECKPOINTS_DIR}
- Framework: MLX (Apple Silicon)
- Task: Farense Bot Fine-Tuning
- Training Examples: {len(train_data)}
- Validation Examples: {len(val_data)}

## Usage
```python
from mlx_lm import load, generate

model, tokenizer = load(
    "mistralai/Mistral-7B-v0.1",
    adapter_path="{final_model_dir}"
)

response = generate(
    model,
    tokenizer,
    prompt="Your prompt",
    max_tokens=200
)
```

## Integration with Express
Use the inference script: {TRAINING_ROOT}/scripts/inference.py

## Performance
- Response time: ~2-5 seconds
- Memory: ~14GB
- Hardware: Mac M1

Generated: {datetime.now().isoformat()}
"""

integration_file = final_model_dir / "INTEGRATION_GUIDE.md"
integration_file.write_text(integration_guide, encoding='utf-8')

print("\n✓ Integration guide saved")

In [None]:
# Create the standalone inference script.
# The base model and adapter path are injected from this notebook's own
# variables so the script always points at what was actually exported.
# Placeholders plus str.replace are used because the script body contains
# braces, which rules out str.format and f-strings.
inference_template = '''#!/usr/bin/env python3
"""Inference script for Mistral-7B LoRA - Farense Bot"""

import sys
import json
from pathlib import Path

try:
    from mlx_lm import load, generate
except ImportError:
    print("Error: mlx-lm not installed. Run: pip install mlx mlx-lm")
    sys.exit(1)

BASE_MODEL = __BASE_MODEL__
ADAPTER_PATH = __ADAPTER_PATH__
MAX_TOKENS = 200

def load_model():
    print("[INFO] Loading model...", file=sys.stderr)
    try:
        model, tokenizer = load(BASE_MODEL, adapter_path=ADAPTER_PATH)
        print("[OK] Model loaded", file=sys.stderr)
        return model, tokenizer
    except Exception as e:
        print(f"[ERROR] {e}", file=sys.stderr)
        raise

def generate_response(model, tokenizer, prompt):
    try:
        response = generate(
            model,
            tokenizer,
            prompt=prompt,
            max_tokens=MAX_TOKENS,
            verbose=False
        )
        return response
    except Exception as e:
        print(f"[ERROR] {e}", file=sys.stderr)
        return None

def main():
    if len(sys.argv) < 2:
        print(json.dumps({"error": "Usage: python inference.py 'prompt'"}))
        sys.exit(1)

    prompt = sys.argv[1]

    try:
        model, tokenizer = load_model()
        response = generate_response(model, tokenizer, prompt)

        if response:
            result = {
                "prompt": prompt,
                "response": response,
                "status": "success"
            }
        else:
            result = {
                "prompt": prompt,
                "error": "Failed to generate",
                "status": "error"
            }

        print(json.dumps(result, ensure_ascii=False, indent=2))
    except Exception as e:
        print(json.dumps({
            "prompt": prompt,
            "error": str(e),
            "status": "error"
        }, ensure_ascii=False))
        sys.exit(1)

if __name__ == "__main__":
    main()
'''

# repr() produces a correctly quoted and escaped Python string literal.
inference_script = (
    inference_template
    .replace("__BASE_MODEL__", repr(MODEL_NAME))
    .replace("__ADAPTER_PATH__", repr(str(final_model_dir)))
)

inference_file = TRAINING_ROOT / "scripts" / "inference.py"
inference_file.parent.mkdir(parents=True, exist_ok=True)
with open(inference_file, 'w', encoding='utf-8') as f:
    f.write(inference_script)

# Mark the script executable so it can be run directly.
inference_file.chmod(0o755)
print("✓ Inference script created")

In [None]:
# Print a recap of the data sizes, model settings and output locations.
print(
    "\n" + "=" * 60,
    "TRAINING SUMMARY",
    "=" * 60,
    sep="\n",
)

summary = f"""
Data:
  Train: {len(train_data)} examples
  Val:   {len(val_data)} examples

Model:
  Base: Mistral-7B-v0.1
  Method: LoRA
  Rank: {lora_config['r']}

Training:
  Epochs: {training_config['num_epochs']}
  Batch: {training_config['batch_size']}
  LR: {training_config['learning_rate']}

Outputs:
  Checkpoints: {CHECKPOINTS_DIR}
  Model: {final_model_dir}
  Script: {TRAINING_ROOT}/scripts/inference.py

Status: ✓ Ready to train
"""

print(summary, "=" * 60, sep="\n")