# 🎮 Fine-tuning Mistral 7B pour GW2 WvW Counter-Picker

Ce notebook permet de fine-tuner le modèle Mistral 7B sur les données de combats GW2 WvW.

**Prérequis** :
- Google Colab (gratuit)
- GPU T4 (15GB VRAM - suffisant avec quantification 4-bit)
- ~45-60 minutes pour le fine-tuning

**Fonctionnalités** :
- ✅ Checkpoints automatiques (reprise après interruption)
- ✅ Sauvegarde sur Google Drive
- ✅ Export GGUF pour Ollama

**Note** : Mistral 7B est plus gros que Qwen2.5:3b, donc :
- Meilleure qualité de réponses
- Plus lent à entraîner et à inférer
- Nécessite ~6GB RAM pour tourner sur Ollama

## 1️⃣ Configuration et dépendances

In [None]:
# V√©rifier le GPU disponible
!nvidia-smi

import torch
print(f"\n‚úì PyTorch version: {torch.__version__}")
print(f"‚úì CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"‚úì GPU: {torch.cuda.get_device_name(0)}")
    print(f"‚úì VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

In [None]:
# Mount Google Drive so that checkpoints can be saved on it.
import os

from google.colab import drive

drive.mount('/content/drive')

# Backup locations on Drive; the checkpoint folder is created when missing.
SAVE_DIR = "/content/drive/MyDrive/GW2_FineTuning"
CHECKPOINT_DIR = os.path.join(SAVE_DIR, "checkpoints_mistral")
os.makedirs(CHECKPOINT_DIR, exist_ok=True)
print(f"‚úì Dossier de sauvegarde: {SAVE_DIR}")

In [None]:
# Installer Unsloth
%%capture
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps trl peft accelerate bitsandbytes triton
!pip install datasets huggingface_hub

print("‚úì D√©pendances install√©es")

## 2️⃣ Charger le dataset GW2 WvW

In [None]:
# Make the dataset available locally, uploading it from the PC on first use.
from google.colab import files
import shutil

# Local file name that the dataset-loading cell reads.
LOCAL_DATASET = "finetune_dataset_mistral.jsonl"
# Copy kept on Drive so the upload is only needed once.
DATASET_PATH = f"{SAVE_DIR}/finetune_dataset_mistral.jsonl"

if os.path.exists(DATASET_PATH):
    print(f"‚úì Dataset trouv√© sur Drive: {DATASET_PATH}")
else:
    print("üìÅ Upload le fichier 'finetune_dataset_mistral.jsonl' depuis ton PC:")
    uploaded = files.upload()
    if not uploaded:
        # A cancelled upload would otherwise only fail later, when loading.
        raise FileNotFoundError(
            "No file was uploaded; re-run this cell and select "
            "'finetune_dataset_mistral.jsonl'."
        )
    # Save to Drive for the next sessions.
    for filename in uploaded.keys():
        shutil.copy(filename, DATASET_PATH)
        print(f"‚úì Dataset sauvegard√© sur Drive")

# Always materialise the expected local file from the Drive copy, so loading
# works even when the uploaded file had a different name.
shutil.copy(DATASET_PATH, LOCAL_DATASET)

In [None]:
# Load the local JSONL file as the "train" split and preview the first record.
from datasets import load_dataset

dataset = load_dataset(
    "json",
    data_files="finetune_dataset_mistral.jsonl",
    split="train",
)

print(f"‚úì Dataset charg√©: {len(dataset)} exemples")
print(f"\nüìã Exemple:")
first_example = dataset[0]
# Only the first 200 characters of the instruction are shown.
instruction_preview = first_example['instruction'][:200]
print(f"Instruction: {instruction_preview}...")
print(f"Output: {first_example['output']}")

In [None]:
# Format the dataset for Mistral ([INST] format)
def format_prompt(example):
    """Build the training text for one dataset record.

    Args:
        example: Mapping with an 'instruction' string and an 'output' string.

    Returns:
        A dict with a single "text" key holding the instruction (wrapped in
        [INST] ... [/INST] unless it already starts with [INST]), a newline,
        the output, and the closing "</s>" marker.
    """
    # Leading whitespace is dropped before the check: otherwise an instruction
    # such as "  [INST] ... [/INST]" fails startswith() and gets wrapped twice.
    instruction = example['instruction'].lstrip()
    if not instruction.startswith('[INST]'):
        instruction = f"[INST] {instruction} [/INST]"
    return {
        "text": f"{instruction}\n{example['output']}</s>"
    }

# Run format_prompt over every record, then preview the first formatted text.
formatted_dataset = dataset.map(format_prompt)
print("‚úì Dataset format√© pour Mistral")
print("\nüìã Exemple format√©:")
# Only the first 600 characters are shown.
formatted_preview = formatted_dataset[0]['text'][:600]
print(formatted_preview)

## 3️⃣ Charger le modèle Mistral 7B

In [None]:
from unsloth import FastLanguageModel

max_seq_length = 2048
dtype = None
load_in_4bit = True  # IMPORTANT: 4-bit so the model fits in 15GB of VRAM

# A saved "checkpoint-latest" folder, when present, is loaded instead of the
# base model.
latest_checkpoint_dir = f"{CHECKPOINT_DIR}/checkpoint-latest"
checkpoint_exists = os.path.exists(latest_checkpoint_dir)

if checkpoint_exists:
    print("üîÑ Checkpoint trouv√©! Reprise de l'entra√Ænement...")
    model_source = latest_checkpoint_dir
else:
    print("üì• Chargement du mod√®le Mistral-7B-Instruct-v0.3...")
    model_source = "mistralai/Mistral-7B-Instruct-v0.3"

# Both cases use the same loading options; only the source differs.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_source,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

print(f"‚úì Mod√®le charg√©")

In [None]:
# Add LoRA adapters on the listed projection modules.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
lora_settings = dict(
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)
model = FastLanguageModel.get_peft_model(model, **lora_settings)

print(f"‚úì Adaptateurs LoRA ajout√©s")

## 4️⃣ Fine-tuning avec checkpoints automatiques

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# bf16 is used when the GPU reports support for it, fp16 otherwise.
bf16_supported = torch.cuda.is_bf16_supported()

# Training configuration: batch size 1 with 8 accumulation steps, and a
# checkpoint written to CHECKPOINT_DIR every 50 steps (3 kept at most).
training_args = TrainingArguments(
    output_dir=CHECKPOINT_DIR,
    per_device_train_batch_size=1,  # reduced for Mistral 7B
    gradient_accumulation_steps=8,  # offsets the small batch size
    num_train_epochs=3,
    warmup_steps=10,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=bf16_supported,
    fp16=not bf16_supported,
    seed=42,
    logging_steps=10,
    report_to="none",
    save_strategy="steps",
    save_steps=50,
    save_total_limit=3,
    resume_from_checkpoint=checkpoint_exists or None,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

print(f"‚úì Trainer configur√© avec checkpoints automatiques")
print(f"‚úì Sauvegarde toutes les 50 steps dans: {CHECKPOINT_DIR}")

In [None]:
# Launch fine-tuning, resuming from the newest Trainer checkpoint if any.
from transformers.trainer_utils import get_last_checkpoint

print("üöÄ D√©marrage du fine-tuning Mistral 7B...")
print("‚è±Ô∏è Dur√©e estim√©e: 45-60 minutes sur GPU T4")
print("üíæ Checkpoints sauvegard√©s sur Google Drive (reprise automatique si interruption)")
print("-" * 50)

# The Trainer writes its periodic saves as "checkpoint-<step>" folders in
# CHECKPOINT_DIR. get_last_checkpoint returns the newest of them, or None when
# there is none yet. The "checkpoint-latest" folder is only written once
# training has finished and is not a Trainer checkpoint, so it cannot be used
# to detect an interrupted run.
last_checkpoint = get_last_checkpoint(CHECKPOINT_DIR)
if last_checkpoint is not None:
    print(f"Reprise depuis: {last_checkpoint}")

# With None the training starts from step 0; with a path it resumes from it.
trainer_stats = trainer.train(resume_from_checkpoint=last_checkpoint)

print("-" * 50)
print(f"‚úì Fine-tuning termin√©!")
print(f"‚úì Loss finale: {trainer_stats.training_loss:.4f}")

In [None]:
# Save the final model and tokenizer into the "checkpoint-latest" folder.
final_checkpoint_dir = f"{CHECKPOINT_DIR}/checkpoint-latest"
for artifact in (model, tokenizer):
    artifact.save_pretrained(final_checkpoint_dir)
print(f"‚úì Checkpoint final sauvegard√© sur Drive")

## 5️⃣ Tester le modèle fine-tuné

In [None]:
# Switch the model to inference mode and run one sample counter-pick prompt.
FastLanguageModel.for_inference(model)

test_prompt = """[INST] Guild Wars 2 WvW counter-picker.

VALID SPECS: Firebrand, Willbender, Dragonhunter, Spellbreaker, Berserker, Bladesworn, Herald, Vindicator, Renegade, Scrapper, Holosmith, Mechanist, Druid, Soulbeast, Untamed, Daredevil, Deadeye, Specter, Tempest, Weaver, Catalyst, Chronomancer, Mirage, Virtuoso, Reaper, Scourge, Harbinger

Mode: ZERG (25+ players)
Enemy: 4x Firebrand, 3x Scourge, 2x Scrapper, 2x Spellbreaker

[ENEMY ANALYSIS]
- Firebrand: support, heal, stability (weak to: boon strip, boon corrupt)
- Scourge: condi, corrupt, barrier (weak to: burst, focus fire)
- Scrapper: support, superspeed, cleanse (weak to: boon strip, focus fire)
- Spellbreaker: frontline, strip, cc (weak to: condi pressure, kiting)

Respond EXACTLY in this format:
CONTER: Nx Spec, Nx Spec
FOCUS: Target1 > Target2
TACTIQUE: One tactical advice [/INST]"""

inputs = tokenizer(test_prompt, return_tensors="pt").to("cuda")

outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    temperature=0.1,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

# generate() returns the prompt tokens followed by the new ones. Keep only the
# new tokens instead of splitting the decoded text on "[/INST]": that marker
# can be removed by skip_special_tokens=True, which would leave the whole
# prompt in the printed answer.
prompt_length = inputs["input_ids"].shape[1]
generated_tokens = outputs[0][prompt_length:]
response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print("üìã Test du mod√®le fine-tun√©:")
print("=" * 50)
print(response.strip())

## 6️⃣ Exporter en GGUF pour Ollama

In [None]:
# Export to GGUF (Q4_K_M = good quality/size balance) into a Drive folder.
GGUF_DIR = os.path.join(SAVE_DIR, "mistral-7b-gw2-gguf")
gguf_quantization = "q4_k_m"

model.save_pretrained_gguf(
    GGUF_DIR,
    tokenizer,
    quantization_method=gguf_quantization,
)

print(f"‚úì Mod√®le export√© en GGUF")
print(f"üìÅ Fichier sauvegard√© sur Drive: {GGUF_DIR}")

In [None]:
# Download the exported GGUF file from the export folder.
from google.colab import files

# Sorted so that the selected file does not depend on os.listdir's arbitrary
# ordering when several .gguf files are present.
gguf_files = sorted(f for f in os.listdir(GGUF_DIR) if f.endswith('.gguf'))
if gguf_files:
    gguf_path = os.path.join(GGUF_DIR, gguf_files[0])
    print(f"üì• T√©l√©chargement de {gguf_files[0]}...")
    print(f"   Taille: {os.path.getsize(gguf_path) / 1e9:.2f} GB")
    files.download(gguf_path)
else:
    # This branch runs only when the folder holds no .gguf file, so report
    # that instead of claiming the file is already on Drive.
    print("Aucun fichier .gguf dans le dossier d'export. Relance d'abord la cellule d'export GGUF.")
    print(f"üìÅ Chemin: {GGUF_DIR}")

## 7️⃣ Instructions pour Ollama

```bash
# 1. Copier le fichier GGUF sur le serveur/PC local
scp unsloth.Q4_K_M.gguf user@server:/home/user/models/

# 2. Créer le Modelfile
cat > Modelfile << 'EOF'
FROM /home/user/models/unsloth.Q4_K_M.gguf

TEMPLATE """[INST] {{ .Prompt }} [/INST]
{{ .Response }}</s>"""

PARAMETER temperature 0.2
PARAMETER num_predict 80
PARAMETER num_ctx 1024
PARAMETER stop "</s>"
EOF

# 3. Créer le modèle Ollama
ollama create mistral-gw2 -f Modelfile

# 4. Tester
ollama run mistral-gw2
```

### Dans counter_ai.py:
```python
MODEL_NAME = "mistral-gw2"  # Modèle fine-tuné
```