# GENESIS Level 0 Training

Train the Level 0 (Machine Code Patterns) model using Google Colab's free GPU.

## Setup
1. Go to Runtime → Change runtime type → GPU (T4)
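2. Run all cells in order (before running, replace `YOUR_USERNAME` in the clone cell with the GitHub account that hosts the repository)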

In [None]:
# Check GPU
# Runs nvidia-smi in a shell to list the GPU attached to this runtime.
# If the command is missing or reports no devices, the runtime is probably
# not a GPU runtime (see Setup step 1).
!nvidia-smi

In [None]:
# Clone the repository
# NOTE: YOUR_USERNAME is a placeholder; replace it with the GitHub account
# that hosts the repository before running this cell.
!git clone https://github.com/YOUR_USERNAME/genesis.git
# Make the checkout the notebook's working directory: the later cells use
# paths relative to it (e.g. `pip install -e .`, genesis_datasets/..., models/...).
%cd genesis

In [None]:
# Install dependencies
# Third-party packages installed with pip; -q keeps pip's output quiet.
!pip install -q torch transformers peft datasets accelerate capstone

In [None]:
# Install the package
# Installs the current directory (the cloned repository, after the earlier
# `%cd genesis`) in editable mode.
!pip install -e .

In [None]:
# Verify installation: report the PyTorch build and, when CUDA is usable,
# the name and total memory of device 0.
import torch

cuda_ok = torch.cuda.is_available()
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is in bytes; dividing by 1e9 reports decimal gigabytes
    print(f"VRAM: {device_props.total_memory / 1e9:.1f} GB")

In [None]:
# Check if dataset exists, generate if not
from pathlib import Path

dataset_path = Path("genesis_datasets/level0/train.jsonl")
if not dataset_path.exists():
    print("Generating dataset...")
    from genesis_datasets.generators.level0_generator import Level0DatasetGenerator, get_system_binaries
    
    generator = Level0DatasetGenerator(seed=42)
    samples = generator.generate_dataset(
        synthetic_count=5000,
        adversarial_count=1000,
        binary_paths=get_system_binaries(),
        binary_samples_per_file=100,
    )
    generator.save_dataset(samples, dataset_path)
    print(f"Generated {len(samples)} samples")
else:
    print(f"Dataset exists: {dataset_path}")
    !wc -l {dataset_path}

In [None]:
# Configure training: build the Level 0 TrainingConfig, print a short
# summary, and show whatever config.validate() returns.
# NOTE: Path comes from the `from pathlib import Path` in the dataset cell.
from core.training import ModelConfig, TrainingConfig

# distilgpt2: small model, fast training
model_config = ModelConfig(
    model_name="distilgpt2",
    use_lora=True,
    lora_r=8,
    max_length=256,
)

config = TrainingConfig(
    output_dir=Path("models/level0"),
    model=model_config,
    batch_size=8,
    num_epochs=3,
    learning_rate=2e-5,
    fp16=True,
    logging_steps=50,
    save_steps=500,
)

summary_lines = [
    "Configuration:",
    f"  Model: {config.model.model_name}",
    f"  LoRA: r={config.model.lora_r}",
    f"  Batch size: {config.batch_size}",
    f"  Epochs: {config.num_epochs}",
]
print("\n".join(summary_lines))

# validate() returns an iterable of warning messages; print each one
warnings = config.validate()
for warning in warnings:
    print(f"⚠️ {warning}")

In [None]:
# Train! Runs Level 0 training with the config and dataset path defined in
# the earlier cells, then prints the returned metrics.
from core.training import train_level0

metrics = train_level0(config, dataset_path)

banner = "=" * 50
print(f"\n{banner}")
print("Training Complete!")
print(banner)
print(f"Metrics: {metrics.to_dict()}")

In [None]:
# Check gate requirements: meets_gate_requirements() yields a pass flag
# and the list of failures to report when the flag is false.
passes, failures = metrics.meets_gate_requirements()

if not passes:
    print("❌ Model FAILS gate requirements:")
    for failure in failures:
        print(f"  - {failure}")
else:
    print("✅ Model PASSES gate requirements!")
    print("Level 0 is complete. Ready for Level 1.")

In [None]:
# Download the trained model
# Recursively zip models/level0/ (the output_dir set in the training config)
# into level0_model.zip in the current working directory.
!zip -r level0_model.zip models/level0/

# google.colab is provided by the Colab runtime, so this import is expected
# to fail elsewhere. files.download asks the browser to download the archive.
from google.colab import files
files.download('level0_model.zip')