# GENESIS Level 0 Training

Train the Level 0 (Machine Code Patterns) model using Google Colab's free GPU.

## Setup
1. Go to **Runtime → Change runtime type → GPU (T4)**
2. Run all cells in order (Ctrl+F9)

In [29]:
# Step 1: Check GPU availability
!nvidia-smi --query-gpu=name,memory.total --format=csv

name, memory.total [MiB]
Tesla T4, 15360 MiB


In [30]:
# Step 2: Clone the repository
import os

# Remove if exists and clone fresh
if os.path.exists('genesis'):
    !rm -rf genesis

!git clone https://github.com/0xMayoor/genesis.git
os.chdir('genesis')
print(f"Working directory: {os.getcwd()}")

Cloning into 'genesis'...
remote: Enumerating objects: 100, done.[K
remote: Counting objects: 100% (100/100), done.[K
remote: Compressing objects: 100% (83/83), done.[K
remote: Total 100 (delta 11), reused 98 (delta 9), pack-reused 0 (from 0)[K
Receiving objects: 100% (100/100), 128.96 KiB | 1.77 MiB/s, done.
Resolving deltas: 100% (11/11), done.
Working directory: /content/genesis/genesis/genesis/genesis/genesis


In [31]:
# Step 3: Install dependencies
!pip install -q torch transformers peft accelerate capstone hypothesis

In [32]:
# Step 4: Install GENESIS package
import subprocess
import sys

# Editable install of the project in the current working directory, without
# touching its dependencies (those are installed by the previous step).
# Run pip through the kernel's own interpreter and check the exit status:
# a `!pip ...` line never raises, so the success message below would be
# printed even when the install had failed.
install_cmd = [
    sys.executable, "-m", "pip", "install",
    "-e", ".", "--force-reinstall", "--no-deps", "-q",
]
install_result = subprocess.run(install_cmd, capture_output=True, text=True)

if install_result.stdout:
    print(install_result.stdout)
if install_result.stderr:
    print(install_result.stderr, file=sys.stderr)
if install_result.returncode != 0:
    raise RuntimeError(
        f"pip install failed with exit code {install_result.returncode}"
    )

print("Installation complete!")

  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
  Building editable for genesis (pyproject.toml) ... [?25l[?25hdone
Installation complete!


In [33]:
# Step 5: Verify installation
import sys
import os

# Put the current working directory at the front of sys.path so that the
# project's top-level packages (imported below) can be resolved from it.
# The clone step is expected to have left the kernel in the repository root.
cwd = os.getcwd()
if cwd not in sys.path:
    sys.path.insert(0, cwd)

import torch

# Report the PyTorch build and, when CUDA is usable, the attached GPU
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; shown here in decimal gigabytes
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# Smoke-test the project imports; an ImportError here means the install or
# the sys.path setup above did not work.
from core.training import TrainingConfig, ModelConfig
from levels.level0_machine import Level0Module

print("\n✅ All imports working!")

PyTorch: 2.9.0+cu126
CUDA available: True
GPU: Tesla T4
VRAM: 15.8 GB

✅ All imports working!


In [34]:
# Step 6: Check dataset
from pathlib import Path

# Relative to the current working directory (expected to be the repo root)
dataset_path = Path("genesis_datasets/level0/train.jsonl")

if dataset_path.exists():
    # Count lines inside a context manager so the file handle is closed
    # instead of being left open for the rest of the kernel session.
    with dataset_path.open(encoding="utf-8") as dataset_file:
        line_count = sum(1 for _ in dataset_file)
    print(f"✅ Dataset exists: {dataset_path}")
    print(f"   Samples: {line_count}")
else:
    print("⚠️ Dataset not found, generating...")
    # Imported lazily: the generator is only needed when the file is missing
    from genesis_datasets.generators.level0_generator import Level0DatasetGenerator

    # Fixed seed so a regenerated dataset is the same from run to run
    # (presumably — depends on the generator honouring the seed).
    generator = Level0DatasetGenerator(seed=42)
    samples = generator.generate_dataset(
        synthetic_count=5000,
        adversarial_count=1000,
        binary_paths=[],
    )
    dataset_path.parent.mkdir(parents=True, exist_ok=True)
    generator.save_dataset(samples, dataset_path)
    print(f"✅ Generated {len(samples)} samples")

✅ Dataset exists: genesis_datasets/level0/train.jsonl
   Samples: 1493


In [35]:
# Step 7: Configure training
from pathlib import Path
from core.training import TrainingConfig, ModelConfig

# NOTE(review): the recorded run of the training cell failed with
# "Target modules {'query', 'value'} not found in the base model".
# distilgpt2 is a GPT-2 style model whose attention projection is named
# `c_attn`, so the LoRA target modules in effect do not match it. They are not
# set in this cell, so presumably they come from a ModelConfig default —
# TODO confirm in core.training and override them for distilgpt2
# (e.g. ["c_attn"]) before training.
config = TrainingConfig(
    output_dir=Path("models/level0"),
    model=ModelConfig(
        model_name="distilgpt2",
        use_lora=True,
        lora_r=8,
        max_length=256,
    ),
    batch_size=8,
    num_epochs=3,
    learning_rate=2e-5,
    fp16=True,
    logging_steps=50,
    save_steps=500,
)

# Echo the settings that matter most for a quick sanity check
print("Training Configuration:")
print(f"  Model: {config.model.model_name}")
print(f"  LoRA rank: {config.model.lora_r}")
print(f"  Batch size: {config.batch_size}")
print(f"  Epochs: {config.num_epochs}")
print(f"  FP16: {config.fp16}")

# validate() returns a collection of warning messages; empty means no
# complaints. Named `config_warnings` to avoid shadowing the stdlib
# `warnings` module in the notebook namespace.
config_warnings = config.validate()
if config_warnings:
    for warning_message in config_warnings:
        print(f"⚠️ {warning_message}")
else:
    print("\n✅ Configuration valid!")

Training Configuration:
  Model: distilgpt2
  LoRA rank: 8
  Batch size: 8
  Epochs: 3
  FP16: True

✅ Configuration valid!


In [36]:
# Step 8: Train the model!
from core.training import train_level0

separator = "=" * 50

print("Starting training...")
print("This will take ~10-30 minutes on a T4 GPU.")
print(separator)

# `config` and `dataset_path` are defined by the configuration and dataset
# cells; run those first. The returned metrics are inspected in the next step.
metrics = train_level0(config, dataset_path)

print("\n" + separator)
print("🎉 Training Complete!")
print(separator)

Starting training...
This will take ~10-30 minutes on a T4 GPU.


ValueError: Target modules {'query', 'value'} not found in the base model. Please check the target modules and try again.

In [None]:
# Step 9: Check results
# Requires `metrics` from the training cell; if training failed, this cell
# raises NameError because `metrics` was never assigned.
print("Training Metrics:")
print(f"  Accuracy: {metrics.accuracy:.2%}")
print(f"  Adversarial Refusal Rate: {metrics.adversarial_refusal_rate:.2%}")
print(f"  False Positive Rate: {metrics.false_positive_rate:.2%}")
print(f"  Total Samples: {metrics.total_samples}")

print("\nGate Requirements:")
# meets_gate_requirements() returns a pass flag plus the list of failed checks
passes, failures = metrics.meets_gate_requirements()

if passes:
    print("✅ Model PASSES all gate requirements!")
    print("🚀 Level 0 complete. Ready for Level 1 (Assembly).")
else:
    print("❌ Model FAILS gate requirements:")
    for failure in failures:
        print(f"  - {failure}")
    print("\nConsider: more epochs, larger dataset, or hyperparameter tuning.")

In [None]:
# Step 10: Download trained model
import shutil
from pathlib import Path

from google.colab import files

model_dir = Path("models/level0")
if not model_dir.is_dir():
    # Fail with a clear message instead of an opaque error from the download
    raise FileNotFoundError(
        f"{model_dir} does not exist - run the training cell first"
    )

# Build the archive from scratch on every run. `zip -r` on an existing archive
# updates it in place, which would keep files left over from an earlier run.
# Entries are stored under models/level0/, the same layout `zip -r` produced.
shutil.make_archive("level0_model", "zip", root_dir=".", base_dir=str(model_dir))

# Triggers a browser download of the archive from the Colab VM
files.download("level0_model.zip")

print("\n📦 Model downloaded! Extract and place in your local genesis/models/level0/ folder.")