# GENESIS Level 0 Training

1. **Runtime â†’ Change runtime type â†’ GPU (T4)**
2. Run all cells in order

In [8]:
# Step 1: Setup environment
import subprocess
import os
import sys

# Clone fresh
if os.path.exists('genesis'):
    subprocess.run(['rm', '-rf', 'genesis'], check=True)
subprocess.run(['git', 'clone', 'https://github.com/0xMayoor/genesis.git'], check=True)
os.chdir('genesis')

# Install
subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', 'torch', 'transformers', 'peft', 'accelerate', 'capstone', 'hypothesis'], check=True)
subprocess.run([sys.executable, '-m', 'pip', 'install', '-e', '.', '--force-reinstall', '--no-deps', '-q'], check=True)

# Add to path
sys.path.insert(0, os.getcwd())

print(f"âœ… Setup complete. Working dir: {os.getcwd()}")

âœ… Setup complete. Working dir: /content/genesis/genesis


In [9]:
# Step 2: Check GPU
import torch
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

CUDA: True
GPU: Tesla T4
VRAM: 15.8 GB


In [7]:
# Step 3: Train
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType
from torch.utils.data import DataLoader, Dataset
import json

# Config
MODEL_NAME = "distilgpt2"
BATCH_SIZE = 8
EPOCHS = 50  # More epochs for better convergence
LR = 5e-5    # Slightly higher learning rate
MAX_LENGTH = 256
DATASET_PATH = Path("genesis_datasets/level0/train.jsonl")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

# Load tokenizer and model
print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=False)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, token=False)

# Apply LoRA
print("Applying LoRA...")
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,        # Increased rank for more capacity
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["c_attn", "c_proj"],
)
model = get_peft_model(model, lora_config)
model.enable_input_require_grads()  # Critical for gradient flow
model.print_trainable_parameters()
model.to(device)

# Dataset
class Level0Dataset(Dataset):
    def __init__(self, path, tokenizer, max_length):
        self.samples = []
        with open(path) as f:
            for line in f:
                self.samples.append(json.loads(line))
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]
        # Format: bytes -> expected output
        hex_bytes = sample.get("raw_bytes", "")
        expected = sample.get("expected_mnemonic", "unknown")
        text = f"Disassemble: {hex_bytes}\nOutput: {expected}"

        encoded = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_length,
            padding="max_length",
            return_tensors="pt"
        )
        return {
            "input_ids": encoded["input_ids"].squeeze(),
            "attention_mask": encoded["attention_mask"].squeeze(),
            "labels": encoded["input_ids"].squeeze(),
        }

print(f"Loading dataset from {DATASET_PATH}...")
dataset = Level0Dataset(DATASET_PATH, tokenizer, MAX_LENGTH)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
print(f"Dataset: {len(dataset)} samples, {len(dataloader)} batches")

# Training with learning rate scheduler
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
model.train()

print(f"\nStarting training for {EPOCHS} epochs...")
print(f"Estimated time: ~60-90 minutes on T4")
print("="*50)

best_loss = float('inf')
for epoch in range(EPOCHS):
    total_loss = 0
    for batch_idx, batch in enumerate(dataloader):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping
        optimizer.step()

        total_loss += loss.item()

    scheduler.step()
    avg_loss = total_loss / len(dataloader)

    # Print every 5 epochs
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"Epoch {epoch+1}/{EPOCHS} | Avg Loss: {avg_loss:.4f} | LR: {scheduler.get_last_lr()[0]:.2e}")

    # Track best
    if avg_loss < best_loss:
        best_loss = avg_loss

print("="*50)
print(f"ðŸŽ‰ Training Complete! Best Loss: {best_loss:.4f}")

Device: cuda
Loading model...


`torch_dtype` is deprecated! Use `dtype` instead!


Applying LoRA...
trainable params: 811,008 || all params: 82,723,584 || trainable%: 0.9804
Loading dataset from genesis_datasets/level0/train.jsonl...




Dataset: 6575 samples, 822 batches

Starting training for 50 epochs...
Estimated time: ~60-90 minutes on T4


`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


Epoch 1/50 | Avg Loss: 0.6693 | LR: 5.00e-05
Epoch 5/50 | Avg Loss: 0.1451 | LR: 4.88e-05
Epoch 10/50 | Avg Loss: 0.1203 | LR: 4.52e-05
Epoch 15/50 | Avg Loss: 0.1097 | LR: 3.97e-05
Epoch 20/50 | Avg Loss: 0.1042 | LR: 3.27e-05
Epoch 25/50 | Avg Loss: 0.1015 | LR: 2.50e-05
Epoch 30/50 | Avg Loss: 0.0997 | LR: 1.73e-05
Epoch 35/50 | Avg Loss: 0.0987 | LR: 1.03e-05
Epoch 40/50 | Avg Loss: 0.0981 | LR: 4.77e-06
Epoch 45/50 | Avg Loss: 0.0978 | LR: 1.22e-06
Epoch 50/50 | Avg Loss: 0.0978 | LR: 0.00e+00
ðŸŽ‰ Training Complete! Best Loss: 0.0977


In [10]:
# Step 4: Quick test
model.eval()
test_cases = [
    "90",      # NOP
    "c3",      # RET
    "cc",      # INT3
    "55",      # PUSH RBP
    "5d",      # POP RBP
]

print("Quick inference test:")
for hex_bytes in test_cases:
    prompt = f"Disassemble: {hex_bytes}\nOutput:"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=10, do_sample=False, pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    result = response.split("Output:")[-1].strip().split()[0] if "Output:" in response else "?"
    print(f"  {hex_bytes} -> {result}")

Quick inference test:
  90 -> nop
  c3 -> ret
  cc -> int3
  55 -> push
  5d -> pop


In [11]:
# Step 5: Save model
save_path = Path("models/level0")
save_path.mkdir(parents=True, exist_ok=True)
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f"âœ… Model saved to {save_path}")

âœ… Model saved to models/level0




In [12]:
# Step 6: Download
import subprocess
subprocess.run(['zip', '-r', 'level0_model.zip', 'models/level0/'], check=True)
from google.colab import files
files.download('level0_model.zip')
print("ðŸ“¦ Download started!")

AttributeError: 'NoneType' object has no attribute 'kernel'