# 🧬 Iseer Architecture Training

**Mamba SSM + Mixture of Experts — From Scratch**

Built by Iseer & Co.

---

⚠️ **Before running:** Go to Runtime > Change runtime type and select a GPU (T4) as the hardware accelerator.

## 1️⃣ Setup

In [None]:
!nvidia-smi

import torch
print(f'PyTorch: {torch.__version__}')
print(f'CUDA: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')

In [None]:
# Install third-party packages quietly (-q). `datasets` is imported further down
# to stream the corpus; einops is presumably required by the iseer package
# itself — confirm against the repository's requirements.
!pip install -q einops datasets

In [None]:
!rm -rf IseerArchitecture
!git clone https://github.com/InanXR/IseerArchitecture.git
%cd IseerArchitecture

## 2️⃣ Load Model

In [None]:
import copy
import sys

# The current directory is the cloned repo; put it first on the import path so
# the `iseer` package resolves to that checkout.
sys.path.insert(0, '.')

from iseer.model.config import ISEER_SM
from iseer.model.iseer import Iseer
from iseer.tokenizer.bpe import BPETokenizer

tokenizer = BPETokenizer.load('iseer/tokenizer/vocab.json')
print(f'Vocab: {len(tokenizer):,}')

# Work on a private copy: ISEER_SM is a module-level object shared by every
# importer, so assigning vocab_size on it directly would silently alter the
# preset for the rest of the session.
config = copy.deepcopy(ISEER_SM)
config.vocab_size = len(tokenizer)
model = Iseer(config)

# count_parameters() returns a pair, reported here as total vs. active counts.
total, active = model.count_parameters()
print(f'Total: {total:,} | Active: {active:,}')

## 3️⃣ Load Data

In [None]:
from itertools import islice

from datasets import load_dataset
from tqdm import tqdm

# Corpus size knobs, named once so the progress-bar total and the real cut-off
# cannot drift apart.
NUM_DOCS = 10000   # documents taken from the head of the stream
MAX_CHARS = 2000   # each document is truncated to this many characters

print('Loading FineWeb-Edu...')
ds = load_dataset('HuggingFaceFW/fineweb-edu-score-2', 'default', split='train', streaming=True)

# islice ends iteration after exactly NUM_DOCS items, so no extra document is
# fetched from the stream just to detect the cut-off.
texts = [
    item['text'][:MAX_CHARS]
    for item in tqdm(islice(ds, NUM_DOCS), total=NUM_DOCS)
]

print(f'Loaded {len(texts):,} texts')

In [None]:
from iseer.data.dataset import create_dataloader

# Batching settings for the training loader, gathered in one place.
loader_settings = {'batch_size': 8, 'seq_len': 512}

# Build the loader from the raw texts and the tokenizer. create_dataloader is
# project code; presumably it tokenizes and cuts the text into seq_len-token
# windows — confirm against iseer/data/dataset.py.
train_loader = create_dataloader(texts=texts, tokenizer=tokenizer, **loader_settings)

## 4️⃣ Train!

In [None]:
from iseer.training.trainer import Trainer, TrainingConfig

# Hyperparameters for this run, grouped by concern. The values are passed by
# keyword, so their order here is purely for readability.
train_config = TrainingConfig(
    # Optimisation schedule
    max_steps=5000,
    warmup_steps=100,
    learning_rate=3e-4,
    # Batching (presumably 8 x 4 = 32 sequences per optimizer step — confirm
    # how the Trainer applies gradient accumulation)
    batch_size=8,
    gradient_accumulation_steps=4,
    mixed_precision=True,
    # Logging and checkpointing
    output_dir='checkpoints',
    log_steps=50,
    save_steps=1000,
)

# Wire model, data and configuration together, then run the training loop.
trainer = Trainer(config=train_config, model=model, train_dataloader=train_loader)
trainer.train()

## 5️⃣ Save & Download

In [None]:
# One name for the weights file, shared by the save and the download step.
weights_path = 'iseer_sm_trained.pt'

# Persist only the parameters (state dict), not the full model object.
state = model.state_dict()
torch.save(state, weights_path)
print('Saved!')

# Imported after saving on purpose: the weights are already on disk even if
# this Colab-only module is unavailable.
from google.colab import files
files.download(weights_path)