# **calt Minimal Demo**

This notebook demonstrates the minimal code needed to:
1. Generate a dataset
2. Train a model
3. Show evaluation results

In [1]:
# Optional (development only): uncomment this cell to import calt from a local
# source checkout instead of the pip-installed package. It is disabled by default.
# import sys
# from pathlib import Path

# # Add development calt to path (prioritize over pip-installed version)
# # This notebook is in calt/examples/demos/, so we go up to calt/ and then to src/
# # When running in Jupyter, Path.cwd() gives the notebook's directory
# calt_dev_path = Path.cwd().parent.parent / "src"
# sys.path.insert(0, str(calt_dev_path))

# print(f"Using development calt from: {calt_dev_path}")

In [2]:
%%capture
# Install the `calt-x` package into the kernel's environment; %%capture hides pip's output.
%pip install calt-x

In [None]:
# On Colab: clone the repo so configs/ and data paths exist (skip if already present)
import os
import subprocess

try:
    import google.colab  # noqa: F401  (imported only to detect the Colab runtime)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB and not os.path.exists("configs/data.yaml"):
    # Clone only when no checkout exists yet, so re-running this cell from the
    # original directory reuses the existing `calt/` checkout.
    if not os.path.isdir("calt"):
        # check=True surfaces a failed clone right here instead of as a
        # confusing FileNotFoundError from os.chdir below.
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/HiroshiKERA/calt.git"],
            check=True,
        )
    # Move into the demo directory so the relative config/data paths resolve.
    os.chdir("calt/examples/demos")

print("Working directory:", os.getcwd())

## 1. Dataset Generation

<!-- Generate polynomial addition problems -->

In [3]:
import random

from omegaconf import OmegaConf

from calt.dataset import DatasetPipeline
from calt.dataset.sympy.utils.polynomial_sampler import PolynomialSampler


# Define instance generator: polynomial addition
def polynomial_addition_generator(seed):
    """Build one polynomial-addition instance for the given seed.

    Seeds Python's global ``random`` module, constructs a fresh
    ``PolynomialSampler`` (symbols ``"x0, x1"``, field ``"GF(7)"``,
    ``max_num_terms=2``, ``max_degree=2``, ``min_degree=1``), draws two
    polynomials from it, and pairs them with their sum.

    Args:
        seed: Value passed to ``random.seed`` before sampling.

    Returns:
        A ``(problem, solution)`` tuple: the object returned by
        ``sampler.sample(num_samples=2)`` and the result of ``sum`` over it.
    """
    # NOTE(review): presumably the sampler draws from the global `random`
    # state, which is why it is seeded here — confirm against PolynomialSampler.
    random.seed(seed)

    # Sampler configuration, kept together so the problem family is easy to tweak
    sampler_options = dict(
        symbols="x0, x1",
        field_str="GF(7)",
        max_num_terms=2,
        max_degree=2,
        min_degree=1,
    )

    # Two addends form the problem; their sum is the target
    addends = PolynomialSampler(**sampler_options).sample(num_samples=2)

    return addends, sum(addends)


# Read the dataset-generation settings from the YAML config
cfg = OmegaConf.load("configs/data.yaml")
dataset_cfg = cfg.dataset

# Build the pipeline from the `dataset` section, plugging in our instance generator
pipeline = DatasetPipeline.from_config(dataset_cfg, instance_generator=polynomial_addition_generator)

# Kick off dataset generation
pipeline.run()


Starting dataset generation for 2 dataset(s)
Dataset sizes: {'train': 10000, 'test': 1000}

---------------------------------- train ----------------------------------
Dataset size: 10000 samples  (Batch size: 10000)

--- Batch 1/1 ---
Processing samples 1-10000 (size: 10000)
Starting parallel processing...
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 4049 tasks      | elapsed:    0.9s
[Parallel(n_jobs=1)]: Done 4999 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 6049 tasks      | elapsed:    1.3s
[Parallel(n_

## 2. Model Training

<!-- Load data, create model, and train -->
<!-- The entire training pipeline can be summarized in just a few lines: -->


In [4]:
# Complete minimal training code
from omegaconf import OmegaConf

from calt.io import IOPipeline
from calt.models import ModelPipeline
from calt.trainer import TrainerPipeline

# Read the training settings from the YAML config
cfg = OmegaConf.load("./configs/train.yaml")

# Build the data artifacts; `result` is a mapping that is indexed below
# (and again in the evaluation cell) by key
io_pipeline = IOPipeline.from_config(cfg.data)
result = io_pipeline.build()
tokenizer = result["tokenizer"]

# Build the model from the `model` section of the config and the tokenizer
model_pipeline = ModelPipeline(cfg.model, tokenizer)
model = model_pipeline.build()

# Assemble the trainer from the `train` section plus the pieces built above
trainer_pipeline = TrainerPipeline(
    cfg.train,
    model=model,
    tokenizer=tokenizer,
    train_dataset=result["train_dataset"],
    eval_dataset=result["test_dataset"],
    data_collator=result["data_collator"],
)
trainer = trainer_pipeline.build()

# Train, then evaluate and report the returned success rate as a percentage
trainer.train()
success_rate = trainer.evaluate_and_save_generation()
print(f"Success rate: {100 * success_rate:.1f}%")

Loaded 10000 samples from ./data/train_raw.txt
Loaded 1000 samples from ./data/test_raw.txt
  super().__init__(*args, **kwargs)


Validating test dataset tokens... passed!


[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /home/ara_shun/.netrc.
[34m[1mwandb[0m: Currently logged in as: [33mshun-arkw[0m ([33mchiba-u[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
  app_url = wandb.util.app_url(tags["base_url"])  # type: ignore[index]
  self.scope.user = {"email": email}


  app_url = wandb.util.app_url(tags["base_url"])  # type: ignore[index]
  self.scope.user = {"email": email}


{'loss': 3.4939, 'grad_norm': 2.7110846042633057, 'learning_rate': 3.121019108280255e-05, 'epoch': 0.1597444089456869}
{'loss': 2.853, 'grad_norm': 2.885227680206299, 'learning_rate': 6.305732484076433e-05, 'epoch': 0.3194888178913738}
{'loss': 2.0588, 'grad_norm': 3.3938708305358887, 'learning_rate': 9.490445859872612e-05, 'epoch': 0.4792332268370607}
{'loss': 1.5457, 'grad_norm': 3.3687164783477783, 'learning_rate': 9.701704545454547e-05, 'epoch': 0.6389776357827476}
{'loss': 1.2316, 'grad_norm': 3.6334102153778076, 'learning_rate': 9.346590909090909e-05, 'epoch': 0.7987220447284346}
{'loss': 1.0385, 'grad_norm': 5.369253635406494, 'learning_rate': 8.991477272727273e-05, 'epoch': 0.9584664536741214}
{'loss': 0.8943, 'grad_norm': 3.8548665046691895, 'learning_rate': 8.636363636363637e-05, 'epoch': 1.1182108626198084}
{'loss': 0.8043, 'grad_norm': 3.8131232261657715, 'learning_rate': 8.28125e-05, 'epoch': 1.2779552715654952}
{'loss': 0.7522, 'grad_norm': 3.678562879562378, 'learning_ra

Running evaluate_and_save_generation (step=1000, metric_key_prefix=eval)


{'eval_loss': 0.42273804545402527, 'eval_token_accuracy': 0.8702629543696829, 'eval_success_rate': 0.217, 'eval_runtime': 0.1394, 'eval_samples_per_second': 7176.068, 'eval_steps_per_second': 229.634, 'epoch': 3.194888178913738}


Successfully saved generation results (step=1000, success_rate=0.2170)


{'eval_generation_success_rate': 0.217, 'eval_generation_step': 1000, 'epoch': 3.194888178913738}
{'loss': 0.4813, 'grad_norm': 4.7628655433654785, 'learning_rate': 3.6647727272727274e-05, 'epoch': 3.3546325878594248}
{'loss': 0.4641, 'grad_norm': 6.519922733306885, 'learning_rate': 3.3096590909090915e-05, 'epoch': 3.5143769968051117}
{'loss': 0.4667, 'grad_norm': 4.255920886993408, 'learning_rate': 2.954545454545455e-05, 'epoch': 3.6741214057507987}
{'loss': 0.464, 'grad_norm': 5.637125492095947, 'learning_rate': 2.5994318181818182e-05, 'epoch': 3.8338658146964857}
{'loss': 0.4464, 'grad_norm': 4.342232704162598, 'learning_rate': 2.244318181818182e-05, 'epoch': 3.9936102236421727}
{'loss': 0.448, 'grad_norm': 6.0849103927612305, 'learning_rate': 1.8892045454545457e-05, 'epoch': 4.15335463258786}
{'loss': 0.4379, 'grad_norm': 5.596210956573486, 'learning_rate': 1.534090909090909e-05, 'epoch': 4.313099041533547}
{'loss': 0.448, 'grad_norm': 6.68628454208374, 'learning_rate': 1.178977272

## 3. Showing Evaluation Results

In [5]:
from utils import showcase

# Both views read the same test dataset
# (requires running trainer.evaluate_and_save_generation() first)
test_dataset = result["test_dataset"]

# Show 10 success cases
showcase(test_dataset, success_cases=True, num_show=10)

# Show 10 failure cases
showcase(test_dataset, success_cases=False, num_show=10)

-------------------------
 success cases 
-------------------------
  [5] gen: 2*x0*x1+3*x0  |  ref: 2*x0*x1+3*x0
  [12] gen: 2*x0+2*x1  |  ref: 2*x0+2*x1
  [14] gen: 5*x0+x1  |  ref: 5*x0+x1
  [16] gen: 3*x0^2+6*x1  |  ref: 3*x0^2+6*x1
  [17] gen: 6*x0^2+3*x1+1  |  ref: 6*x0^2+3*x1+1
  [18] gen: 6*x1^2+4*x0  |  ref: 6*x1^2+4*x0
  [20] gen: 2*x0*x1+2*x1+5  |  ref: 2*x0*x1+2*x1+5
  [25] gen: x0*x1+x1^2+4*x1  |  ref: x0*x1+x1^2+4*x1
  [30] gen: 4*x0^2+x0*x1+2*x1  |  ref: 4*x0^2+x0*x1+2*x1
  [35] gen: x0*x1+x0+3  |  ref: x0*x1+x0+3
-------------------------
 failure cases 
-------------------------
  [0] gen: 4*x1  |  ref: 2*x1
  [1] gen: 4*x0^2+5*x1^2+4*x1+1  |  ref: 4*x0^2+4*x1^2+5*x1+1
  [2] gen: 5*x0^2+4*x0*x1  |  ref: 4*x0^2+5*x0*x1
  [3] gen: 4*x0*x1+3*x0  |  ref: 4*x0*x1+5*x0
  [4] gen: x0^2+2*x0+6*x1+2  |  ref: x0^2+6*x0+2*x1+2
  [6] gen: 6*x0+2*x1+4  |  ref: 2*x0+6*x1+4
  [7] gen: 3*x0*x1+2*x0  |  ref: 2*x0*x1+4*x0
  [8] gen: 4*x0*x1+2*x1^2+1  |  ref: 2*x0*x1+4*x1^2+1
  [9] gen: 