# Low-Rank MLP Growth Loop Debugger

Interactive notebook for probing repeated compress→double→train cycles on a fixed training window using the shared CLI helpers.


## Notebook Outline
1. Import the shared low-rank MLP helpers from `rank_mlp_double.py`, the `load_data` loader from `test_mlp.py`, and common utilities.
2. Configure a single training window and optimizer hyper-parameters.
3. Load the chain once, prepare the fixed dataset slice, and wire helper functions.
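4. Run a loop of compress→double→train trials, logging train losses, master logπ L1 errors, and ranks in a pandas table each round. Note: as the loop is currently written, the "double" step restores the ranks to the values configured in `LOW_RANK_ARCH["ranks"]`; the doubling rule is commented out.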


In [1]:
from __future__ import annotations

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from pathlib import Path
from copy import deepcopy
from types import SimpleNamespace

from rank_mlp_double import (
    LowRankMLP,
    collect_training_range,
    prepare_training_arrays,
    logpi_l1_error,
    run_training_cycle,
)

from test_mlp import load_data


In [2]:
# --- Data source and noise scales -------------------------------------------
# DATA_PATH and both sigmas are passed to load_data(); the sigmas are also
# forwarded to run_training_cycle() and logpi_l1_error() by the helper cell.
DATA_PATH = "data1.h5"
SIGMA_PRIOR = 1.0
SIGMA_LIK = 0.3

# --- Training window and master validation window ---------------------------
# TRAIN_START_STEP / TRAIN_STEPS are the arguments to collect_training_range();
# MASTER_VAL_START / MASTER_VAL_LENGTH select the window given to
# logpi_l1_error().
TRAIN_START_STEP = 0
TRAIN_STEPS = 5000
MASTER_VAL_START = 50000
MASTER_VAL_LENGTH = None  # When None use the remainder of the chain.

# --- Experiment switches ------------------------------------------------------
# USE_STANDARDIZATION is forwarded to prepare_training_arrays() and
# logpi_l1_error().
USE_STANDARDIZATION = False
# NOTE(review): WARM_START is not referenced by any cell visible in this
# notebook — confirm whether it is still needed.
WARM_START = True
# Number of compress -> re-expand -> train rounds run by the loop cell.
NUM_GROWTH_TRIES = 100
# Argument handed to model.contract_ranks_by_ratio() at the start of each round.
GROWTH_COMPRESSION_RATIO = 0.25
RESULTS_CSV = None  # Optional CSV path if you want to persist the debug table.

# Constructor arguments for LowRankMLP (see make_low_rank_model()).
# "ranks" is additionally reused by the loop cell as the per-round target ranks.
LOW_RANK_ARCH = {
    "hidden_dim": 1024,
    "num_hidden_layers": 3,
    "ranks": [64, 64],
    "activation": nn.Tanh(),
    "noise_std": 0.3,
    "apply_final_activation": True,
}

# Optimizer / schedule settings consumed by run_training_cycle(). The meaning
# of each field is defined by that helper in rank_mlp_double.py, not here.
TRAINING_CFG = {
    "max_adam_epochs": 1000,
    "adam_lr": 1e-3,
    "adam_patience": 100,
    "tol": 1e-5,
    "max_lbfgs_iter": 50,
    "loss_name": "l1",
    "train_loops": 40,
    "batch_size": 64,
    "loss_domain": "obs",
    "batch_growth": 1.2,
    "verbose": 1,
    "loop_improvement_pct": 0.1,
}

# Attribute-style view of TRAINING_CFG; this namespace (not the dict) is what
# run_cycle() passes to run_training_cycle().
TRAINING_CFG_NS = SimpleNamespace(**TRAINING_CFG)

# Seed the NumPy and PyTorch global RNGs.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)
# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression: echoes the selected device as the notebook cell output.
DEVICE


device(type='cuda')

In [3]:
# Load the full chain once; every later cell reads these module-level arrays.
par, obs, y_obs, chain, props, logpi_true = load_data(DATA_PATH, SIGMA_PRIOR, SIGMA_LIK)
input_dim, output_dim = par.shape[1], obs.shape[1]

# Fixed training slice, reused unchanged by every training cycle.
train_indices = collect_training_range(chain, props, TRAIN_START_STEP, TRAIN_STEPS)
(
    X_train_raw,
    X_train_proc,
    y_train,
    logpi_train,
    x_mean,
    x_std,
) = prepare_training_arrays(par, obs, logpi_true, train_indices, USE_STANDARDIZATION)

# None is passed through untouched; an explicit length is clamped to what is
# left of the chain after MASTER_VAL_START (never below zero).
master_val_length = (
    None
    if MASTER_VAL_LENGTH is None
    else min(MASTER_VAL_LENGTH, max(0, chain.shape[0] - MASTER_VAL_START))
)

# One summary line per item, emitted in a single print call.
print(
    f"Device: {DEVICE}",
    f"Input dim: {input_dim}, output dim: {output_dim}",
    f"Total train samples in window: {train_indices.size}",
    f"Train steps: {TRAIN_STEPS}",
    f"Master validation start: {MASTER_VAL_START}, length: {master_val_length}",
    sep="\n",
)


[data] Loaded 'logpi' from file.
[data] par shape   : (28324, 30)
[data] obs shape   : (28324, 52)
[data] y_obs shape : (52,)
[data] chain shape : (56646,)
[data] props shape : (56646,)
Device: cuda
Input dim: 30, output dim: 52
Total train samples in window: 2501
Train steps: 5000
Master validation start: 50000, length: None


In [4]:
def make_low_rank_model() -> LowRankMLP:
    """Build a fresh ``LowRankMLP`` from the notebook-level configuration.

    The input/output sizes come from the loaded data (``input_dim`` and
    ``output_dim``); everything else is read from ``LOW_RANK_ARCH``. The model
    is returned exactly as constructed — moving it to a device is left to the
    caller.
    """
    arch = LOW_RANK_ARCH
    ctor_kwargs = {
        "input_dim": input_dim,
        "hidden_dim": arch["hidden_dim"],
        "output_dim": output_dim,
        "num_hidden_layers": arch["num_hidden_layers"],
        "ranks": arch["ranks"],
        # Optional keys: fall back to these defaults when absent from the config.
        "activation": arch.get("activation", nn.Tanh()),
        "noise_std": arch.get("noise_std", 0.01),
        "apply_final_activation": arch.get("apply_final_activation", True),
    }
    return LowRankMLP(**ctor_kwargs)

def run_cycle(candidate_model: LowRankMLP) -> float:
    """Run one training cycle for ``candidate_model`` on the fixed window.

    Thin wrapper around ``run_training_cycle`` that fills in the prepared
    training arrays, the device, the training config namespace and the noise
    scales from notebook globals. The helper's return value is handed back
    unchanged; the growth loop records it as the training loss.
    """
    training_data = (X_train_proc, y_train, X_train_raw, logpi_train)
    likelihood_args = (SIGMA_PRIOR, SIGMA_LIK, y_obs)
    return run_training_cycle(
        candidate_model,
        *training_data,
        DEVICE,
        TRAINING_CFG_NS,
        *likelihood_args,
    )

def eval_master(candidate_model: LowRankMLP) -> float:
    """Score ``candidate_model`` on the master validation window.

    Delegates to ``logpi_l1_error`` with the full data arrays, the
    ``chain``/``props`` arrays, the validation window (``MASTER_VAL_START`` and
    ``master_val_length``), the standardization settings, the noise scales and
    the device, all taken from notebook globals. The helper's result is
    returned unchanged.
    """
    dataset = (par, obs, logpi_true, y_obs, chain, props)
    window = (MASTER_VAL_START, master_val_length)
    standardization = (USE_STANDARDIZATION, x_mean, x_std)
    return logpi_l1_error(
        candidate_model,
        *dataset,
        *window,
        *standardization,
        SIGMA_PRIOR,
        SIGMA_LIK,
        DEVICE,
    )


In [5]:
# One dict per completed round; re-materialized as a DataFrame after each round
# so the table can be inspected while the loop is still running.
records: list[dict] = []

model = make_low_rank_model()
model.to(DEVICE)

# Baseline: train the freshly built model once so that round 1 has "from"
# values to compare against.
print("Running baseline training before the compression/doubling loop...")
prev_train_loss = run_cycle(model)
prev_master_error = eval_master(model)
print(f"  baseline train loss: {prev_train_loss:.4e}")
print(f"  baseline master logpi L1: {prev_master_error:.4e}")

# The optional CSV destination never changes between rounds, so resolve it and
# create its parent directory once instead of on every iteration.
out_path = Path(RESULTS_CSV) if RESULTS_CSV else None
if out_path is not None:
    out_path.parent.mkdir(parents=True, exist_ok=True)

for try_idx in range(1, NUM_GROWTH_TRIES + 1):
    print(f"\n=== Growth try {try_idx}/{NUM_GROWTH_TRIES} ===")
    start_ranks = tuple(int(r) for r in model.ranks)
    loss_from = float(prev_train_loss)
    master_from = float(prev_master_error)

    # Step 1: compress the current ranks by the configured ratio.
    model.contract_ranks_by_ratio(GROWTH_COMPRESSION_RATIO)
    compressed_ranks = tuple(int(r) for r in model.ranks)

    # Step 2: bring the ranks back to the configured values. This does NOT
    # double the compressed ranks; the doubling rule is currently disabled.
    # To re-enable it, use:
    #   [min(model.hidden_dim, max(1, r * 2)) for r in model.ranks]
    # A copy is passed so the model can never alias (and later mutate) the
    # shared LOW_RANK_ARCH["ranks"] list.
    # NOTE(review): despite the method name, the recorded output shows
    # end_ranks equal to the configured ranks after this call — confirm that
    # contract_ranks_by_amount() is the intended API for growing ranks.
    target_ranks = list(LOW_RANK_ARCH["ranks"])
    model.contract_ranks_by_amount(target_ranks)
    end_ranks = tuple(int(r) for r in model.ranks)

    # Step 3: retrain on the fixed window and re-score on the master window.
    train_loss = run_cycle(model)
    master_error = eval_master(model)

    record = {
        "iteration": try_idx,
        "train_loss_from": loss_from,
        "train_loss_to": float(train_loss),
        "master_logpi_from": master_from,
        "master_logpi_to": float(master_error),
        "start_ranks": start_ranks,
        "compressed_ranks": compressed_ranks,
        "end_ranks": end_ranks,
    }
    records.append(record)

    # This round's results become the next round's "from" values.
    prev_train_loss = train_loss
    prev_master_error = master_error

    results_df = pd.DataFrame(records)
    display(results_df)

    # Rewrite the CSV every round so a partial run still leaves a usable table.
    if out_path is not None:
        results_df.to_csv(out_path, index=False)
        print(f"Saved table to {out_path}")

Running baseline training before the compression/doubling loop...
[train][loop 1/40] loss=8.042509e-03 next_lr=1.000e-03 stop=completed
[train][loop 2/40] loss=6.700181e-03 next_lr=2.500e-04 stop=adam_plateau
[train][loop 3/40] loss=3.516936e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=2.691244e-03 next_lr=6.250e-05 stop=completed
[train][loop 5/40] loss=2.640627e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 6/40] loss=2.450036e-03 next_lr=1.563e-05 stop=completed
[train][loop 7/40] loss=2.440640e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 8/40] loss=2.390511e-03 next_lr=3.906e-06 stop=completed
[train][loop 9/40] loss=2.390511e-03 next_lr=9.766e-07 stop=adam_lr_min
  baseline train loss: 2.3905e-03
  baseline master logpi L1: 3.6012e+00

=== Growth try 1/100 ===
[train][loop 1/40] loss=2.589250e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=3.891151e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.025179e-03 next_lr=

Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"



=== Growth try 2/100 ===
[train][loop 1/40] loss=2.519303e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=3.858073e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.120058e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=1.484648e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=1.322978e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=1.322978e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"



=== Growth try 3/100 ===
[train][loop 1/40] loss=2.585933e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.157683e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.256973e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=1.549897e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=1.521757e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=1.371847e-03 next_lr=3.906e-06 stop=completed
[train][loop 7/40] loss=1.371847e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"



=== Growth try 4/100 ===
[train][loop 1/40] loss=2.568101e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.194380e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.423186e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=1.815928e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=1.616257e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=1.616257e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"



=== Growth try 5/100 ===
[train][loop 1/40] loss=2.518631e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.418831e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.435308e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=1.852111e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=1.676775e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=1.676775e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"



=== Growth try 6/100 ===
[train][loop 1/40] loss=2.494934e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.516411e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.482306e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=1.829821e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=1.797408e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=1.716644e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"



=== Growth try 7/100 ===
[train][loop 1/40] loss=2.470756e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.175196e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.904165e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.136131e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=2.102507e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=1.958742e-03 next_lr=3.906e-06 stop=completed
[train][loop 7/40] loss=1.958742e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"



=== Growth try 8/100 ===
[train][loop 1/40] loss=2.478739e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.321410e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.890834e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.231974e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=2.076264e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=2.076264e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"



=== Growth try 9/100 ===
[train][loop 1/40] loss=2.373329e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.680559e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=2.702253e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.223817e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=2.083174e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=2.083174e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"



=== Growth try 10/100 ===
[train][loop 1/40] loss=2.451753e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.130930e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.045902e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.392836e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=2.365368e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=2.240592e-03 next_lr=3.906e-06 stop=completed
[train][loop 7/40] loss=2.240592e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 11/100 ===
[train][loop 1/40] loss=2.414353e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.416607e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.224801e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.684399e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=2.525863e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=2.525863e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 12/100 ===
[train][loop 1/40] loss=2.422292e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.952427e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.104279e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.636036e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=2.509214e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=2.509214e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 13/100 ===
[train][loop 1/40] loss=2.361305e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.632283e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.480030e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.860281e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=2.829495e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=2.779142e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 14/100 ===
[train][loop 1/40] loss=2.432003e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=4.717571e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.299693e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=2.803196e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=2.758718e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 15/100 ===
[train][loop 1/40] loss=2.325947e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.500311e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.767275e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.210439e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.076644e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=3.076644e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 16/100 ===
[train][loop 1/40] loss=2.402523e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.381928e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.812201e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.284643e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.203318e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 17/100 ===
[train][loop 1/40] loss=2.316527e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.677707e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.672465e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.274901e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.111186e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=3.111186e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 18/100 ===
[train][loop 1/40] loss=2.420358e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.317886e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.537946e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.071898e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=2.954956e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=2.954956e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 19/100 ===
[train][loop 1/40] loss=2.491280e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.438583e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.218583e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.577251e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=3.541969e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=3.478019e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 20/100 ===
[train][loop 1/40] loss=2.374116e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.175725e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.119503e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.690185e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.549729e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=3.549729e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 21/100 ===
[train][loop 1/40] loss=2.405176e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.742311e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.955381e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.548960e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.482294e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 22/100 ===
[train][loop 1/40] loss=2.456899e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.022233e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.749830e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.342208e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.212753e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=3.212753e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 23/100 ===
[train][loop 1/40] loss=2.395197e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.322247e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.578190e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.225822e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.187557e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 24/100 ===
[train][loop 1/40] loss=2.406776e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.495401e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=3.895829e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.494365e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.367664e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=3.367664e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 25/100 ===
[train][loop 1/40] loss=2.440725e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.700139e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.122613e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.647361e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.521130e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=3.521130e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 26/100 ===
[train][loop 1/40] loss=2.434532e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.529574e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.551018e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.137405e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.052780e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 27/100 ===
[train][loop 1/40] loss=2.371207e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.840382e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.188525e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.817268e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.762237e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 28/100 ===
[train][loop 1/40] loss=2.438033e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.035204e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.156634e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.746946e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.661102e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 29/100 ===
[train][loop 1/40] loss=2.414529e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.133956e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.064891e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.597524e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.535902e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 30/100 ===
[train][loop 1/40] loss=2.436784e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.972717e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.374539e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.934248e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.860368e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 31/100 ===
[train][loop 1/40] loss=2.423000e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.421504e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.730807e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.225159e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.180642e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 32/100 ===
[train][loop 1/40] loss=2.326881e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.827728e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.040929e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.659467e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.611212e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 33/100 ===
[train][loop 1/40] loss=2.401983e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.853206e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.667082e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.191577e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.070051e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=4.070051e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 34/100 ===
[train][loop 1/40] loss=2.454662e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.753017e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.333990e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.903558e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.835342e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 35/100 ===
[train][loop 1/40] loss=2.418858e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.340334e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.536303e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.039015e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=4.022933e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=3.987569e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 36/100 ===
[train][loop 1/40] loss=2.410245e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.916987e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=5.657767e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=4.066037e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=3.701117e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=3.601897e-03 next_lr=3.906e-06 stop=completed
[train][loop 7/40] loss=3.601897e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 37/100 ===
[train][loop 1/40] loss=2.422266e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.681813e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=5.535633e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=4.017291e-03 next_lr=6.250e-05 stop=completed
[train][loop 5/40] loss=3.934897e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 6/40] loss=3.669085e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 7/40] loss=3.669085e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 38/100 ===
[train][loop 1/40] loss=2.422814e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.859275e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=5.636546e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=4.031871e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=3.718222e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=3.649662e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 39/100 ===
[train][loop 1/40] loss=2.457735e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.591022e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.526998e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.099298e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.940120e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=3.940120e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 40/100 ===
[train][loop 1/40] loss=2.457742e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=5.937245e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.754732e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.209449e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.162288e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 41/100 ===
[train][loop 1/40] loss=2.381731e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.019281e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.354909e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=3.956017e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=3.911492e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 42/100 ===
[train][loop 1/40] loss=2.391379e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.517836e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.218361e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.723076e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.661537e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 43/100 ===
[train][loop 1/40] loss=2.397180e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.269184e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.813363e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.373268e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.295574e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 44/100 ===
[train][loop 1/40] loss=2.392577e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.099138e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.894117e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.495152e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.359614e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=4.359614e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 45/100 ===
[train][loop 1/40] loss=2.488051e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.295568e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.983782e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.485154e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.337775e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=4.337775e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 46/100 ===
[train][loop 1/40] loss=2.423981e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.589595e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.733124e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.313984e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.249335e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 47/100 ===
[train][loop 1/40] loss=2.505547e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.789424e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.331207e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.820538e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=4.798898e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=4.760094e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 48/100 ===
[train][loop 1/40] loss=2.444829e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.978131e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.989745e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.550706e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.467930e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 49/100 ===
[train][loop 1/40] loss=2.490594e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.012145e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.669851e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.118401e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.062820e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 50/100 ===
[train][loop 1/40] loss=2.478913e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.349413e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.352828e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.860910e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.782888e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 51/100 ===
[train][loop 1/40] loss=2.529256e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.360401e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=4.992005e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.541160e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.399128e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=4.399128e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 52/100 ===
[train][loop 1/40] loss=2.436142e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.515030e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.437860e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.932834e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.803337e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=4.803337e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 53/100 ===
[train][loop 1/40] loss=2.406228e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.315752e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.726079e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.229832e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.185918e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 54/100 ===
[train][loop 1/40] loss=2.580292e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.250430e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.119876e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.662855e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.591294e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 55/100 ===
[train][loop 1/40] loss=2.407180e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.934580e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.995104e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.520712e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.469657e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 56/100 ===
[train][loop 1/40] loss=2.405262e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=6.784957e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.015330e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.553146e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.421526e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=4.421526e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 57/100 ===
[train][loop 1/40] loss=2.612844e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.310092e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.788595e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.250779e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.184053e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 58/100 ===
[train][loop 1/40] loss=2.507884e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.537825e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.312931e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.914821e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.815551e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 59/100 ===
[train][loop 1/40] loss=2.491918e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.392130e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.320120e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=4.815875e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.760379e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 60/100 ===
[train][loop 1/40] loss=2.587713e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.556234e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=7.220057e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=5.138730e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=4.691639e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=4.639694e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
5,6,0.001677,0.001717,3.846516,3.914516,"(64, 64)","(26, 58)","(64, 64)"
6,7,0.001717,0.001959,3.914516,3.839997,"(64, 64)","(23, 56)","(64, 64)"
7,8,0.001959,0.002076,3.839997,3.824233,"(64, 64)","(19, 52)","(64, 64)"
8,9,0.002076,0.002083,3.824233,3.782167,"(64, 64)","(19, 52)","(64, 64)"
9,10,0.002083,0.002241,3.782167,3.757793,"(64, 64)","(17, 50)","(64, 64)"



=== Growth try 61/100 ===
[train][loop 1/40] loss=2.485742e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.557048e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=7.177693e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=5.052201e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=4.585362e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=4.527858e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
56,57,0.004422,0.005184,4.128281,4.136038,"(64, 64)","(12, 34)","(64, 64)"
57,58,0.005184,0.004816,4.136038,4.120405,"(64, 64)","(12, 34)","(64, 64)"
58,59,0.004816,0.004760,4.120405,4.120878,"(64, 64)","(12, 34)","(64, 64)"
59,60,0.004760,0.004640,4.120878,4.187479,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 62/100 ===
[train][loop 1/40] loss=2.480955e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.646724e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.447550e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.006500e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=4.957553e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
57,58,0.005184,0.004816,4.136038,4.120405,"(64, 64)","(12, 34)","(64, 64)"
58,59,0.004816,0.004760,4.120405,4.120878,"(64, 64)","(12, 34)","(64, 64)"
59,60,0.004760,0.004640,4.120878,4.187479,"(64, 64)","(12, 34)","(64, 64)"
60,61,0.004640,0.004528,4.187479,4.174155,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 63/100 ===
[train][loop 1/40] loss=2.527541e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.250008e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.509858e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.092462e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.045593e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
58,59,0.004816,0.004760,4.120405,4.120878,"(64, 64)","(12, 34)","(64, 64)"
59,60,0.004760,0.004640,4.120878,4.187479,"(64, 64)","(12, 34)","(64, 64)"
60,61,0.004640,0.004528,4.187479,4.174155,"(64, 64)","(12, 34)","(64, 64)"
61,62,0.004528,0.004958,4.174155,4.101992,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 64/100 ===
[train][loop 1/40] loss=2.579843e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.920151e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.099819e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.577137e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.420078e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=5.420078e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
59,60,0.004760,0.004640,4.120878,4.187479,"(64, 64)","(12, 34)","(64, 64)"
60,61,0.004640,0.004528,4.187479,4.174155,"(64, 64)","(12, 34)","(64, 64)"
61,62,0.004528,0.004958,4.174155,4.101992,"(64, 64)","(12, 34)","(64, 64)"
62,63,0.004958,0.005046,4.101992,4.125048,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 65/100 ===
[train][loop 1/40] loss=2.456840e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.199804e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.539554e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.071269e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.012231e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
60,61,0.004640,0.004528,4.187479,4.174155,"(64, 64)","(12, 34)","(64, 64)"
61,62,0.004528,0.004958,4.174155,4.101992,"(64, 64)","(12, 34)","(64, 64)"
62,63,0.004958,0.005046,4.101992,4.125048,"(64, 64)","(12, 34)","(64, 64)"
63,64,0.005046,0.005420,4.125048,4.153733,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 66/100 ===
[train][loop 1/40] loss=2.502829e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.847996e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=7.205214e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=5.143076e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=4.756801e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=4.702850e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
61,62,0.004528,0.004958,4.174155,4.101992,"(64, 64)","(12, 34)","(64, 64)"
62,63,0.004958,0.005046,4.101992,4.125048,"(64, 64)","(12, 34)","(64, 64)"
63,64,0.005046,0.005420,4.125048,4.153733,"(64, 64)","(12, 34)","(64, 64)"
64,65,0.005420,0.005012,4.153733,4.124873,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 67/100 ===
[train][loop 1/40] loss=2.489178e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.776613e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.943866e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.494297e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.358682e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=5.358682e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
62,63,0.004958,0.005046,4.101992,4.125048,"(64, 64)","(12, 34)","(64, 64)"
63,64,0.005046,0.005420,4.125048,4.153733,"(64, 64)","(12, 34)","(64, 64)"
64,65,0.005420,0.005012,4.153733,4.124873,"(64, 64)","(12, 34)","(64, 64)"
65,66,0.005012,0.004703,4.124873,4.104093,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 68/100 ===
[train][loop 1/40] loss=2.526043e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.619304e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.203188e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.657052e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.614585e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
63,64,0.005046,0.005420,4.125048,4.153733,"(64, 64)","(12, 34)","(64, 64)"
64,65,0.005420,0.005012,4.153733,4.124873,"(64, 64)","(12, 34)","(64, 64)"
65,66,0.005012,0.004703,4.124873,4.104093,"(64, 64)","(12, 34)","(64, 64)"
66,67,0.004703,0.005359,4.104093,4.180245,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 69/100 ===
[train][loop 1/40] loss=2.474940e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.236719e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.741975e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.320527e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.273515e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
64,65,0.005420,0.005012,4.153733,4.124873,"(64, 64)","(12, 34)","(64, 64)"
65,66,0.005012,0.004703,4.124873,4.104093,"(64, 64)","(12, 34)","(64, 64)"
66,67,0.004703,0.005359,4.104093,4.180245,"(64, 64)","(12, 34)","(64, 64)"
67,68,0.005359,0.005615,4.180245,4.155546,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 70/100 ===
[train][loop 1/40] loss=2.474800e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.013538e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.961179e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.556532e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.396633e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=5.396633e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
65,66,0.005012,0.004703,4.124873,4.104093,"(64, 64)","(12, 34)","(64, 64)"
66,67,0.004703,0.005359,4.104093,4.180245,"(64, 64)","(12, 34)","(64, 64)"
67,68,0.005359,0.005615,4.180245,4.155546,"(64, 64)","(12, 34)","(64, 64)"
68,69,0.005615,0.005274,4.155546,4.206567,"(64, 64)","(12, 34)","(64, 64)"



=== Growth try 71/100 ===
[train][loop 1/40] loss=2.635641e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.489032e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.770011e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.243448e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.171414e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
66,67,0.004703,0.005359,4.104093,4.180245,"(64, 64)","(12, 34)","(64, 64)"
67,68,0.005359,0.005615,4.180245,4.155546,"(64, 64)","(12, 34)","(64, 64)"
68,69,0.005615,0.005274,4.155546,4.206567,"(64, 64)","(12, 34)","(64, 64)"
69,70,0.005274,0.005397,4.206567,4.216395,"(64, 64)","(12, 33)","(64, 64)"



=== Growth try 72/100 ===
[train][loop 1/40] loss=2.660393e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.208016e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.649653e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.122106e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.971588e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=5.971588e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
67,68,0.005359,0.005615,4.180245,4.155546,"(64, 64)","(12, 34)","(64, 64)"
68,69,0.005615,0.005274,4.155546,4.206567,"(64, 64)","(12, 34)","(64, 64)"
69,70,0.005274,0.005397,4.206567,4.216395,"(64, 64)","(12, 33)","(64, 64)"
70,71,0.005397,0.005171,4.216395,4.128956,"(64, 64)","(12, 33)","(64, 64)"



=== Growth try 73/100 ===
[train][loop 1/40] loss=2.483038e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.661832e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.818158e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.387304e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.314077e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
68,69,0.005615,0.005274,4.155546,4.206567,"(64, 64)","(12, 34)","(64, 64)"
69,70,0.005274,0.005397,4.206567,4.216395,"(64, 64)","(12, 33)","(64, 64)"
70,71,0.005397,0.005171,4.216395,4.128956,"(64, 64)","(12, 33)","(64, 64)"
71,72,0.005171,0.005972,4.128956,4.190023,"(64, 64)","(12, 33)","(64, 64)"



=== Growth try 74/100 ===
[train][loop 1/40] loss=2.569523e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.359561e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.916642e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.385063e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.245141e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=5.245141e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
69,70,0.005274,0.005397,4.206567,4.216395,"(64, 64)","(12, 33)","(64, 64)"
70,71,0.005397,0.005171,4.216395,4.128956,"(64, 64)","(12, 33)","(64, 64)"
71,72,0.005171,0.005972,4.128956,4.190023,"(64, 64)","(12, 33)","(64, 64)"
72,73,0.005972,0.005314,4.190023,4.065124,"(64, 64)","(12, 33)","(64, 64)"



=== Growth try 75/100 ===
[train][loop 1/40] loss=2.506760e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=7.909101e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=7.528001e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=5.644708e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=5.191438e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=5.081081e-03 next_lr=3.906e-06 stop=completed
[train][loop 7/40] loss=5.081081e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
70,71,0.005397,0.005171,4.216395,4.128956,"(64, 64)","(12, 33)","(64, 64)"
71,72,0.005171,0.005972,4.128956,4.190023,"(64, 64)","(12, 33)","(64, 64)"
72,73,0.005972,0.005314,4.190023,4.065124,"(64, 64)","(12, 33)","(64, 64)"
73,74,0.005314,0.005245,4.065124,4.117653,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 76/100 ===
[train][loop 1/40] loss=2.614535e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.132270e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.684786e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.111509e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.039690e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
71,72,0.005171,0.005972,4.128956,4.190023,"(64, 64)","(12, 33)","(64, 64)"
72,73,0.005972,0.005314,4.190023,4.065124,"(64, 64)","(12, 33)","(64, 64)"
73,74,0.005314,0.005245,4.065124,4.117653,"(64, 64)","(12, 32)","(64, 64)"
74,75,0.005245,0.005081,4.117653,4.107922,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 77/100 ===
[train][loop 1/40] loss=2.614696e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.285588e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.167645e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.603141e-03 next_lr=1.563e-05 stop=completed
[train][loop 5/40] loss=5.584060e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=5.467486e-03 next_lr=3.906e-06 stop=completed
[train][loop 7/40] loss=5.467486e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
72,73,0.005972,0.005314,4.190023,4.065124,"(64, 64)","(12, 33)","(64, 64)"
73,74,0.005314,0.005245,4.065124,4.117653,"(64, 64)","(12, 32)","(64, 64)"
74,75,0.005245,0.005081,4.117653,4.107922,"(64, 64)","(12, 32)","(64, 64)"
75,76,0.005081,0.006040,4.107922,4.160622,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 78/100 ===
[train][loop 1/40] loss=2.528397e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.397436e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=5.919723e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.457350e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.391037e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
73,74,0.005314,0.005245,4.065124,4.117653,"(64, 64)","(12, 32)","(64, 64)"
74,75,0.005245,0.005081,4.117653,4.107922,"(64, 64)","(12, 32)","(64, 64)"
75,76,0.005081,0.006040,4.107922,4.160622,"(64, 64)","(12, 32)","(64, 64)"
76,77,0.006040,0.005467,4.160622,4.098971,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 79/100 ===
[train][loop 1/40] loss=2.637374e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.028900e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.354995e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.842866e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.684489e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=6.684489e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
74,75,0.005245,0.005081,4.117653,4.107922,"(64, 64)","(12, 32)","(64, 64)"
75,76,0.005081,0.006040,4.107922,4.160622,"(64, 64)","(12, 32)","(64, 64)"
76,77,0.006040,0.005467,4.160622,4.098971,"(64, 64)","(12, 32)","(64, 64)"
77,78,0.005467,0.005391,4.098971,4.244641,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 80/100 ===
[train][loop 1/40] loss=2.668621e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.350953e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=8.077092e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=6.068502e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=5.520575e-03 next_lr=1.563e-05 stop=completed
[train][loop 6/40] loss=5.502105e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 7/40] loss=5.502105e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
75,76,0.005081,0.006040,4.107922,4.160622,"(64, 64)","(12, 32)","(64, 64)"
76,77,0.006040,0.005467,4.160622,4.098971,"(64, 64)","(12, 32)","(64, 64)"
77,78,0.005467,0.005391,4.098971,4.244641,"(64, 64)","(12, 32)","(64, 64)"
78,79,0.005391,0.006684,4.244641,4.301831,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 81/100 ===
[train][loop 1/40] loss=2.693511e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.029075e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.992600e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.487092e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.326264e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=6.326264e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
76,77,0.006040,0.005467,4.160622,4.098971,"(64, 64)","(12, 32)","(64, 64)"
77,78,0.005467,0.005391,4.098971,4.244641,"(64, 64)","(12, 32)","(64, 64)"
78,79,0.005391,0.006684,4.244641,4.301831,"(64, 64)","(12, 32)","(64, 64)"
79,80,0.006684,0.005502,4.301831,4.247302,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 82/100 ===
[train][loop 1/40] loss=2.613743e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.523217e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.684827e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.161239e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.014962e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=6.014962e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
77,78,0.005467,0.005391,4.098971,4.244641,"(64, 64)","(12, 32)","(64, 64)"
78,79,0.005391,0.006684,4.244641,4.301831,"(64, 64)","(12, 32)","(64, 64)"
79,80,0.006684,0.005502,4.301831,4.247302,"(64, 64)","(12, 32)","(64, 64)"
80,81,0.005502,0.006326,4.247302,4.217539,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 83/100 ===
[train][loop 1/40] loss=2.598236e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.082436e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=8.055937e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=7.393939e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=7.238227e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=7.238227e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
78,79,0.005391,0.006684,4.244641,4.301831,"(64, 64)","(12, 32)","(64, 64)"
79,80,0.006684,0.005502,4.301831,4.247302,"(64, 64)","(12, 32)","(64, 64)"
80,81,0.005502,0.006326,4.247302,4.217539,"(64, 64)","(12, 32)","(64, 64)"
81,82,0.006326,0.006015,4.217539,4.194565,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 84/100 ===
[train][loop 1/40] loss=2.670789e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.212360e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=8.589758e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=6.201781e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=5.770709e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=5.706375e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
79,80,0.006684,0.005502,4.301831,4.247302,"(64, 64)","(12, 32)","(64, 64)"
80,81,0.005502,0.006326,4.247302,4.217539,"(64, 64)","(12, 32)","(64, 64)"
81,82,0.006326,0.006015,4.217539,4.194565,"(64, 64)","(12, 32)","(64, 64)"
82,83,0.006015,0.007238,4.194565,4.173199,"(64, 64)","(12, 32)","(64, 64)"



=== Growth try 85/100 ===
[train][loop 1/40] loss=2.597787e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.007372e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.171320e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.658331e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.506131e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=6.506131e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
80,81,0.005502,0.006326,4.247302,4.217539,"(64, 64)","(12, 32)","(64, 64)"
81,82,0.006326,0.006015,4.217539,4.194565,"(64, 64)","(12, 32)","(64, 64)"
82,83,0.006015,0.007238,4.194565,4.173199,"(64, 64)","(12, 32)","(64, 64)"
83,84,0.007238,0.005706,4.173199,4.090021,"(64, 64)","(12, 31)","(64, 64)"



=== Growth try 86/100 ===
[train][loop 1/40] loss=2.640917e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.827989e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.397669e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=5.952906e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=5.812774e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=5.812774e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
81,82,0.006326,0.006015,4.217539,4.194565,"(64, 64)","(12, 32)","(64, 64)"
82,83,0.006015,0.007238,4.194565,4.173199,"(64, 64)","(12, 32)","(64, 64)"
83,84,0.007238,0.005706,4.173199,4.090021,"(64, 64)","(12, 31)","(64, 64)"
84,85,0.005706,0.006506,4.090021,4.206702,"(64, 64)","(12, 31)","(64, 64)"



=== Growth try 87/100 ===
[train][loop 1/40] loss=2.630571e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.076092e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.499983e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.982022e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.923925e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
82,83,0.006015,0.007238,4.194565,4.173199,"(64, 64)","(12, 32)","(64, 64)"
83,84,0.007238,0.005706,4.173199,4.090021,"(64, 64)","(12, 31)","(64, 64)"
84,85,0.005706,0.006506,4.090021,4.206702,"(64, 64)","(12, 31)","(64, 64)"
85,86,0.006506,0.005813,4.206702,4.247363,"(64, 64)","(12, 31)","(64, 64)"



=== Growth try 88/100 ===
[train][loop 1/40] loss=2.532966e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=8.542399e-03 next_lr=2.500e-04 stop=completed
[train][loop 3/40] loss=8.221073e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 4/40] loss=6.261717e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 5/40] loss=5.797954e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 6/40] loss=5.675076e-03 next_lr=3.906e-06 stop=completed
[train][loop 7/40] loss=5.675076e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
83,84,0.007238,0.005706,4.173199,4.090021,"(64, 64)","(12, 31)","(64, 64)"
84,85,0.005706,0.006506,4.090021,4.206702,"(64, 64)","(12, 31)","(64, 64)"
85,86,0.006506,0.005813,4.206702,4.247363,"(64, 64)","(12, 31)","(64, 64)"
86,87,0.005813,0.006924,4.247363,4.242685,"(64, 64)","(12, 31)","(64, 64)"



=== Growth try 89/100 ===
[train][loop 1/40] loss=2.646422e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.887838e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.027627e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.533201e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.396438e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=6.396438e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
84,85,0.005706,0.006506,4.090021,4.206702,"(64, 64)","(12, 31)","(64, 64)"
85,86,0.006506,0.005813,4.206702,4.247363,"(64, 64)","(12, 31)","(64, 64)"
86,87,0.005813,0.006924,4.247363,4.242685,"(64, 64)","(12, 31)","(64, 64)"
87,88,0.006924,0.005675,4.242685,4.297220,"(64, 64)","(12, 31)","(64, 64)"



=== Growth try 90/100 ===
[train][loop 1/40] loss=2.679141e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.974918e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.215882e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.622190e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.550424e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
85,86,0.006506,0.005813,4.206702,4.247363,"(64, 64)","(12, 31)","(64, 64)"
86,87,0.005813,0.006924,4.247363,4.242685,"(64, 64)","(12, 31)","(64, 64)"
87,88,0.006924,0.005675,4.242685,4.297220,"(64, 64)","(12, 31)","(64, 64)"
88,89,0.005675,0.006396,4.297220,4.243640,"(64, 64)","(12, 31)","(64, 64)"



=== Growth try 91/100 ===
[train][loop 1/40] loss=2.628626e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.080479e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.482651e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.934395e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.859987e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
86,87,0.005813,0.006924,4.247363,4.242685,"(64, 64)","(12, 31)","(64, 64)"
87,88,0.006924,0.005675,4.242685,4.297220,"(64, 64)","(12, 31)","(64, 64)"
88,89,0.005675,0.006396,4.297220,4.243640,"(64, 64)","(12, 31)","(64, 64)"
89,90,0.006396,0.006550,4.243640,4.257943,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 92/100 ===
[train][loop 1/40] loss=2.623601e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.530129e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.090348e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.459962e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.403076e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
87,88,0.006924,0.005675,4.242685,4.297220,"(64, 64)","(12, 31)","(64, 64)"
88,89,0.005675,0.006396,4.297220,4.243640,"(64, 64)","(12, 31)","(64, 64)"
89,90,0.006396,0.006550,4.243640,4.257943,"(64, 64)","(12, 30)","(64, 64)"
90,91,0.006550,0.006860,4.257943,4.265146,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 93/100 ===
[train][loop 1/40] loss=2.719255e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.645548e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.000972e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.473131e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.387137e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
88,89,0.005675,0.006396,4.297220,4.243640,"(64, 64)","(12, 31)","(64, 64)"
89,90,0.006396,0.006550,4.243640,4.257943,"(64, 64)","(12, 30)","(64, 64)"
90,91,0.006550,0.006860,4.257943,4.265146,"(64, 64)","(12, 30)","(64, 64)"
91,92,0.006860,0.006403,4.265146,4.310757,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 94/100 ===
[train][loop 1/40] loss=2.675199e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.900432e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.131162e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.654018e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.482340e-03 next_lr=3.906e-06 stop=completed
[train][loop 6/40] loss=6.482340e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
89,90,0.006396,0.006550,4.243640,4.257943,"(64, 64)","(12, 30)","(64, 64)"
90,91,0.006550,0.006860,4.257943,4.265146,"(64, 64)","(12, 30)","(64, 64)"
91,92,0.006860,0.006403,4.265146,4.310757,"(64, 64)","(12, 30)","(64, 64)"
92,93,0.006403,0.006387,4.310757,4.195091,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 95/100 ===
[train][loop 1/40] loss=2.738631e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.030798e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.203273e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.679963e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.603427e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
90,91,0.006550,0.006860,4.257943,4.265146,"(64, 64)","(12, 30)","(64, 64)"
91,92,0.006860,0.006403,4.265146,4.310757,"(64, 64)","(12, 30)","(64, 64)"
92,93,0.006403,0.006387,4.310757,4.195091,"(64, 64)","(12, 30)","(64, 64)"
93,94,0.006387,0.006482,4.195091,4.244725,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 96/100 ===
[train][loop 1/40] loss=2.683370e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.072560e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.768166e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=7.167059e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=7.089796e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
91,92,0.006860,0.006403,4.265146,4.310757,"(64, 64)","(12, 30)","(64, 64)"
92,93,0.006403,0.006387,4.310757,4.195091,"(64, 64)","(12, 30)","(64, 64)"
93,94,0.006387,0.006482,4.195091,4.244725,"(64, 64)","(12, 30)","(64, 64)"
94,95,0.006482,0.006603,4.244725,4.194356,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 97/100 ===
[train][loop 1/40] loss=2.710498e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.772139e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=6.956744e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.492867e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.401074e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
92,93,0.006403,0.006387,4.310757,4.195091,"(64, 64)","(12, 30)","(64, 64)"
93,94,0.006387,0.006482,4.195091,4.244725,"(64, 64)","(12, 30)","(64, 64)"
94,95,0.006482,0.006603,4.244725,4.194356,"(64, 64)","(12, 30)","(64, 64)"
95,96,0.006603,0.007090,4.194356,4.227686,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 98/100 ===
[train][loop 1/40] loss=2.726917e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.110297e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.825517e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=7.236449e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=7.162711e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
93,94,0.006387,0.006482,4.195091,4.244725,"(64, 64)","(12, 30)","(64, 64)"
94,95,0.006482,0.006603,4.244725,4.194356,"(64, 64)","(12, 30)","(64, 64)"
95,96,0.006603,0.007090,4.194356,4.227686,"(64, 64)","(12, 30)","(64, 64)"
96,97,0.007090,0.006401,4.227686,4.257806,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 99/100 ===
[train][loop 1/40] loss=2.582190e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=1.138408e-02 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=8.061241e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=7.635180e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=7.526121e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
94,95,0.006482,0.006603,4.244725,4.194356,"(64, 64)","(12, 30)","(64, 64)"
95,96,0.006603,0.007090,4.194356,4.227686,"(64, 64)","(12, 30)","(64, 64)"
96,97,0.007090,0.006401,4.227686,4.257806,"(64, 64)","(12, 30)","(64, 64)"
97,98,0.006401,0.007163,4.257806,4.430007,"(64, 64)","(12, 30)","(64, 64)"



=== Growth try 100/100 ===
[train][loop 1/40] loss=2.748678e-02 next_lr=2.500e-04 stop=adam_plateau
[train][loop 2/40] loss=9.940580e-03 next_lr=6.250e-05 stop=adam_plateau
[train][loop 3/40] loss=7.092722e-03 next_lr=1.563e-05 stop=adam_plateau
[train][loop 4/40] loss=6.637627e-03 next_lr=3.906e-06 stop=adam_plateau
[train][loop 5/40] loss=6.545335e-03 next_lr=9.766e-07 stop=adam_lr_min


Unnamed: 0,iteration,train_loss_from,train_loss_to,master_logpi_from,master_logpi_to,start_ranks,compressed_ranks,end_ranks
0,1,0.002391,0.001103,3.601187,3.799068,"(64, 64)","(19, 17)","(64, 64)"
1,2,0.001103,0.001323,3.799068,3.806651,"(64, 64)","(33, 47)","(64, 64)"
2,3,0.001323,0.001372,3.806651,3.846378,"(64, 64)","(34, 59)","(64, 64)"
3,4,0.001372,0.001616,3.846378,3.891257,"(64, 64)","(30, 60)","(64, 64)"
4,5,0.001616,0.001677,3.891257,3.846516,"(64, 64)","(29, 60)","(64, 64)"
...,...,...,...,...,...,...,...,...
95,96,0.006603,0.007090,4.194356,4.227686,"(64, 64)","(12, 30)","(64, 64)"
96,97,0.007090,0.006401,4.227686,4.257806,"(64, 64)","(12, 30)","(64, 64)"
97,98,0.006401,0.007163,4.257806,4.430007,"(64, 64)","(12, 30)","(64, 64)"
98,99,0.007163,0.007526,4.430007,4.297161,"(64, 64)","(12, 30)","(64, 64)"
