In [6]:
# %cd /home/yali/MEGA/Hack The Tockenizer/notebooks
%cd /home/dpinto/hack_the_tokenizer/notebooks
# %cd "C:\Users\yakim\Documents\MEGA\03. Vida Académica\03. Mestrado Ciencias Computadores\Dissertacao\Hack The Tockenizer\notebooks"

[Errno 2] No such file or directory: '/home/dpinto/hack_the_tokenizer/notebooks'
/home/yali/MEGA/Hack The Tockenizer/notebooks


In [2]:
import os
import json
import datetime as dt
from io import StringIO
import ipywidgets
from IPython.display import display, HTML
from plotly.subplots import make_subplots
import plotly.graph_objects as go

import tqdm
import numpy as np
import pandas as pd

from pathlib import Path

output_dir = Path(os.getcwd()).parent / "outputs"

# Set plotly as the default plotting backend
pd.options.plotting.backend = "plotly"

In [3]:
dfs: dict[str|int, pd.DataFrame] = {}
results: dict[str, dict] = {}
for file in tqdm.tqdm(os.listdir(output_dir), desc='Loading files'):
    if file.endswith("parquet"):
        key = dt.datetime.strptime(str(file), 'analysis_%Y%m%d%H%M%S.parquet').strftime('analysis@%Y-%m-%d %H:%M:%S')
        dfs[key] = pd.read_parquet(output_dir / file)
        dfs[len(dfs.keys()) // 2] = dfs[key]   # Save a copy with "num index" for easier access

        # Convert Categorical columns back to "number" or "string"
        for col in dfs[key].select_dtypes(include=['category']).columns:
            # Try converting to numeric first, if that fails convert to string
            try:
                dfs[key][col] = dfs[key][col].astype(int)
            except:
                dfs[key][col] = dfs[key][col].astype(str)

        # Convert unsigned integer columns to regular integers
        for col in dfs[key].select_dtypes(include=[np.unsignedinteger]).columns:
            dfs[key][col] = dfs[key][col].astype(int)
    elif file.endswith("json"):
        with open(output_dir / file, 'r', encoding='utf-8') as f:
            key = dt.datetime.strptime(str(file), 'results_%Y%m%d%H%M%S.json').strftime('results@%Y-%m-%d %H:%M:%S')
            results[key] = json.load(f)

Loading files: 100%|██████████| 19/19 [00:11<00:00,  1.60it/s]


# Defining Analysis Functions

In [4]:
# Log Scale menus
log_button = lambda x, y, axis: dict(
    type="buttons",
    x=x,
    y=y,
    active=0,  # Set initial state (0 for linear)
    buttons=[
        dict(
            label="Log (X-Axis)",
            method="relayout",
            args=[{f"{axis}.type": "linear"}],
            args2=[{f"{axis}.type": "log"}]
        )
    ]
)

In [5]:
def get_rank_comparison(df: pd.DataFrame, *__, width=1000, **_):

    new_tokens = df.groupby(by=['new_token_rank'], as_index=False)[['new_token_id']].count()
    old_tokens = df.groupby(by=['old_token_rank'], as_index=False)[['new_token_id']].count() 

    new_tokens.columns = ['rank', '#new_token']
    old_tokens.columns = ['rank', '#old_token']

    df = new_tokens.merge(old_tokens, how='outer').fillna(0)
    df = df.sort_values(by='rank', ascending=True).reset_index(drop=True)
    df['#new_token_acc'] = df['#new_token'].cumsum()
    df['#old_token_acc'] = df['#old_token'].cumsum()

    fig = make_subplots(rows=1, cols=2, subplot_titles=['Rank Distribution', 'Rank Accumulative Distribution'])
    # Plotting the normal view
    fig_normal = df.plot(x='rank', y=['#new_token', '#old_token'], title='Rank Distribution')

    # Plotting accumulative view
    fig_acc = df.plot(x='rank', y=['#new_token_acc', '#old_token_acc'], title='Rank Acc Distribution')


    # Add traces to subplot
    for trace in fig_normal.data:
        fig.add_trace(trace, row=1, col=1)
    for trace in fig_acc.data:
        fig.add_trace(trace, row=1, col=2)

    # Update layout
    button_width = 0.30
    button_coords = (0.12, 1.15)
    fig.update_layout(
        title_text="Rank Distribution [NewTokens Vs OldTokens]",
        showlegend=True,
        width=width*2.2,
        updatemenus = [
            log_button(button_coords[0], button_coords[1], 'xaxis'),
            log_button(button_coords[0]+button_width*1, button_coords[1], 'yaxis'),
            log_button(button_coords[0]+button_width*1 + 0.25, button_coords[1], 'xaxis2'),
            log_button(button_coords[0]+button_width*2 + 0.25, button_coords[1], 'yaxis2'),
        ] # type: ignore
    )
    return [fig]

def get_logit_comparison(df: pd.DataFrame, *__, width=1000, **_):
    new_tokens = df.groupby(by=['new_token_logits'], as_index=False)[['new_token_id']].count()
    old_tokens = df.groupby(by=['old_token_logits'], as_index=False)[['new_token_id']].count() 

    new_tokens.columns = ['logits', '#new_token']
    old_tokens.columns = ['logits', '#old_token']

    df = new_tokens.merge(old_tokens, how='outer').fillna(0)
    df = df.sort_values(by='logits', ascending=True).reset_index(drop=True)

    # Plotting the normal view
    fig = df.plot(x='logits', y=['#new_token', '#old_token'], title='Logits Distribution')

    # Update layout
    button_width = 0.30
    button_coords = (0.12, 1.15)
    fig.update_layout(
        title_text="Logits Distribution [NewTokens Vs OldTokens]",
        showlegend=True,
        width=width*2.2,
        updatemenus = [
            log_button(button_coords[0], button_coords[1], 'xaxis'),
            log_button(button_coords[0]+button_width*1, button_coords[1], 'yaxis'),
        ] # type: ignore
    )
    return [fig]

import plotly.graph_objects as go

def get_rank_diff_whiskers(df: pd.DataFrame, *__, width=1000, height=400, **_):
    # Calculate rank differences for all models
    df = df.copy()
    df['rank_diff'] = df['new_token_rank'] - df['old_token_rank']
    
    # Get unique models and sort them for consistent ordering
    models = sorted(df['model'].unique())
    
    figures = []
    
    # Create one vertical box plot per model
    for model in models:
        model_data = df[df['model'] == model]
        
        fig = go.Figure()
        
        fig.add_trace(go.Box(
            y=model_data['rank_diff'],
            name=model,
            boxpoints='outliers',
            marker_color='rgb(8,81,156)',
            line_color='rgb(8,81,156)'
        ))
        
        # Update layout for this figure
        fig.update_layout(
            title_text=f"Rank Difference: {model} (New - Old Rank)",
            showlegend=False,
            width=width,
            height=height,
            margin=dict(t=50, b=50, l=50, r=50),
            yaxis_title="Rank Difference"
        )
        
        # Add horizontal reference line at y=0
        fig.add_hline(y=0, line_dash="dash", line_color="red")
        
        figures.append(fig)
    
    
    fig = make_subplots(rows=1, cols=len(figures), subplot_titles=['Rank Distribution', 'Rank Accumulative Distribution'])

    # Add traces to subplot
    for col, figure in enumerate(figures):
        for trace in figure.data:
            fig.add_trace(trace, row=1, col=col+1)
    return [fig]

def get_metrics_aux(result, version):
    output = []
    for model_type, results in result['RESULTS'].items():
        output.append({'model': result['RUN_CONFIGS']['model_name'], 'version': version, 'model_type': model_type}) 
        for metric in results['Metrics'].keys():
            output[-1][metric] = results['Metrics'][metric]
        for benchmark in results['Benchmarks'].keys():
            output[-1][benchmark] = results['Benchmarks'][benchmark]['result']
    return pd.DataFrame(output)
def get_metrics(result, version, *_, **__):
    df = get_metrics_aux(result, version)
    display(ipywidgets.HTML(df.to_html(index=False)))


In [6]:
def run_analysis(df, result: dict, *args, **kwargs):
    display(ipywidgets.HTML('<h3>Run Config</h3>\n<ul>{}</ul><h3>Metrics</h3>'.format('\n'.join([f'<li>{k}: {v}</li>' for k, v in result['RUN_CONFIGS'].items()]))))
    get_metrics(result, *args, **kwargs)
    display(ipywidgets.HTML('<h3>Comparisons (NewTokens Vs OldTokens)</h3>'))
    figs = []
    figs.extend(get_rank_comparison(df, *args, **kwargs))
    figs.extend(get_logit_comparison(df, *args, **kwargs))
    figs.extend(get_rank_diff_whiskers(df, *args, **kwargs))
    for fig in figs: fig.show()


In [7]:
%matplotlib inline

options = [str(file) for file in dfs.keys() if isinstance(file, str)]
options.sort()
file_selection: ipywidgets.Dropdown = ipywidgets.Dropdown(
    options=options + ['all'],
    value='all',
    description='File:',
    disabled=False,
)
width_slider: ipywidgets.IntSlider = ipywidgets.IntSlider(
    value=700,
    min=200,
    max=1000,
    step=1,
    description='Width:',
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
height_slider: ipywidgets.IntSlider = ipywidgets.IntSlider(
    value=400,
    min=200,
    max=1000,
    step=1,
    description='Height:',
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
execute_button = ipywidgets.Button(
    description='Execute',
    disabled=False,
    button_style='info', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Execute',
)


out: ipywidgets.Output = ipywidgets.Output(layout={'border': '1px solid black'})
def on_button_clicked(_):
    # out.clear_output()
    items = [file_selection.value]
    if file_selection.value == 'all':
        items = options.copy()
    for analysis in items:
        df = dfs[analysis] # type: ignore
        result = results[analysis.replace('analysis', 'results')]  # type: ignore
        with out:
            # Filtering timestamp
            display(ipywidgets.HTML('<h1 style="text-align:center">MODEL: {}<h1>'.format(df['model'].min().replace('[NEW_TOKENS]', ''))))
            display(ipywidgets.HTML(f'<h3 style="text-align:center; margin-top: -10px">{analysis}</h3>'))

            run_analysis(df, result, analysis, width=width_slider.value, heigt=height_slider.value)
            display(ipywidgets.HTML('<div style="position: relative; width:100%; margin: 20px; height:4px;border-bottom: solid black 1px;border-top: solid black 1px;"></divß>'))

execute_button.on_click(on_button_clicked)

In [8]:
# Display all metrics for all runs
output = []
for version, result in results.items():
    output.append(get_metrics_aux(result, version))
    output[-1]['number_new_tokens'] = result['RUN_CONFIGS']['number_new_tokens']
    # Add the "Run Configs" to the dataframe
    # for key, val in result['RUN_CONFIGS'].items():
        # output[-1][key] = val
tmp = pd.concat(output).reset_index(drop=True).sort_values(by=['model', 'number_new_tokens', 'model_type'])
tmp.to_csv('RESULTS_SUMMARY_{}.csv'.format(pd.Timestamp.now().strftime('%Y%m%d%H%M%S')), index=False)
tmp

Unnamed: 0,model,version,model_type,FertilityInput,MMLU,number_new_tokens,FertilityOutput,SupergluePTPT,CalamePT
3,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 01:40:27,BASELINE,2.474843,0.232517,1000,,,
4,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 01:40:27,INITIALIZED_NO_TRAINING,1.936543,0.232517,1000,,,
5,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 01:40:27,INITIALIZED_WITH_TRAINING,1.936543,0.232517,1000,,,
22,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 03:04:02,BASELINE,2.474843,0.232517,5000,,,
23,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 03:04:02,INITIALIZED_NO_TRAINING,1.758996,0.232517,5000,,,
24,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 03:04:02,INITIALIZED_WITH_TRAINING,1.758996,0.232517,5000,,,
16,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 04:35:42,BASELINE,2.474843,0.232517,7500,,,
17,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 04:35:42,INITIALIZED_NO_TRAINING,1.754933,0.232517,7500,,,
18,HuggingFaceTB/SmolLM2-135M,results@2025-08-12 04:35:42,INITIALIZED_WITH_TRAINING,1.754933,0.232517,7500,,,
13,HuggingFaceTB/SmolLM3-3B,results@2025-08-12 18:49:42,BASELINE,1.940017,0.566657,1000,2.001278,0.496864,0.58526


In [9]:
display(file_selection)
display(width_slider)
display(execute_button)
display(out)

Dropdown(description='File:', index=9, options=('analysis@2025-08-12 01:40:27', 'analysis@2025-08-12 03:04:02'…

IntSlider(value=700, description='Width:', max=1000, min=200)

Button(button_style='info', description='Execute', style=ButtonStyle(), tooltip='Execute')

Output(layout=Layout(border_bottom='1px solid black', border_left='1px solid black', border_right='1px solid b…

In [64]:
import pandas as pd

df1 = pd.read_csv('RESULTS_SUMMARY_20250727065621.csv')
df2 = pd.read_csv('RESULTS_SUMMARY_20250813090924.csv')

df = df1.merge(df2, on=['model', 'model_type', 'number_new_tokens'], how='outer')

df = df[[
    'number_new_tokens', 'model', 'model_type',
    'version_x', 'version_y', 
    'FertilityInput_x', 'FertilityInput_y', 
    'Perplexity',
    'FertilityOutput_x', 'FertilityOutput_y',
    'MMLU',
    'CalamePT_x', 'CalamePT_y',
    'SupergluePTPT_x', 'SupergluePTPT_y'
]]
# Keeping only most recent data
# df = df.drop(columns=df.columns[df.columns.str.endswith('_x')].tolist())
for col in df.columns[df.columns.str.endswith('_y')]:
    df[col[:-2]] = df[col].fillna(df[col[:-2] + '_x'])
df = df.drop(columns=df.columns[df.columns.str.endswith('_x') | df.columns.str.endswith('_y')].tolist()).sort_values(by=['model', 'number_new_tokens', 'model_type'])
# Remove baseline except when number_new_tokens == 1000
df[(df['model_type'] != 'BASELINE') | (df['number_new_tokens'] == 1000)]

# Sort columns
df = df[[
    'number_new_tokens', 'model', 'model_type',
    'version',
    'FertilityInput',
    'Perplexity',
    'FertilityOutput',
    'MMLU',
    'CalamePT',
    'SupergluePTPT'
]]
df.to_csv('RESULTS_SUMMARY_{}.csv'.format(pd.Timestamp.now().strftime('%Y%m%d%H%M%S')), index=False)

# MANUAL SECTION

This section highlights the manual merge of different results

In [4]:
df_1 = pd.read_csv('RESULTS_SUMMARY_20250813094505.csv')
df_1

Unnamed: 0,number_new_tokens,model,model_type,version,FertilityInput,Perplexity,FertilityOutput,MMLU,CalamePT,SupergluePTPT
0,1000,HuggingFaceTB/SmolLM2-135M,BASELINE,results@2025-08-12 01:40:27,2.474843,283.38605,3.074219,0.232517,0.135356,0.014678
1,1000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_NO_TRAINING,results@2025-08-12 01:40:27,1.936543,1134.296027,3.24426,0.232517,0.135356,0.014678
2,1000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_WITH_TRAINING,results@2025-08-12 01:40:27,1.936543,1200.905614,3.295876,0.232517,0.135356,0.014678
3,5000,HuggingFaceTB/SmolLM2-135M,BASELINE,results@2025-08-12 03:04:02,2.474843,283.38605,3.074219,0.232517,0.135356,0.014678
4,5000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_NO_TRAINING,results@2025-08-12 03:04:02,1.758996,1965.716529,2.758512,0.232517,0.135356,0.015055
5,5000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_WITH_TRAINING,results@2025-08-12 03:04:02,1.758996,2086.669628,2.792973,0.232517,0.135356,0.015055
6,7500,HuggingFaceTB/SmolLM2-135M,BASELINE,results@2025-08-12 04:35:42,2.474843,283.38605,3.074219,0.232517,0.135356,0.014678
7,7500,HuggingFaceTB/SmolLM2-135M,INITIALIZED_NO_TRAINING,results@2025-08-12 04:35:42,1.754933,2707.23825,2.825158,0.232517,0.135356,0.015055
8,7500,HuggingFaceTB/SmolLM2-135M,INITIALIZED_WITH_TRAINING,results@2025-08-12 04:35:42,1.754933,2707.722005,2.855443,0.232517,0.135356,0.015055
9,1000,HuggingFaceTB/SmolLM3-3B,BASELINE,results@2025-08-12 18:49:42,1.940017,,2.001278,0.566657,0.58526,0.496864


In [3]:
import tqdm
import os

dfs = []
for file in tqdm.tqdm(os.listdir(output_dir), desc='Loading files'):
    if not file.startswith('FBoost'): continue
    elif file.endswith("json"):
        with open(output_dir / file, 'r', encoding='utf-8') as f:
            key = dt.datetime.strptime(str(file), 'FBoost_results_%Y%m%d%H%M%S.json').strftime('results@%Y-%m-%d %H:%M:%S')
            result = json.load(f)
            config = result['RUN_CONFIGS']
            run_configs = {
                'number_new_tokens': config['number_new_tokens'],
                'model': config['model_name'],
                'version': key
            }
            rows = []
            for model_type in result['RESULTS'].keys():
                rows.append(run_configs.copy())
                rows[-1]['model_type'] = model_type
                for metric_name, metric_val in result['RESULTS'][model_type]['Metrics'].items():
                    rows[-1][metric_name] = metric_val
            dfs.append(pd.DataFrame(rows))

df = pd.concat(dfs).reset_index(drop=True).sort_values(by=['model', 'number_new_tokens', 'model_type'])
df['FertilityBoost [Mean]'] = df[df.columns[df.columns.str.startswith('FertilityBoost')]].mean(axis=1)
df['FertilityBoost [STD]'] = df[df.columns[df.columns.str.startswith('FertilityBoost')]].std(axis=1)
df['FertilityBoost'] = df.apply(lambda x: f'{x["FertilityBoost [Mean]"]:.2%} ± {x["FertilityBoost [STD]"]:.2%}', axis=1)
df

Loading files: 100%|██████████| 37/37 [00:00<00:00, 565.28it/s]


Unnamed: 0,number_new_tokens,model,version,model_type,FertilityBoost [Run #0],FertilityBoost [Run #1],FertilityBoost [Run #2],FertilityBoost [Run #3],FertilityBoost [Run #4],FertilityBoost [Run #5],FertilityBoost [Run #6],FertilityBoost [Run #7],FertilityBoost [Run #8],FertilityBoost [Run #9],FertilityBoost [Mean],FertilityBoost [STD],FertilityBoost
2,1000,HuggingFaceTB/SmolLM2-135M,results@2025-08-20 23:08:10,INITIALIZED_NO_TRAINING,0.050465,0.051331,0.051883,0.050881,0.050178,0.050571,0.050418,0.050559,0.05018,0.05065,0.050712,0.000505,5.07% ± 0.05%
3,1000,HuggingFaceTB/SmolLM2-135M,results@2025-08-20 23:08:10,INITIALIZED_WITH_TRAINING,0.050336,0.050019,0.050702,0.049992,0.050074,0.051359,0.050016,0.05119,0.051123,0.049818,0.050463,0.00055,5.05% ± 0.06%
4,5000,HuggingFaceTB/SmolLM2-135M,results@2025-08-20 23:21:17,INITIALIZED_NO_TRAINING,0.129053,0.129199,0.128711,0.128901,0.127792,0.129148,0.128307,0.128619,0.128901,0.128925,0.128756,0.000408,12.88% ± 0.04%
5,5000,HuggingFaceTB/SmolLM2-135M,results@2025-08-20 23:21:17,INITIALIZED_WITH_TRAINING,0.130606,0.127904,0.130205,0.130166,0.129279,0.129095,0.130289,0.130518,0.127705,0.128314,0.129408,0.001055,12.94% ± 0.11%
8,7500,HuggingFaceTB/SmolLM2-135M,results@2025-08-20 23:34:52,INITIALIZED_NO_TRAINING,0.161246,0.160117,0.158266,0.16198,0.160684,0.158923,0.159183,0.161216,0.160945,0.160008,0.160257,0.001118,16.03% ± 0.11%
9,7500,HuggingFaceTB/SmolLM2-135M,results@2025-08-20 23:34:52,INITIALIZED_WITH_TRAINING,0.158265,0.15882,0.160682,0.16012,0.162512,0.160504,0.15963,0.160227,0.159357,0.158701,0.159882,0.001166,15.99% ± 0.12%
12,1000,HuggingFaceTB/SmolLM3-3B,results@2025-08-21 22:50:57,INITIALIZED_NO_TRAINING,0.002387,0.002015,0.0022,0.002009,0.002295,0.002373,0.002232,0.002183,0.002231,0.002379,0.00223,0.00013,0.22% ± 0.01%
13,1000,HuggingFaceTB/SmolLM3-3B,results@2025-08-21 22:50:57,INITIALIZED_WITH_TRAINING,0.002443,0.002211,0.002056,0.002172,0.002517,0.002091,0.002339,0.002206,0.002256,0.002167,0.002246,0.00014,0.22% ± 0.01%
16,5000,HuggingFaceTB/SmolLM3-3B,results@2025-08-22 00:02:11,INITIALIZED_NO_TRAINING,0.008001,0.009205,0.008791,0.009208,0.008844,0.008776,0.008142,0.008857,0.008699,0.008206,0.008673,0.000401,0.87% ± 0.04%
17,5000,HuggingFaceTB/SmolLM3-3B,results@2025-08-22 00:02:11,INITIALIZED_WITH_TRAINING,0.007775,0.00852,0.008581,0.009107,0.00835,0.008333,0.008544,0.009591,0.008019,0.00755,0.008437,0.000569,0.84% ± 0.06%


In [None]:
df_1.merge(df, on=['number_new_tokens', 'model', 'model_type'], suffixes=('', '_y'), how='left').drop(columns=['version_y']).to_csv('RESULTS_SUMMARY_{}.csv'.format(pd.Timestamp.now().strftime('%Y%m%d%H%M%S')), index=False)

Unnamed: 0,number_new_tokens,model,model_type,version,FertilityInput,Perplexity,FertilityOutput,MMLU,CalamePT,SupergluePTPT,...,FertilityBoost [Run #3],FertilityBoost [Run #4],FertilityBoost [Run #5],FertilityBoost [Run #6],FertilityBoost [Run #7],FertilityBoost [Run #8],FertilityBoost [Run #9],FertilityBoost [Mean],FertilityBoost [STD],FertilityBoost
0,1000,HuggingFaceTB/SmolLM2-135M,BASELINE,results@2025-08-12 01:40:27,2.474843,283.38605,3.074219,0.232517,0.135356,0.014678,...,,,,,,,,,,
1,1000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_NO_TRAINING,results@2025-08-12 01:40:27,1.936543,1134.296027,3.24426,0.232517,0.135356,0.014678,...,0.050881,0.050178,0.050571,0.050418,0.050559,0.05018,0.05065,0.050712,0.000505,5.07% ± 0.05%
2,1000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_WITH_TRAINING,results@2025-08-12 01:40:27,1.936543,1200.905614,3.295876,0.232517,0.135356,0.014678,...,0.049992,0.050074,0.051359,0.050016,0.05119,0.051123,0.049818,0.050463,0.00055,5.05% ± 0.06%
3,5000,HuggingFaceTB/SmolLM2-135M,BASELINE,results@2025-08-12 03:04:02,2.474843,283.38605,3.074219,0.232517,0.135356,0.014678,...,,,,,,,,,,
4,5000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_NO_TRAINING,results@2025-08-12 03:04:02,1.758996,1965.716529,2.758512,0.232517,0.135356,0.015055,...,0.128901,0.127792,0.129148,0.128307,0.128619,0.128901,0.128925,0.128756,0.000408,12.88% ± 0.04%
5,5000,HuggingFaceTB/SmolLM2-135M,INITIALIZED_WITH_TRAINING,results@2025-08-12 03:04:02,1.758996,2086.669628,2.792973,0.232517,0.135356,0.015055,...,0.130166,0.129279,0.129095,0.130289,0.130518,0.127705,0.128314,0.129408,0.001055,12.94% ± 0.11%
6,7500,HuggingFaceTB/SmolLM2-135M,BASELINE,results@2025-08-12 04:35:42,2.474843,283.38605,3.074219,0.232517,0.135356,0.014678,...,,,,,,,,,,
7,7500,HuggingFaceTB/SmolLM2-135M,INITIALIZED_NO_TRAINING,results@2025-08-12 04:35:42,1.754933,2707.23825,2.825158,0.232517,0.135356,0.015055,...,0.16198,0.160684,0.158923,0.159183,0.161216,0.160945,0.160008,0.160257,0.001118,16.03% ± 0.11%
8,7500,HuggingFaceTB/SmolLM2-135M,INITIALIZED_WITH_TRAINING,results@2025-08-12 04:35:42,1.754933,2707.722005,2.855443,0.232517,0.135356,0.015055,...,0.16012,0.162512,0.160504,0.15963,0.160227,0.159357,0.158701,0.159882,0.001166,15.99% ± 0.12%
9,1000,HuggingFaceTB/SmolLM3-3B,BASELINE,results@2025-08-12 18:49:42,1.940017,,2.001278,0.566657,0.58526,0.496864,...,,,,,,,,,,
