In [None]:
!pip install pwlf #for colab

## Dataset setup and preprocessing

In [None]:
import importlib
import time

# (module path, alias) pairs to import; each import is timed to show which
# dependency dominates start-up.
modules = [
    ('numpy', 'np'),
    ('scipy.stats', 'stats'),
    ('scipy.optimize', 'optimize'),
    ('matplotlib.pyplot', 'plt'),
    ('pandas', 'pd'),
    ('seaborn', 'sns'),
    ('itertools', 'itertools'),
    ('copy', 'copy'),
    ('re', 're'),
    ('pdb', 'pdb'),
    ('logging', 'logging')
]

for module, alias in modules:
    # perf_counter is monotonic and higher resolution than time.time for
    # measuring short intervals.
    start = time.perf_counter()
    # Same binding as `import <module> as <alias>`, without exec on a
    # formatted string.
    globals()[alias] = importlib.import_module(module)
    end = time.perf_counter()
    print(f"{module}: {end - start:.4f} seconds")

In [8]:
import copy,re, pdb, logging
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd #taking long to load here
import seaborn as sns
from scipy import stats, optimize

# Configure the root logger: INFO and above, each record prefixed with a
# second-resolution timestamp and its level name.
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger for this notebook's own messages.
logger = logging.getLogger(__name__)


### June 2024 dataset

In [13]:

# Load the June 2024 snapshot from a Google Drive share link (the original
# source URL is kept below for reference).
# df = pd.read_csv("https://epochai.org/data/epochdb/notable_systems.csv")
url = 'https://drive.google.com/file/d/1RLLKPU3bEYK65wlQlU0p20u9M8cHkLMl/view?usp=sharing'
# Turn the share link into a direct-download link: the file id is the
# second-to-last path segment.
url = 'https://drive.google.com/uc?id=' + url.split('/')[-2]

df = pd.read_csv(url)

# Keep only rows that state a notability criterion.
df = df[~df["Notability criteria"].isna()]

# Short aliases for the columns used in the rest of the notebook.
df["compute"] = df["Training compute (FLOP)"]
df["date"] = df["Publication date"]
df["model"] = df["System"]
df["poss1e23"] = df["Possibly over 1e23 FLOP"]
df["poss1e25"] = df["Estimated over 1e25 FLOP"]
df["cost"] = df["Training compute cost (2023 USD)"]
# Strip thousands separators and the currency sign, then parse as float.
# regex=False makes "$" a literal on every pandas version: the default for
# `regex` changed in pandas 2.0, and as a regex "$" is the end-of-string anchor.
df["cost"] = (
    df["cost"]
    .str.replace(",", "", regex=False)
    .str.replace("$", "", regex=False)
    .astype(float)
)

df = df[["model", "compute", "date", "cost", "poss1e23", "poss1e25"]]

to_remove = ['AlphaGo Zero','AlphaZero'] #outliers
df = df[~df["model"].isin(to_remove)]

# Models missing from the snapshot, as
# [model, compute, date, cost, poss1e23, poss1e25].
to_append = [
  ["Claude 3.5 Sonnet", 4.3e25, "2024-06-21", np.nan, np.nan, np.nan],
  ["GPT-4o Mini", 1.2e25, "2024-07-18", np.nan, np.nan, np.nan],
]

# The filters above leave gaps in the index, so the label len(df) may already
# belong to an existing row and `df.loc[len(df)] = row` would overwrite it.
# Renumber to 0..n-1 first so len(df) is always a fresh label.
df = df.reset_index(drop=True)

for row in to_append:
  if row[0] not in df["model"].values:
    df.loc[len(df)] = row


# Claude 2.1 reuses Claude 2's estimate. Take a scalar (NaN if Claude 2 is
# absent) rather than a variable-length array, so the assignment below cannot
# fail on a shape mismatch.
claude2_compute = df.loc[df["model"] == "Claude 2", "compute"]

# Manual compute estimates (FLOP) for models whose compute is missing.
to_add_compute = {
    "Claude 3 Opus": 2.5e25,
    "Claude 3 Sonnet": 1.1e25,
    "GPT-4o": 2.9e25,
    "Gemini 1.0 Pro": 2.8e24,
    "Gemini 1.5 Pro": 1.9e25,
    "Reka Core": 8.4e24,
    "GPT-4 Turbo": 2.1e25,  # rough guess
    "GPT-4V": 2.1e25,  # rough guess
    "Claude 2.1": claude2_compute.iloc[0] if len(claude2_compute) else np.nan,  # rough guess
}

for k, v in to_add_compute.items():
  is_model = df["model"] == k
  if not is_model.any():
    # Previously an empty match fell through to the "already has" message
    # (or raised, depending on the numpy version).
    print(f"{k} not found in dataset; skipping")
    continue
  # Only fill rows that still lack a value; this also copes with a model
  # appearing more than once, where `if <array>:` raised ValueError.
  needs_value = is_model & df["compute"].isna()
  if needs_value.any():
    df.loc[needs_value, "compute"] = v
  else:
    print(f"{k} already has a compute value")


# Reset the ones we've set: rows with a compute value no longer need their
# "possibly over 1e23/1e25" flags.
has_compute = ~df["compute"].isna()
df.loc[has_compute, "poss1e23"] = np.nan
df.loc[has_compute, "poss1e25"] = np.nan

# Set some temporary placeholder values
# TODO: revisit
# df.loc[(df["poss1e25"] == "checked"), "compute"] = 1.01e25  # placeholder
# df.loc[((df["poss1e23"] =="checked") & (df["poss1e25"] != "checked")), "compute"] = 1.01e23  # placeholder

# We want to handle these leading models manually via the above compute estimates.
assert df[(df["poss1e25"] == "checked") & (df["compute"].isna())].empty, \
    "models flagged over 1e25 FLOP still lack a manual compute estimate"

# We sample 1e23-1e25 models with unknown compute from the existing empirical distribution.
# TODO: revisit
poss1e23 = ((df["poss1e23"] == "checked") & (df["poss1e25"] != "checked"))
empirical_pool = df.loc[(df["compute"] >= 1e23) & (df["compute"] < 1e25), "compute"]
n_to_impute = int(poss1e23.sum())
if n_to_impute:
    # Draw without replacement when the pool is large enough (same draws as
    # before); fall back to replacement rather than raising when more values
    # are needed than the pool holds.
    df.loc[poss1e23, "compute"] = empirical_pool.sample(
        n_to_impute,
        replace=n_to_impute > len(empirical_pool),
        random_state=0,
    ).values

df["date"] = pd.to_datetime(df["date"])
df["log_compute"] = np.log10(df["compute"])

# Decimal year. Months are 1-based, so subtract 1: January maps to year + 0
# and December to year + 11/12 (month/12 pushed December into the next year).
df["date_float"] = df["date"].dt.year + (df["date"].dt.month - 1)/12

df['year'] = df['date'].dt.year

df = df.sort_values("date")
# Drop rows that still have no compute value.
df = df.dropna(subset=["compute"])

# Hold-out frame: everything published before 2024.
df_no_2024 = df[df["year"] < 2024].copy()

# Print the same summary for the full frame and for the pre-2024 frame:
# (header, per-year header, years to tabulate, frame).
summaries = (
    ("=== Full Dataset ===", "\nDatapoints per year (2017-2025):", range(2017, 2026), df),
    ("\n=== Dataset without 2024 ===", "\nDatapoints per year (2017-2023):", range(2017, 2024), df_no_2024),
)
for header, per_year_header, year_span, frame in summaries:
    print(header)
    print("Most recent date:", frame["date"].max())
    print(per_year_header)
    for year in year_span:
        count = len(frame[frame["year"] == year])
        print(f"{year}: {count}")

    # Row label of the largest training run in this frame.
    max_compute_idx = frame['compute'].idxmax()
    print(f"\nLargest compute value: {frame.loc[max_compute_idx, 'compute']:.2e} ({frame.loc[max_compute_idx, 'model']})")
    print(f"Total entries with compute value: {len(frame)}")

# From here on the notebook works with the pre-2024 frame.
df = df_no_2024

# Entry counts before and after dropping rows without a compute value.
print(f"\n\n Number of entries before removing rows with compute=NaN: {len(df)}")
df = df.dropna(subset=['compute'])
print(f"Number of entries after removing rows with compute=NaN: {len(df)}")

### Feb 2025 dataset

In [None]:
#path
# Location of the Feb 2025 CSV export. Set the NOTABLE_MODELS_CSV environment
# variable to point at your own copy; the fallback is the original
# author-local path, so existing behaviour is unchanged when it is unset.
path = os.environ.get(
    "NOTABLE_MODELS_CSV",
    "/Users/iyngkarrankumar/Documents/GovAI WF/EUAIA_thresholds_project/data/notable_ai_models_24_02_2025.csv",
)

df = pd.read_csv(path)
# Keep only rows that state a notability criterion.
df = df[~df["Notability criteria"].isna()]

df["compute"] = df["Training compute (FLOP)"]
df["date"] = pd.to_datetime(df["Publication date"])
# "date" is already datetime, so read the year directly.
df["year"] = df["date"].dt.year
df["model"] = df["Model"]
df["cost"] = df["Training compute cost (2023 USD)"]
# NOTE(review): missing costs become 0.0 here, whereas the June 2024 cell
# leaves them as NaN — confirm which is intended before using "cost".
df["cost"] = df["cost"].fillna("$0")  # Handle NaN values
df["cost"] = df["cost"].astype(str)  # Convert to string
# regex=False makes "$" a literal on every pandas version (as a regex it is
# the end-of-string anchor; the default for `regex` changed in pandas 2.0).
df["cost"] = (
    df["cost"]
    .str.replace(",", "", regex=False)
    .str.replace("$", "", regex=False)
    .astype(float)
)
df = df[["model", "compute", "date", "cost","year"]]

# Models to remove
to_remove = ["AlphaGo Zero", "AlphaZero"]
df = df[~df["model"].isin(to_remove)]



# Create dataset without 2025 data
df_no_2025 = df[df["year"] < 2025].copy()

# Print the same summary for the full frame and for the pre-2025 frame:
# (header, per-year header, years to tabulate, frame).
summaries = (
    ("=== Full Dataset ===", "\nDatapoints per year (2017-2025):", range(2017, 2026), df),
    ("\n=== Dataset without 2025 ===", "\nDatapoints per year (2017-2024):", range(2017, 2025), df_no_2025),
)
for header, per_year_header, year_span, frame in summaries:
    print(header)
    print("Most recent date:", frame["date"].max())
    print(per_year_header)
    for year in year_span:
        count = len(frame[frame["year"] == year])
        print(f"{year}: {count}")

    # Row label of the largest training run in this frame.
    max_compute_idx = frame['compute'].idxmax()
    print(f"\nLargest compute value: {frame.loc[max_compute_idx, 'compute']:.2e} ({frame.loc[max_compute_idx, 'model']})")

# From here on the notebook works with the pre-2025 frame.
df = df_no_2025

# Entry counts before and after dropping rows without a compute value.
print(f"\n\n Number of entries before removing rows with compute=NaN: {len(df)}")
df = df.dropna(subset=['compute'])
print(f"Number of entries after removing rows with compute=NaN: {len(df)}")

In [None]:
#generate basic scatterplot
# Historical compute since 2010 with a log-linear trend line, plus three
# synthetic growth scenarios for 2025-2029. The scenario points are
# illustrative only; they are not derived from the data.
historical_data = df[df['date']>'2010-01-01']
ax = sns.scatterplot(data=historical_data, x='date', y='compute')
ax.set(yscale='log')
plt.grid(alpha=0.5)

# Add line of best fit for historical data: ln(compute) against unix seconds.
# Subtracting the epoch and using total_seconds() does not depend on the
# datetime resolution of the column (the int64 cast assumed nanoseconds).
x = (historical_data['date'] - pd.Timestamp('1970-01-01')).dt.total_seconds().to_numpy()
y = historical_data['compute']
z = np.polyfit(x, np.log(y), 1)
p = np.poly1d(z)
plt.plot(historical_data['date'], np.exp(p(x)), 'b--', alpha=0.8)

future_dates = pd.date_range(start='2025-01-01', end='2029-12-31', periods=200)
base = 1e25  # Starting point based on 2024 level
# Multiplicative scatter must stay positive to be drawn on a log axis. The
# previous (1 + N(0, 10)) factor was negative for roughly half of the points.
# Use log-normal scatter (1 dex standard deviation) from a seeded generator so
# the figure is reproducible.
rng = np.random.default_rng(0)
noise = 10 ** rng.normal(0, 1, len(future_dates))
# NOTE(review): whole calendar years, so each scenario grows in yearly steps.
years_from_2025 = (future_dates.year - 2025)

# (label, exponential growth rate per year, colour); all scenarios share the
# same noise draw so they differ only in growth rate.
scenarios = [
    ('Scenario A', 3.0, 'red'),
    ('Scenario B', 0.4, 'green'),
    ('Scenario C', 5.0, 'blue'),  # Higher growth rate than Scenario A
]
for label, growth_rate, color in scenarios:
    future_compute = base * np.exp(growth_rate * years_from_2025) * noise
    plt.scatter(future_dates, future_compute, alpha=0.3, color=color, label=label)

plt.legend()
plt.xlim([pd.Timestamp('2020-01-01'),pd.Timestamp('2030-01-01')])

# Reference lines at 1e25 ... 1e30 FLOP.
for exp in range(25,31):
    plt.axhline(y=10**exp,color='gray',linestyle='--',alpha=0.6)



## Utils

In [None]:
#util funcs cell
def norm_exp_func(x,a,b,k):
    """Density proportional to exp(k*x), normalised to integrate to 1 over [a, b].

    Mathematically this is exp(k*x) / ((exp(k*b) - exp(k*a)) / k). The exponent
    is shifted to the endpoint where exp(k*x) is largest so that large |k*x|
    neither overflows nor underflows to 0/0.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the density.
    a, b : float
        Interval over which the density is normalised.
    k : float
        Exponential rate. k == 0 gives the uniform density 1/(b-a).

    Returns
    -------
    numpy scalar or ndarray with the same shape as x.
    """
    x = np.asarray(x, dtype=float)
    if k == 0:
        # Limit of the expression as k -> 0 (the original divided by zero).
        return np.ones_like(x) / (b - a)
    if k > 0:
        # Factor out exp(k*b): exp(k*b) - exp(k*a) = -exp(k*b) * expm1(k*(a-b)).
        return k * np.exp(k * (x - b)) / (-np.expm1(k * (a - b)))
    # k < 0: factor out exp(k*a): exp(k*b) - exp(k*a) = exp(k*a) * expm1(k*(b-a)).
    return k * np.exp(k * (x - a)) / np.expm1(k * (b - a))

def sample_from_exp_dist(a,b,k,spacing='linear'):
    """Draw one sample from the density proportional to exp(k*x) on [a, b].

    Uses the closed-form inverse CDF instead of a 10,000-point grid. The grid
    version's sanity assert fails for ordinary inputs (e.g. a=0, b=1, k=-100,
    where the Riemann sum rounds to 1.01), and its normaliser overflows or
    underflows for large |k*x|. Like the grid version, each call consumes one
    uniform draw from numpy's global random state.

    Parameters
    ----------
    a, b : float
        Interval bounds; they are swapped if given in descending order.
    k : float
        Exponential rate. k == 0 gives a uniform draw.
    spacing : str
        Unused; kept so existing calls keep working.

    Returns
    -------
    float in [min(a, b), max(a, b)].
    """
    lo, hi = (a, b) if a <= b else (b, a)
    span = hi - lo
    u = np.random.uniform()  # in [0, 1)
    if k == 0 or span == 0:
        return float(lo + u * span)
    if k < 0:
        # Mass concentrates at lo: x = lo + log(1 + u*(exp(k*span) - 1)) / k.
        draw = lo + np.log1p(u * np.expm1(k * span)) / k
    else:
        # Mirror image anchored at hi so the exponent stays non-positive.
        draw = hi + np.log1p(u * np.expm1(-k * span)) / k
    # Clamp away floating-point overshoot at the interval edges.
    return float(min(max(draw, lo), hi))

## Training compute spending extrapolation

In [None]:
#beta OOMs of training compute can be traded off for alpha OOMs of inference compute

def compute_allocations(tau):
    """Split compute into training and inference shares for tradeoff parameter tau.

    Returns (tau/(tau+1), 1/(tau+1)); the two shares sum to 1. `tau` may be a
    scalar or array-like, and the result is element-wise.
    """
    tau_arr = np.array(tau)
    denominator = tau_arr + 1
    return tau_arr / denominator, 1 / denominator
    

# Tradeoff values to illustrate.
taus = [0.2, 0.25, 0.3333, 0.5, 1, 2, 3, 4, 5] #high tau - poor inference time scaling. Low tau - great inference time scaling
train_fracs = []
inference_fracs = []

for tau in taus:
    train_frac, inference_frac = compute_allocations(tau)
    train_fracs.append(train_frac)
    inference_fracs.append(inference_frac)

# Plot training and inference fractions vs tau
plt.figure(figsize=(10,6))

plt.semilogx(taus, train_fracs, label='Training fraction', marker='o')
plt.semilogx(taus, inference_fracs, label='Inference fraction', marker='o')

plt.xlabel('τ (Tradeoff parameter)')
plt.ylabel('Fraction of compute')
plt.title('Training vs Inference Compute Fractions')
plt.grid(alpha=0.5)
plt.ylim(0,1)

# Set x-axis ticks and labels. Tick positions are taken from `taus` so every
# tick sits exactly on a plotted point (the literal list used 0.333 where the
# data has 0.3333).
tau_tick_labels = ['0.2', '0.25', '0.33', '0.5', '1.0', '2.0', '3.0', '4.0', '5.0']
assert len(tau_tick_labels) == len(taus), "one tick label per tau value"
plt.xticks(taus, tau_tick_labels)

plt.legend()
plt.tight_layout()

In [None]:
#total AI-relevant compute extrapolations
# This cell sets the configuration flags and builds the per-year aggregate
# (summed) compute series that the extrapolation and allocation steps use.

#extraps
LINEAR_EXTRAP=True  # fit a straight line to log10(aggregate compute) vs year

#allocations
# Exactly one allocation mode must be enabled (enforced by the assert).
FIXED_ALLOCATION=False
DECREASING_TAU=True #inference scaling continues improving
assert(FIXED_ALLOCATION+DECREASING_TAU)==1
# Year -> tau passed to compute_allocations when DECREASING_TAU is on.
tau_dict = {
    2025: 1,
    2026: 1/2,
    2027: 1/3,
    2028: 1/4,
    2029: 1/5
}

#plot
PLOT=True

from sklearn.linear_model import LinearRegression

# NOTE(review): presumably 2e25 FLOP per $100M, i.e. FLOP per dollar in 2024 — confirm the source.
FLOP_dollar_2024 = 2e25/100e6
dollar_FLOP_2024 = 1/FLOP_dollar_2024

# Series name -> {year: log10 compute}; filled in by the steps below.
LOG_AGGREGATE_COMPUTE_DATA={}


# Group rows by publication year. The key is built only from rows dated after
# 2010-01-01 — presumably earlier rows get no key after index alignment and
# fall out of the groups; verify.
year_grouped_df=df.groupby(df['date'][df['date']>'2010-01-01'].dt.year)
# Summed compute per year, its cost at the 2024 dollar-per-FLOP rate, and
# log10 of both.
aggregate_compute=year_grouped_df['compute'].sum()
aggregate_compute_cost=aggregate_compute*dollar_FLOP_2024
log_aggregate_compute=np.log10(aggregate_compute)
log_aggregate_compute_cost=np.log10(aggregate_compute_cost)

# Observed log10 totals from 2020 up to the last year in df, keyed by int year.
recent_years = log_aggregate_compute[log_aggregate_compute.index.isin(range(2020,df.year.max()+1))]
recent_log_compute_dict = {int(k): v for k, v in recent_years.items()}


#do extrapolations
if LINEAR_EXTRAP:
    # A straight line in log10 space is an exponential trend in aggregate compute.
    x = np.array(list(year_grouped_df.groups.keys())).reshape(-1, 1)
    y = log_aggregate_compute.values
    reg = LinearRegression().fit(x, y)

    # Years after the last observed one, up to and including 2029.
    pred_years = np.arange(df.year.max()+1, 2030).reshape(-1, 1)
    log_aggregate_compute_predictions = reg.predict(pred_years)
    log_aggregate_compute_predictions_dict = dict(
        zip((int(yr) for yr in pred_years.flatten()), log_aggregate_compute_predictions)
    )

    # Observed recent years followed by predictions, in ascending year order
    # (a prediction replaces an observed value if a year appears in both).
    merged_log_compute = dict(recent_log_compute_dict)
    merged_log_compute.update(log_aggregate_compute_predictions_dict)
    combined_log_aggregate_compute_dict = dict(sorted(merged_log_compute.items()))

    LOG_AGGREGATE_COMPUTE_DATA['Total']=combined_log_aggregate_compute_dict


#do allocations
# Split each year's total into training and inference shares. All values in
# LOG_AGGREGATE_COMPUTE_DATA are log10, so a share is applied by adding
# log10(share).
if FIXED_ALLOCATION:
    # Same tau (=1, i.e. a 50/50 split) for every year.
    train_alloc,inference_alloc=compute_allocations(tau=1)
    # Was np.log (natural log), which mixed bases with the log10 totals and
    # disagreed with the DECREASING_TAU branch below.
    LOG_AGGREGATE_COMPUTE_DATA['aggregate training compute'] = {year: val + np.log10(train_alloc) for year, val in LOG_AGGREGATE_COMPUTE_DATA['Total'].items()}
    LOG_AGGREGATE_COMPUTE_DATA['aggregate inference compute'] = {year: val + np.log10(inference_alloc) for year, val in LOG_AGGREGATE_COMPUTE_DATA['Total'].items()}
if DECREASING_TAU:
    train_alloc_dict = {}
    inference_alloc_dict = {}

    for year, val in LOG_AGGREGATE_COMPUTE_DATA['Total'].items():
        tau = tau_dict.get(year, 1.0) #gets key; if key not found, default to 1
        train_alloc, inference_alloc = compute_allocations(tau=tau)
        train_alloc_dict[year] = val + np.log10(train_alloc)
        inference_alloc_dict[year] = val + np.log10(inference_alloc)

    LOG_AGGREGATE_COMPUTE_DATA['aggregate training compute'] = train_alloc_dict
    LOG_AGGREGATE_COMPUTE_DATA['aggregate inference compute'] = inference_alloc_dict



if PLOT:
    plt.figure(figsize=(10,6))

    # Plot historical data
    plt.scatter(log_aggregate_compute.index, log_aggregate_compute.values,
                label='Historical Total', color='blue')

    # A year counts as projected when it lies after the last observed year.
    # This replaces a hard-coded 2025, which is only right when the data ends
    # in 2024.
    first_projected_year = int(df.year.max()) + 1

    # Plot extrapolations for each method
    colors = {'Total': 'orange', 'aggregate training compute': 'green', 'aggregate inference compute': 'red'}
    markers = {'Total': 'o', 'aggregate training compute': '.', 'aggregate inference compute': 'x'}
    for method, predictions in LOG_AGGREGATE_COMPUTE_DATA.items():
        years = [y for y in predictions.keys() if y >= first_projected_year]
        values = [predictions[y] for y in years]
        plt.scatter(years, values, label=f'{method} (Projected)',
                   color=colors[method], marker=markers[method])

    plt.xlabel('Year')
    plt.ylabel('Log10(Compute) [FLOP]')
    plt.title(f'Compute Usage Over Time')
    plt.legend()
    plt.grid(True)
    plt.xticks(np.arange(min(log_aggregate_compute.index), 2030, 2))

## Generate compute samples

In [None]:
#get compute_alloc fits
# For each year from 2020 to the last year in df, fit a straight line to
# log10(cumulative compute share) against log10(model compute share), where
# models are sorted by compute and shares are relative to that year's total.
fit_years=np.arange(2020,df.year.max()+1)
FIT_DATA={year:None for year in fit_years}


print('Fitting f_M coefficients')
for year in fit_years:
    total_compute=aggregate_compute[aggregate_compute.index==year].values
    if total_compute.size == 0:
        # Fail with a clear message instead of an IndexError on [0] below.
        raise ValueError(f"No aggregate compute recorded for {year}; cannot fit f_M")
    datapoints_year=df[df['date'].dt.year==year]['compute']

    sorted_computes=np.sort(datapoints_year)
    norm_factor=total_compute[0]  # that year's summed compute
    norm_sorted_computes=sorted_computes/norm_factor
    cumsum=np.cumsum(sorted_computes)
    norm_cumsum=cumsum/norm_factor

    #fit data
    X = np.log10(norm_sorted_computes).reshape(-1, 1)
    y = np.log10(norm_cumsum)
    reg = LinearRegression().fit(X, y)

    #store data
    FIT_DATA[year]={
    'compute':sorted_computes,
    'cumulative_sum':cumsum,
    'norm_factor':norm_factor,
    'f_m_coeffs':[reg.coef_[0], reg.intercept_],  # [gradient, intercept]
    'fit data':(X.ravel(),y.ravel()),
            }

In [None]:
##generate compute samples
# For every fit year and projected year: split that year's aggregate compute
# into log-spaced model-size bins using the f_M line, then fill each bin with
# random model sizes until the bin's allocation is used up.

np.random.seed(42)

CONST_FM=True
LIN_EXTRAP_FM=False
TEST_FM=False
assert(CONST_FM+LIN_EXTRAP_FM+TEST_FM)==1, "Only one of CONST_FM, LIN_EXTRAP_FM, or TEST_FM can be True"

PLOT_KDES=True
PLOT_SCATTER=True

#total compute
# Which LOG_AGGREGATE_COMPUTE_DATA series feeds the sampler. The previous value
# "Linear" is not one of the keys created upstream ('Total',
# 'aggregate training compute', 'aggregate inference compute'), so the lookup
# in the loop raised KeyError.
# NOTE(review): the training-compute series is assumed to be the intended
# input (the loop variables are named *_training_compute) — confirm.
projection="aggregate training compute"
if projection not in LOG_AGGREGATE_COMPUTE_DATA:
    raise KeyError(
        f"projection={projection!r} is not in LOG_AGGREGATE_COMPUTE_DATA; "
        f"available: {sorted(LOG_AGGREGATE_COMPUTE_DATA)}"
    )

#compute allocation parameters
if CONST_FM:
    # Average the per-year fitted gradient and intercept.
    fm_grad = np.mean([FIT_DATA[year]['f_m_coeffs'][0] for year in FIT_DATA])
    fm_int = np.mean([FIT_DATA[year]['f_m_coeffs'][1] for year in FIT_DATA])
if LIN_EXTRAP_FM:
    # Previously `pass`, which left fm_grad/fm_int undefined further down.
    raise NotImplementedError("LIN_EXTRAP_FM is not implemented yet")
if TEST_FM:
    fm_grad,fm_int=1.1,1.0 #1.1,1.0 are arithmetic means from [2020,2023]



#individual model size parameters
log_min_norm_m = np.log10(1e-8) #the smallest model to allocate compute to is ~1e-8 the size of total compute spending that year
log_max_norm_m = np.log10(1e-1) #free param - assume that largest model that year is no larger than 10% of total training compute (can find this from historic data and so sensitivity analysis)

#bin sampling parameters
bin_sampling_method='random'
k=-100 #for exponential dist sampling
if bin_sampling_method not in ('random', 'exp'):
    # An unknown method would otherwise leave `sample` undefined in the loop.
    raise ValueError(f"Unknown bin_sampling_method: {bin_sampling_method!r}")

#misc parameters
round_param=2


# Bin edges and per-bin compute shares do not depend on the year, so they are
# computed once. Two bins per decade; round() protects the edge count against
# a log10 result such as -7.999999 being truncated by int().
n_bin_edges = 2*(int(round(log_max_norm_m))-int(round(log_min_norm_m)))+1
#model sizes (as fraction of T_tot)
norm_ms = np.logspace(log_min_norm_m,log_max_norm_m,n_bin_edges)
log_norm_ms = np.log10(norm_ms)
log_frac_cum_compute = fm_grad*log_norm_ms + fm_int
cum_fm=10**log_frac_cum_compute

model_ctgs = [f'{norm_ms[i]:.2e}--{norm_ms[i+1]:.2e}' for i in range(len(norm_ms)-1)]
# Bin bounds are read back from the 3-significant-figure labels, as before,
# so the sampled values are unchanged.
bin_bounds = [tuple(float(edge) for edge in ctg.split('--')) for ctg in model_ctgs]
f_m = np.diff(cum_fm) #we don't include compute alloc to models 1e-8 smaller than total compute
if np.any(f_m <= 0):
    # A non-positive share makes the fill loop below run forever.
    raise ValueError("f_m must be positive in every bin; check fm_grad/fm_int")


# .ravel() so each key comes from a scalar, not a 1-element array.
COMPUTE_SAMPLE_DATA={int(year):None for year in pred_years.ravel()}


all_years=np.concatenate([fit_years, pred_years.astype(int).ravel()])

for year in all_years:
    year = int(year)

    log_agg_training_compute=LOG_AGGREGATE_COMPUTE_DATA[projection][year]
    agg_training_compute=10**log_agg_training_compute #total compute used over the year

    bin_compute_allocs=f_m*agg_training_compute #array of how much compute allocated to each bin
    DATA_alloc={model_ctgs[i]:
                {'compute alloc':bin_compute_allocs[i]} for i in range(len(model_ctgs))}

    compute_samples_rand=[]

    for (norm_model_bin_lb,norm_model_bin_ub),alloc in zip(bin_bounds,bin_compute_allocs):
        #here alloc is the amount of alloc given to each individual bin

        model_bin_lb,model_bin_ub = agg_training_compute*norm_model_bin_lb, agg_training_compute*norm_model_bin_ub #normalising factor is total training compute
        # Bin bounds expressed as a fraction of this bin's allocation.
        allocnorm_model_bin_lb,allocnorm_model_bin_ub=model_bin_lb/alloc, model_bin_ub/alloc

        #not generating multiple samples yet for CIs
        # Draw model sizes until they sum to the bin's allocation. The final
        # draw is cut down to the remainder, so it can be below the bin's
        # lower bound.
        running_tot=0
        allocnormed_samples=[]
        while running_tot<1:
            #SAMPLE
            if bin_sampling_method=='random':
                sample = np.random.uniform(allocnorm_model_bin_lb, allocnorm_model_bin_ub)
            else:
                sample  = sample_from_exp_dist(a=allocnorm_model_bin_lb,b=allocnorm_model_bin_ub,k=k)

            #SUM CHECK
            if running_tot + sample > 1:
                allocnormed_samples.append(1 - running_tot)
                running_tot = 1
            else:
                allocnormed_samples.append(sample)
                running_tot += sample

        #print(f"Model category {ctg} adds {len(allocnormed_samples)} models")
        # Convert back from allocation fractions to FLOP.
        compute_samples_rand.extend(alloc*np.array(allocnormed_samples))

    # Drop zero-size remainders.
    compute_samples_rand = [x for x in compute_samples_rand if x!=0]

    print(year)
    COMPUTE_SAMPLE_DATA[year]=compute_samples_rand
        

if PLOT_KDES:
    # One KDE panel per projected year on a fixed 3x2 grid.
    fig, axes = plt.subplots(3, 2, figsize=(12, 8))
    axes = axes.ravel()

    projected_items = [(y, s) for y, s in COMPUTE_SAMPLE_DATA.items() if y in pred_years]
    for idx, (year, samples) in enumerate(projected_items):
        panel = axes[idx]
        sns.kdeplot(data=np.log10(samples), ax=panel)
        panel.set_title(f'Year {year}')
        panel.set_xlabel('log compute (FLOPs)')
        panel.set_ylabel('Density')
        panel.grid(alpha=0.5)
        panel.set_xlim([15,30])

    plt.tight_layout()
    plt.show()

if PLOT_SCATTER:
    # Historical models: one point per model, placed at a random position
    # within its year.
    historical_data = {
        'year': [],
        'compute': []
    }
    for year in range(2020, df.year.max()+1):
        models = df[df['year'] == year]['compute'].values
        # One uniform [0, 1) offset per model, drawn in model order.
        offsets = np.random.random(len(models))
        historical_data['year'].extend((year + offsets).tolist())
        historical_data['compute'].extend(models)

    # Projected models: same treatment for the sampled compute values.
    projected_data = {
        'year': [],
        'compute': []
    }
    for year, samples in ((y,s) for y,s in COMPUTE_SAMPLE_DATA.items() if y in pred_years):
        offsets = np.random.random(len(samples))
        projected_data['year'].extend((year + offsets).tolist())
        projected_data['compute'].extend(samples)

    # Historical against projected on a log10 compute axis.
    plt.figure(figsize=(12,6))
    plt.scatter(historical_data['year'], np.log10(historical_data['compute']), alpha=0.5, label='Historical')
    plt.scatter(projected_data['year'], np.log10(projected_data['compute']), alpha=0.5, label='Projected',color='red')
    plt.xlabel('Year')
    plt.ylabel('Log Compute (FLOPs)')
    plt.grid(alpha=0.3)
    plt.legend()
    plt.show()


# Count, for each projected year, how many sampled models reach each
# power-of-ten compute threshold (exponents of 10, in FLOP).
thresholds = [25, 26, 27, 28, 29, 30]
threshold_counts = {year: [] for year in pred_years.astype(int).ravel()}

for year, samples in COMPUTE_SAMPLE_DATA.items():
    if year not in pred_years:
        continue
    for threshold in thresholds:
        threshold_counts[year].append(sum(x >= 10**threshold for x in samples))

# Rows are thresholds, columns are years.
row_labels = [f'>1e{t}' for t in thresholds]
df_counts = pd.DataFrame(threshold_counts, index=row_labels)
display(df_counts)


# Number of sampled models per projected year.
for year, samples in COMPUTE_SAMPLE_DATA.items():
    if year in pred_years:
        print(f"Year {year}: {len(samples)} samples")

## Experiments with allocation parameters

In [None]:
#plot 2020-2023 m_alloc,c_alloc
# Fitted gradient and intercept of the f_M line for each fit year.
gradients = [FIT_DATA[year]['f_m_coeffs'][0] for year in fit_years]
intercepts = [FIT_DATA[year]['f_m_coeffs'][1] for year in fit_years]

fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(8,5))

# Both panels share one y-range so the two trends can be compared directly.
ymin = min(min(gradients), min(intercepts))
ymax = max(max(gradients), max(intercepts))

panels = (
    (ax1, gradients, 'm_alloc trend'),
    (ax2, intercepts, 'c_alloc trend'),
)
for panel, series, panel_title in panels:
    panel.plot(fit_years, series, 'o-')
    panel.set_xlabel('Year')
    panel.set_ylabel('Value')
    panel.set_title(panel_title)
    panel.grid(alpha=0.3)
    panel.set_ylim(ymin, ymax)

plt.tight_layout()
plt.show()


In [None]:
## TS for fm_m,fm_c effect
# Re-run the projected-year sampling for several (gradient, intercept) pairs
# of the f_M line and keep the samples per pair in FM_DATA.


np.random.seed(42)

CONST_FM=False
LIN_EXTRAP_FM=False
TEST_FM=True
assert(CONST_FM+LIN_EXTRAP_FM+TEST_FM)==1, "Only one of CONST_FM, LIN_EXTRAP_FM, or TEST_FM can be True"

PLOT_KDES=True
PLOT_SCATTER=True


#compute allocation parameters
# NOTE(review): fm_grad/fm_int are set here but the loop below uses the
# (fm_m, fm_c) pairs from FM_combos instead.
if CONST_FM:
    fm_grad,fm_int = np.mean([FIT_DATA[year]['f_m_coeffs'][0] for year in FIT_DATA]),np.mean([FIT_DATA[year]['f_m_coeffs'][1] for year in FIT_DATA])
if LIN_EXTRAP_FM:
    pass
if TEST_FM:
    fm_grad,fm_int=1.1,0.5 #1.1,1.0 are arithmetic means from [2020,2023]

fm_m_choices=[1.1]
fm_c_choices=[0.5,1.0,2.0]
FM_combos=[(m,c) for m in fm_m_choices for c in fm_c_choices]

#individual model size parameters
log_min_norm_m = np.log10(1e-8) #the smallest model to allocate compute to is ~1e-8 the size of total compute spending that year
log_max_norm_m = np.log10(1e-1) #free param - assume that largest model that year is no larger than 10% of total training compute (can find this from historic data and so sensitivity analysis)

#bin sampling parameters
bin_sampling_method='random'
k=-100 #for exponential dist sampling
if bin_sampling_method not in ('random', 'exp'):
    # An unknown method would otherwise leave `sample` undefined in the loop.
    raise ValueError(f"Unknown bin_sampling_method: {bin_sampling_method!r}")

#misc parameters
round_param=2

# Aggregate-compute series that feeds the sampler. The name used here before,
# aggregate_compute_predictions_dict, is not defined anywhere upstream and
# raised NameError.
# NOTE(review): the training-compute series is assumed to be the intended
# input — confirm.
EXPERIMENT_PROJECTION = 'aggregate training compute'
if EXPERIMENT_PROJECTION not in LOG_AGGREGATE_COMPUTE_DATA:
    raise KeyError(
        f"{EXPERIMENT_PROJECTION!r} is not in LOG_AGGREGATE_COMPUTE_DATA; "
        f"available: {sorted(LOG_AGGREGATE_COMPUTE_DATA)}"
    )

# Two bins per decade; round() protects the edge count against a log10 result
# such as -7.999999 being truncated by int().
n_bin_edges = 2*(int(round(log_max_norm_m))-int(round(log_min_norm_m)))+1
#model sizes (as fraction of T_tot)
norm_ms = np.logspace(log_min_norm_m,log_max_norm_m,n_bin_edges)
log_norm_ms = np.log10(norm_ms)
model_ctgs = [f'{norm_ms[i]:.2e}--{norm_ms[i+1]:.2e}' for i in range(len(norm_ms)-1)]
# Bin bounds are read back from the 3-significant-figure labels.
bin_bounds = [tuple(float(edge) for edge in ctg.split('--')) for ctg in model_ctgs]


FM_DATA={(fm_m,fm_c):None for fm_m,fm_c in FM_combos}


for fm_m,fm_c in FM_combos:
    # Per-combination results live in their own dict so the global
    # COMPUTE_SAMPLE_DATA from the main sampling cell (which the backtest
    # reads) is not overwritten.
    combo_samples={int(year):None for year in pred_years.ravel()}

    log_frac_cum_compute = fm_m*log_norm_ms + fm_c
    cum_fm=10**log_frac_cum_compute
    f_m = np.diff(cum_fm) #we don't include compute alloc to models 1e-8 smaller than total compute
    if np.any(f_m <= 0):
        # A non-positive share makes the fill loop below run forever.
        raise ValueError(f"f_m must be positive in every bin for fm_m={fm_m}, fm_c={fm_c}")

    for year in pred_years.astype(int).ravel():
        year = int(year)

        log_agg_training_compute=LOG_AGGREGATE_COMPUTE_DATA[EXPERIMENT_PROJECTION][year]
        agg_training_compute=10**log_agg_training_compute #total compute used over the year

        bin_compute_allocs=f_m*agg_training_compute #array of how much compute allocated to each bin
        DATA_alloc={model_ctgs[i]:
                    {'compute alloc':bin_compute_allocs[i]} for i in range(len(model_ctgs))}

        compute_samples_rand=[]

        for (norm_model_bin_lb,norm_model_bin_ub),alloc in zip(bin_bounds,bin_compute_allocs):
            #here alloc is the amount of alloc given to each individual bin

            model_bin_lb,model_bin_ub = agg_training_compute*norm_model_bin_lb, agg_training_compute*norm_model_bin_ub #normalising factor is total training compute
            # Bin bounds expressed as a fraction of this bin's allocation.
            allocnorm_model_bin_lb,allocnorm_model_bin_ub=model_bin_lb/alloc, model_bin_ub/alloc

            #not generating multiple samples yet for CIs
            # Draw model sizes until they sum to the bin's allocation; the
            # final draw is cut down to the remainder.
            running_tot=0
            allocnormed_samples=[]
            while running_tot<1:
                #SAMPLE
                if bin_sampling_method=='random':
                    sample = np.random.uniform(allocnorm_model_bin_lb, allocnorm_model_bin_ub)
                else:
                    sample  = sample_from_exp_dist(a=allocnorm_model_bin_lb,b=allocnorm_model_bin_ub,k=k)

                #SUM CHECK
                if running_tot + sample > 1:
                    allocnormed_samples.append(1 - running_tot)
                    running_tot = 1
                else:
                    allocnormed_samples.append(sample)
                    running_tot += sample

            #print(f"Model category {ctg} adds {len(allocnormed_samples)} models")
            # Convert back from allocation fractions to FLOP.
            compute_samples_rand.extend(alloc*np.array(allocnormed_samples))

        # Drop zero-size remainders.
        compute_samples_rand = [x for x in compute_samples_rand if x!=0]

        combo_samples[year]=compute_samples_rand
    FM_DATA[(fm_m,fm_c)]=combo_samples


if PLOT_SCATTER:
    # One panel per (m_alloc, c_alloc) combination.
    combo_keys = list(FM_DATA.keys())
    n_plots = len(combo_keys)
    fig, axes = plt.subplots(1, n_plots, figsize=(5*n_plots, 5))
    if n_plots == 1:
        axes = [axes]
    colors = plt.cm.rainbow(np.linspace(0, 1, n_plots))

    # Historical 2020-2023 models go on every panel first, with horizontal
    # jitter of up to +/-0.4 years.
    in_window = df['year'].between(2020, 2023)
    historical_years = df[in_window]['year']
    historical_compute = df[in_window]['compute']
    for ax in axes:
        jittered_hist_years = np.random.uniform(historical_years-0.4, historical_years+0.4, len(historical_years))
        ax.scatter(jittered_hist_years, historical_compute, alpha=0.3, c='gray', s=10, label='Historical')
        ax.set_xlim(2020, 2029)

    # Then the sampled models for each combination, jittered the same way.
    for combo, color, ax in zip(combo_keys, colors, axes):
        fm_m, fm_c = combo
        years = []
        compute_values = []
        for year, samples in FM_DATA[combo].items():
            years.extend(np.random.uniform(year-0.4, year+0.4, len(samples)))
            compute_values.extend(samples)

        ax.scatter(years, compute_values, alpha=0.3, c=color, s=10, label='Model')
        ax.set_yscale('log')
        ax.set_xlabel('Year')
        ax.set_ylabel('Compute (FLOP)')
        ax.set_title(f'm_alloc={fm_m}, c_alloc={fm_c}')
        ax.grid(alpha=0.5)
        ax.legend()

    plt.tight_layout()
    plt.show()


if PLOT_KDES:
    # Overlay the 2026 log10-compute density of every combination on one axis.
    fig, ax = plt.subplots(figsize=(10, 6))
    combo_keys = list(FM_DATA.keys())
    colors = plt.cm.rainbow(np.linspace(0, 1, len(combo_keys)))

    for combo, color in zip(combo_keys, colors):
        fm_m, fm_c = combo
        # Get samples for 2026
        samples_2026 = FM_DATA[combo][2026]

        sns.kdeplot(
            data=np.log10(samples_2026),
            label=f'fm_m={fm_m}, fm_c={fm_c}',
            color=color,
        )

    ax.set_xlabel('Log Compute (FLOP)')
    ax.set_ylabel('Density')
    ax.set_title('Log compute KDE for 2026')
    ax.grid(alpha=0.5)
    ax.legend()

    plt.tight_layout()
    plt.show()

## Backtesting

In [None]:
# Backtest: for each fit year, compare how many real models reached each
# compute threshold with how many sampled models did.
retrodict_years=fit_years
retrodict_thresholds=[1e23,1e24,1e25]
threshold_labels = [f'{t:.2e}' for t in retrodict_thresholds]

#observed
# Rows are thresholds, columns are years. Uses >= so observed and retrodicted
# counts apply the same rule (observed previously used a strict >).
df_observed = pd.DataFrame(
    {
        year: [int((df.loc[df['year'] == year, 'compute'] >= threshold).sum())
               for threshold in retrodict_thresholds]
        for year in retrodict_years
    },
    index=threshold_labels,
)
df_observed.index.name = 'Threshold'

# Retrodicted counts from the sampled models. Iterate over the years we want
# rather than over whatever COMPUTE_SAMPLE_DATA holds, so a missing year shows
# up as NaN with a message instead of a DataFrame length-mismatch error.
retrodict_counts = {}
years_without_samples = []
for year in retrodict_years:
    samples = COMPUTE_SAMPLE_DATA.get(int(year))
    if samples is None:
        years_without_samples.append(int(year))
        retrodict_counts[year] = [np.nan] * len(retrodict_thresholds)
        continue
    sample_arr = np.asarray(samples, dtype=float)
    retrodict_counts[year] = [int((sample_arr >= threshold).sum())
                              for threshold in retrodict_thresholds]

if years_without_samples:
    print(f"No samples in COMPUTE_SAMPLE_DATA for years {years_without_samples}; "
          "re-run the 'Generate compute samples' cell before backtesting.")

df_retrodict = pd.DataFrame(retrodict_counts, index=threshold_labels)
df_retrodict.index.name = 'Threshold'

print("Observed counts:")
display(df_observed)
print("\nRetrodicted counts:")
display(df_retrodict)

