In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.linear_model import LinearRegression #for linear regression
from scipy.optimize import curve_fit #for exponential fit

# Location of the systems CSV that is loaded with pd.read_csv in the prep cell.
# NOTE(review): this is a machine-specific absolute path; it must be adjusted
# before the notebook can be run anywhere else.
csv_path = '/Users/iyngkarrankumar/Documents/Misc/Tracking models/data/all_systems.csv'

In [None]:
def half_year_bin(date):
    """Return the half-year label for a date, e.g. '2021-H1' or '2021-H2'.

    Months 1-6 map to H1 and months 7-12 map to H2. `date` only needs to
    expose `.year` and `.month` attributes.
    (Original implementation was noted as ChatGPT generated.)
    """
    return f'{date.year}-H1' if date.month <= 6 else f'{date.year}-H2'

def year_bin(date):
    """Return the calendar year of `date` (read from its `.year` attribute)."""
    calendar_year = date.year
    return calendar_year


def exponential_model(x,a,b):
    """Exponential curve a * exp(b * (x - 2017)).

    x : scalar or array of x-values (offset is measured from 2017).
    a : value of the curve at x == 2017.
    b : growth rate per unit of x.
    """
    offset = x - 2017
    return a * np.exp(b * offset)

def geometric_model(x,a,r):
    """Geometric curve a * r ** (x - 2017).

    x : scalar or array of x-values (offset is measured from 2017).
    a : value of the curve at x == 2017.
    r : multiplicative growth factor per unit of x.

    Note: a function with this same name is defined again in the fitting
    cell later in the notebook; once that cell runs, it replaces this one.
    """
    steps = x - 2017
    return a * r ** steps

## A

In [None]:
# --- Data preparation ---
# Load the systems table and parse the publication dates so that they can be
# compared against a cutoff and binned.
DATA = pd.read_csv(csv_path)
DATA['Publication date'] = pd.to_datetime(DATA['Publication date'])

# Keep only the rows that have a training-compute value.
DATA_ = DATA.dropna(subset=['Training compute (FLOP)'])

# Keep systems published strictly after 1 January of start_year.
start_year = 2017
DATA_f1 = DATA_[DATA_['Publication date'] > f'{start_year}-01-01']

# Drop the systems listed in SYSTEMS_TO_REMOVE.
SYSTEMS_TO_REMOVE = ['AlphaGo Zero','AlphaGo Master']
# .copy() turns DATA_f1 into an independent frame, so the column assignments
# below write to it directly instead of to a slice of DATA_ (which raises
# SettingWithCopyWarning and is not guaranteed to take effect).
DATA_f1 = DATA_f1[~DATA_f1['System'].isin(SYSTEMS_TO_REMOVE)].copy()

# New column holding the publication bin. The bin is computed from DATA_f1's
# own dates rather than from DATA_ (which only worked through index alignment).
bin_type = 'year'
if bin_type=='year':
    DATA_f1['Publication_Bin'] = DATA_f1['Publication date'].apply(year_bin)
elif bin_type=='half year':
    DATA_f1['Publication_Bin'] = DATA_f1['Publication date'].apply(half_year_bin)
else:
    # Fail here rather than with a KeyError on 'Publication_Bin' further down.
    raise ValueError(f"Unknown bin_type {bin_type!r}; expected 'year' or 'half year'")

# New column holding log10 of the training compute.
DATA_f1['log10 Training compute (FLOP)'] = np.log10(DATA_f1['Training compute (FLOP)'])


In [None]:
# --- Cumulative number of models at or above each compute threshold ---
# For every publication bin, count the rows of DATA_f1 published in that bin
# or earlier whose log10 training compute is >= the threshold, then draw the
# two count series as a grouped bar chart.
DOUBLE_2024 = True   # extrapolate the 2024 bin (see the note inside the loop)
PLOT=True            # set to False to compute PLOT_DATA without drawing

# Sort the bins in ascending order explicitly. The cumulative logic below
# (plot_data[-1] being "the previous bin") needs chronological order, and
# reversing unique() only gave that when the CSV rows were sorted newest-first.
years = np.sort(DATA_f1['Publication_Bin'].unique())
years_str = [str(year) for year in years]
thresholds = [23,24]  # thresholds in log10(FLOP)

if PLOT:
    fig,ax=plt.subplots(figsize=(8,6),sharey=True)
    ax.grid(alpha=0.4)
LARGEST_RUNS=[]  # per bin: largest log10 compute seen up to and including that bin

PLOT_DATA = {key: None for key in thresholds}

for idx,threshold in enumerate(thresholds):
    # The threshold mask does not depend on the bin, so build it once per threshold.
    threshold_condition = DATA_f1['log10 Training compute (FLOP)'] >= threshold
    plot_data = []
    for year in years:
        date_condition = DATA_f1['Publication_Bin'] <= year
        date_filtered_df = DATA_f1[date_condition]
        largest_run = date_filtered_df['log10 Training compute (FLOP)'].max()
        # LARGEST_RUNS does not depend on the threshold; fill it on the first pass only.
        if idx==0:LARGEST_RUNS.append(round(largest_run,1))

        filtered_df = DATA_f1[date_condition & threshold_condition]

        data_point = len(filtered_df)
        if year==2024 and DOUBLE_2024:
            # Models newly over the threshold in 2024 = cumulative count minus
            # the previous bin's cumulative count (0 if 2024 is the first bin,
            # which previously raised IndexError).
            previous_count = plot_data[-1] if plot_data else 0
            exceeding_threshold_2024 = data_point-previous_count
            # NOTE(review): this adds two more copies of the 2024 increment, so
            # the 2024 contribution is tripled, not doubled as the flag name
            # suggests. Presumably the data covers about a third of 2024 - confirm.
            data_point = data_point + 2*exceeding_threshold_2024 #to get a full year of 2024 data out

        plot_data.append(data_point)

    PLOT_DATA[threshold] = np.array(plot_data)

if PLOT:
    # Offset the two series by +/-0.1 around each bin so both bars are visible.
    # This arithmetic requires numeric bins, i.e. bin_type == 'year'.
    ax.bar(years-0.1,PLOT_DATA[23], width=0.5, label=f'Threshold: 10^23 FLOPs',color='tab:blue',alpha=0.8)
    ax.bar(years+0.1,PLOT_DATA[24], width=0.5, label=f'Threshold: 10^24 FLOPS',color='tab:orange',alpha=0.8)
    ax.set_yticks(np.arange(0,130,10))
    ax.legend()
    fig.suptitle('Number of models exceeding thresholds')
    #custom_xticks = [f'{year} \n {run}' for year,run in zip(years,LARGEST_RUNS)]
    #ax.set_xticklabels(custom_xticks,fontsize=10)

In [None]:
# --- Keep only the bins whose count is non-zero, for each threshold ---
threshold_23_NZ_indices = np.flatnonzero(PLOT_DATA[23])
threshold_24_NZ_indices = np.flatnonzero(PLOT_DATA[24])

threshold_23_years_filtered = years[threshold_23_NZ_indices]
threshold_23_filtered = PLOT_DATA[23][threshold_23_NZ_indices]

threshold_24_years_filtered = years[threshold_24_NZ_indices]
threshold_24_filtered = PLOT_DATA[24][threshold_24_NZ_indices]

# --- Fit three candidate models to the 10^23 series ---

# Straight line (sklearn expects a 2-D feature matrix, hence the reshape).
linear_model = LinearRegression().fit(
    threshold_23_years_filtered.reshape(-1,1),
    threshold_23_filtered,
)

# Degree-2 polynomial.
degree = 2
coefficients = np.polyfit(threshold_23_years_filtered,threshold_23_filtered ,degree)
polynomial = np.poly1d(coefficients)

# Geometric growth, anchored at the first non-zero 10^23 bin.
# Note: this redefines (and shadows) the geometric_model declared near the top
# of the notebook, which is anchored at 2017 instead.
def geometric_model(x,a,r):
    """Geometric curve a * r ** (x - first non-zero 10^23 bin)."""
    anchor = threshold_23_years_filtered[0]
    return a*r**(x-anchor)
popt_geometric, _ = curve_fit(geometric_model,threshold_23_years_filtered,threshold_23_filtered)

# --- Compare each fit against the observed 10^24 series ---
# Each model is evaluated at the non-zero 10^24 bins and truncated to integers.
lin_predict = linear_model.predict(threshold_24_years_filtered.reshape(-1,1)).astype('int')
poly_pred = polynomial(threshold_24_years_filtered).astype('int')
geometric_pred = geometric_model(threshold_24_years_filtered,*popt_geometric).astype('int')

# Signed total of (prediction - observed) per model, in the order
# linear, polynomial, geometric.
N_MODELS_OVERSHOOT = [
    np.sum(prediction-threshold_24_filtered)
    for prediction in (lin_predict, poly_pred, geometric_pred)
]

print(N_MODELS_OVERSHOOT)
