### Setup & process data

In [None]:
import numpy as np
from scipy import stats, optimize
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import itertools
import copy,re
%matplotlib inline

In [None]:
# Data source: a CSV shared on Google Drive (presumably a snapshot of
# https://epochai.org/data/epochdb/notable_systems.csv -- TODO confirm).
url = 'https://drive.google.com/file/d/1RLLKPU3bEYK65wlQlU0p20u9M8cHkLMl/view?usp=sharing'
# Rewrite the "view" link as a direct-download link; the file id is the
# second-to-last path segment of the share URL.
url = 'https://drive.google.com/uc?id=' + url.split('/')[-2]

df = pd.read_csv(url)

# Keep only rows that list a notability criterion. Copy so that the column
# assignments below write to an independent frame instead of a slice of the
# raw table (avoids SettingWithCopyWarning).
df = df[~df["Notability criteria"].isna()].copy()

# Short aliases for the columns used throughout the notebook.
df["compute"] = df["Training compute (FLOP)"]
df["date"] = df["Publication date"]
df["model"] = df["System"]
df["poss1e23"] = df["Possibly over 1e23 FLOP"]
df["poss1e25"] = df["Estimated over 1e25 FLOP"]
df["cost"] = df["Training compute cost (2023 USD)"]
# Strip thousands separators and the currency sign before parsing.
# regex=False makes "$" a literal character on every pandas version
# (as a regular expression it is the end-of-string anchor).
df["cost"] = (
    df["cost"]
    .str.replace(",", "", regex=False)
    .str.replace("$", "", regex=False)
    .astype(float)
)

df = df[["model", "compute", "date", "cost", "poss1e23", "poss1e25"]].copy()

In [None]:
# Systems excluded from the analysis by name.
to_remove = ['AlphaGo Zero','AlphaZero']
df = df.loc[~df["model"].isin(to_remove)]

In [None]:
# Systems added by hand when they are not already in the dataset.
# Each row follows df's column order: model, compute, date, cost, poss1e23, poss1e25.
to_append = [
  ["Claude 3.5 Sonnet", 4.3e25, "2024-06-21", np.nan, np.nan, np.nan],
  ["GPT-4o Mini", 1.2e25, "2024-07-18", np.nan, np.nan, np.nan],
]

for row in to_append:
  if row[0] not in df["model"].values:
    # df was filtered without resetting its index, so the labels have gaps and
    # `len(df)` may already be in use -- assigning to it would overwrite that
    # row. Use a label strictly past the current maximum instead.
    next_label = df.index.max() + 1 if len(df) else 0
    df.loc[next_label] = row

In [None]:
# Manual training-compute estimates for models whose compute is missing in the dataset.
# "Claude 2.1" reuses Claude 2's value; take it as a scalar (NaN if Claude 2 is absent)
# so the assignment below never receives an empty or multi-element array.
claude2_compute = df.loc[df["model"] == "Claude 2", "compute"]

to_add_compute = {
    "Claude 3 Opus": 2.5e25,
    "Claude 3 Sonnet": 1.1e25,
    "GPT-4o": 2.9e25,
    "Gemini 1.0 Pro": 2.8e24,
    "Gemini 1.5 Pro": 1.9e25,
    "Reka Core": 8.4e24,
    "GPT-4 Turbo": 2.1e25,  # rough guess
    "GPT-4V": 2.1e25,  # rough guess
    "Claude 2.1": claude2_compute.iloc[0] if len(claude2_compute) else np.nan,  # rough guess
}

for k, v in to_add_compute.items():
  is_model = df["model"] == k
  if not is_model.any():
    # Previously this case fell into the "already has a compute value" branch
    # (or raised on an empty truth test, depending on the NumPy version).
    print(f"{k} not found in the dataset; skipping")
    continue
  # Only fill rows that have no compute value yet; never overwrite known values.
  needs_value = is_model & df["compute"].isna()
  if needs_value.any():
    df.loc[needs_value, "compute"] = v
  else:
    print(f"{k} already has a compute value")

In [None]:
# Reset the ones we've set
# (rows that now have a compute value no longer need the "possibly over ..." flags)
df.loc[~df["compute"].isna(), "poss1e23"] = np.nan
df.loc[~df["compute"].isna(), "poss1e25"] = np.nan

# Set some temporary placeholder values
# TODO: revisit
# df.loc[(df["poss1e25"] == "checked"), "compute"] = 1.01e25  # placeholder
# df.loc[((df["poss1e23"] == "checked") & (df["poss1e25"] != "checked")), "compute"] = 1.01e23  # placeholder

# We want to handle these leading models manually via the above compute estimates.
# Fails if any row flagged over 1e25 still has no compute value.
assert df[(df["poss1e25"] == "checked") & (df["compute"].isna())].size == 0

# We sample 1e23-1e25 models with unknown compute from the existing empirical distribution.
# TODO: revisit
# NOTE(review): sampling is without replacement, so this raises if there are more
# flagged rows than known values in [1e23, 1e25) -- confirm that cannot happen.
poss1e23 = ((df["poss1e23"] == "checked") & (df["poss1e25"] != "checked"))
df.loc[poss1e23, "compute"] = df[(df["compute"] >= 1e23) & (df["compute"] < 1e25)]["compute"].sample(poss1e23.sum(), random_state=0).values

df["date"] = pd.to_datetime(df["date"])
df["log_compute"] = np.log10(df["compute"])

# NOTE(review): month/12 maps January to year + 1/12 and December to year + 1.0;
# (month - 1)/12 may have been intended -- confirm before relying on date_float.
df["date_float"] = df["date"].dt.year + df["date"].dt.month/12

df['year'] = df['date'].dt.year

# Chronological order; rows that still have no compute value are dropped in place.
df = df.sort_values("date")
df.dropna(subset="compute", inplace=True)

In [None]:
fig = sns.scatterplot(data=df[df['date']>'2010-01-01'], x='date',y='compute')
fig.set(yscale='log')
%matplotlib inline

# Method 1: Advancing normal distributions + exponential model-count extrapolation

### Model counts extrapolation

In [None]:
from scipy.stats import t

def exp_fit(x,a,b):
  """Exponential curve ``a * exp(b * x)`` used as the model-count growth model."""
  return a*np.exp(b*x)


def exp_pred_counts(years,year_counts,future_years,alpha=0.1):
  """Fit an exponential to yearly counts and extrapolate it with a confidence band.

  The curve ``a * exp(b * k)`` is fitted to ``year_counts`` where ``k`` is the
  position of the year (0, 1, 2, ...). The band assumes log-normal errors: the
  residuals of the log counts give a standard error, which is scaled by the
  Student-t critical value and applied symmetrically in log space.

  Parameters
  ----------
  years : array-like
      Observed years, one per entry of ``year_counts`` (assumed consecutive).
  year_counts : array-like or pandas.Series
      Observed count for each year; must be strictly positive for the log residuals.
  future_years : array-like
      Years to extrapolate to.
  alpha : float, default 0.1
      Significance level; the band is a two-sided ``1 - alpha`` interval.

  Returns
  -------
  (years_all, preds_all, pred_counts_UB, pred_counts_LB) : tuple of ndarray
      Observed + future years, fitted + extrapolated counts (extrapolated
      values are truncated to integers), and the upper / lower bounds.

  Raises
  ------
  ValueError
      If ``years`` and ``year_counts`` differ in length, or fewer than three
      observations are given (no residual degrees of freedom).
  """
  years = np.asarray(years)
  future_years = np.asarray(future_years)
  obs_counts = np.asarray(year_counts, dtype=float)
  n_obs = len(obs_counts)

  if len(years) != n_obs:
    raise ValueError(
        f"years has {len(years)} entries but year_counts has {n_obs}; "
        "every year needs exactly one count"
    )
  dof = n_obs - 2  # two fitted parameters (a, b)
  if dof < 1:
    raise ValueError("need at least 3 observations to estimate the prediction error")

  mapped_years = np.arange(n_obs, dtype=float)
  popt, _ = optimize.curve_fit(exp_fit, mapped_years, obs_counts)
  pred_counts = exp_fit(future_years-years[0],*popt).astype(int)

  #conf bounds
  #assuming log normal uncertainty
  fit_pred_counts = exp_fit(mapped_years,*popt) #predicted counts for fitted years
  residuals = np.log(fit_pred_counts) - np.log(obs_counts) #residuals of log counts
  # Residual standard error with n-2 degrees of freedom. The original wrote
  # len(year_counts-2), which is just n and made the band too narrow.
  SEP = np.sqrt(np.sum(residuals**2)/dof)

  crit_t_val = t.ppf(1-alpha/2, dof)
  pred_delta = crit_t_val*SEP

  years_all = np.concatenate([years,future_years])
  preds_all = np.concatenate([fit_pred_counts,pred_counts])
  log_preds = np.log(preds_all)
  pred_counts_UB = np.exp(log_preds+pred_delta)
  pred_counts_LB = np.exp(log_preds-pred_delta)

  return years_all,preds_all,pred_counts_UB,pred_counts_LB



In [None]:
# Yearly model counts inside the 2017-2023 window, fitted and extrapolated to 2030.
tmp_df = df.loc[(df['date']>'2017-01-01') & (df['date']<'2024-01-01')]
years = np.arange(2017,2024)
year_counts = tmp_df.groupby(['year']).size()
future_years = np.arange(2024,2031)
all_years, pred_counts, pred_counts_UB,pred_counts_LB = exp_pred_counts(
    years,
    year_counts,
    future_years,
    alpha=0.1,
)

# Central prediction plus the shaded band returned by exp_pred_counts.
fig,ax=plt.subplots()
ax.plot(all_years,pred_counts,label='Predicted counts')
ax.fill_between(all_years,pred_counts_UB,pred_counts_LB,alpha=0.1,label='90% CI')
ax.legend()
ax.grid()

In [None]:
## Bayesian approach
from scipy import optimize
from scipy.stats import multivariate_normal

# NOTE: same definition as the exp_fit in the model-counts cell above; kept so
# this cell can be run on its own.
def exp_fit(x,a,b):
  """Exponential curve ``a * exp(b * x)``."""
  return a*np.exp(b*x)


tmp_df = df[(df['date']>'2017-01-01') & (df['date']<'2024-01-01')]
years = np.arange(2017,2023+1)
year_counts = tmp_df.groupby(['year']).size()
future_years = np.arange(2024,2030+1)

# Fit the yearly counts; the parameter estimates and their covariance are
# sampled later (multivariate normal) to propagate count uncertainty.
mapped_years = np.arange(0,len(years)).astype(float)
popt_counts, cov_counts = optimize.curve_fit(exp_fit, mapped_years, year_counts.values.astype(float))
#pred_counts = exp_fit(future_years-years[0],*popt).astype(int)


window_size = '30D'
dates = pd.to_datetime(tmp_df['date'])
# An offset window such as '30D' needs a monotonic datetime-like index; calling
# .rolling('30D') on the integer-indexed date column raises
# "ValueError: window must be an integer 0 or greater". Roll over a series of
# ones indexed by publication date instead, so rolling.sum() is the number of
# models published in the trailing window.
# NOTE(review): `rolling` is not used by any later cell shown here -- confirm the intended use.
rolling = pd.Series(1, index=pd.DatetimeIndex(dates.sort_values())).rolling(window=window_size)

### Fit compute distributions

In [None]:
from scipy.stats import gaussian_kde,norm,linregress
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression

In [None]:
## utils

#placeholder vars
# Module-level inputs read by trunc_norm_NLL. They are assigned real values
# (the right-tail sample and the fixed mean) just before the minimize() call
# in the distribution-fitting cell below.
data_to_fit=None
mu_0=None

#to fit right tail of normal dist to 2024 data
def trunc_norm_NLL(sigma, data=None, mu=None):
    """Negative log-likelihood of a normal left-truncated at the sample minimum.

    Each observation contributes ``logpdf(x; mu, sigma) - log P(X > min(data))``,
    i.e. the density of a normal with fixed mean ``mu`` conditioned on lying
    above the smallest observed value.

    Parameters
    ----------
    sigma : float or array of shape (1,)
        Standard deviation being optimised (scipy's ``minimize`` passes an array).
    data : array-like, optional
        Observations; defaults to the module-level ``data_to_fit``.
    mu : float, optional
        Fixed mean; defaults to the module-level ``mu_0``.

    Returns
    -------
    float
        The negative log-likelihood (to be minimised over ``sigma``).
    """
    if data is None:
        data = data_to_fit
    if mu is None:
        mu = mu_0
    values = np.asarray(data, dtype=float)
    # logsf is log(1 - cdf) evaluated stably: 1 - cdf rounds to 0 in the far
    # tail, which would turn the likelihood into +/-inf.
    log_tail_mass = norm.logsf(values.min(), mu, sigma)
    ll = norm.logpdf(values, mu, sigma) - log_tail_mass
    return -np.sum(ll)



In [None]:
## mean log compute prediction
# Restrict to the 2017-2023 window and to rows that have a compute value.
tmp_df = df[(df['date']>'2017-01-01') & (df['date']<'2024-01-01')]
tmp_df = tmp_df[tmp_df['compute'].notna()]

# Per-year mean of log10(compute), as a frame with columns ['year', 'log_compute'].
year_grouped_df = tmp_df.groupby(['year'])
mean_log_compute = year_grouped_df['log_compute'].mean().reset_index()

X = mean_log_compute['year'].values

# The target is the whole two-column frame, so the model has two outputs
# (year, log_compute); callers read the last one.
mean_log_compute_model = LinearRegression().fit(X.reshape(-1,1),mean_log_compute)

In [None]:
# Fit per-year normal distributions of log10(compute) for 2017-2023, estimate
# 2024 from its right tail, and plot observed KDEs against predicted normals.
tmp_df = df[(df['date']>'2017-01-01') & (df['date']<'2024-01-01')]
tmp_df = tmp_df[~tmp_df['compute'].isna()]
year_grouped_df = tmp_df.groupby(['year'])

# Linear trend of the yearly mean. The target passed to fit() is the frame with
# columns ['year', 'log_compute'], so predict() returns two outputs per row and
# the code below indexes the last one ([-1]) to get log_compute.
mean_log_compute = year_grouped_df['log_compute'].mean().reset_index()
X = mean_log_compute['year'].values
mean_log_compute_model = LinearRegression()
mean_log_compute_model.fit(X.reshape(-1,1),mean_log_compute)

# Per-year mean / std of log_compute, indexed by year.
MEAN_log_compute = year_grouped_df['log_compute'].mean()
STD_log_compute = year_grouped_df['log_compute'].std()

#predict stats for 2024
mean_log_compute_2024 = mean_log_compute_model.predict([[2024]])[0][-1]
MEAN_log_compute.loc[2024] = mean_log_compute_2024
tmp_df_2024 = df[df['date']>'2024-01-01']
RT_log_compute_data_2024 = tmp_df_2024[tmp_df_2024['log_compute']>mean_log_compute_2024]['log_compute'] #right tail of 2024 log compute data
init_sigma = np.std(RT_log_compute_data_2024)
# trunc_norm_NLL reads these two module-level names, so they must be set
# before minimize() is called.
data_to_fit = RT_log_compute_data_2024
mu_0 = mean_log_compute_2024
sigma = (minimize(trunc_norm_NLL,[init_sigma])).x
STD_log_compute.loc[2024]=sigma[0] #2024 std found assuming that we just have right tail data


years = np.arange(2017,2024+1)
future_years = np.arange(2025,2029+1)
years_concat = np.concatenate([years,future_years])


CONST_VAR=True
last_n = 4


fig,ax=plt.subplots()

# Only every second year (even position in years_concat) is drawn.
for idx,year in enumerate(years_concat):

  if year in years:
    # tmp_df only covers dates before 2024, so this selection is empty for 2024.
    log_compute_data = tmp_df[tmp_df['year']==year]['log_compute']
    if idx%2==0:
      sns.kdeplot(log_compute_data,label=f'{year}',alpha=0.5,linewidth=2,linestyle='--',color='tab:blue')



  if year in future_years:
    # Two-output prediction for this year; [-1] below selects log_compute.
    pred_mean_log_compute = mean_log_compute_model.predict(np.array(year).reshape(-1,1))[0]
    if CONST_VAR:
      pred_std_log_compute = STD_log_compute[-1*last_n:].mean() #take mean of last n std vals
    else: #smth more fancy
      # NOTE(review): nothing is assigned here, so with CONST_VAR=False
      # pred_std_log_compute is undefined (or stale) on the lines below.
      pass

    # Evaluate the predicted normal pdf on mean +/- 3 std.
    x_min,x_max = pred_mean_log_compute[-1]-3*pred_std_log_compute,pred_mean_log_compute[-1]+3*pred_std_log_compute
    x =  np.linspace(x_min,x_max,1000)
    norm_pdf = norm.pdf(x,pred_mean_log_compute[-1],scale=pred_std_log_compute)

    if idx%2==0:
      ax.plot(x,norm_pdf,label=f'predicted {year}',linewidth=3,color='tab:red',alpha=0.8)

    ax.legend()

ax.grid(alpha=0.6)




### Count models above threshold

In [None]:
# Monte Carlo rollouts: for each rollout, sample yearly model counts and a
# per-year normal distribution of log10(compute), then count how many sampled
# models exceed each log-FLOP threshold.
np.random.seed(42)  # seeds the global NumPy RNG used by the numpy/scipy draws below

SET_G_MEAN = True  # True: mean grows by g_mean per year from 2023; False: use the regression extrapolation
mean_2023,g_mean = MEAN_log_compute.loc[2023],0.65 #set growth rate of distribution mean in OOMs
CONST_STD = False #const std for model distributions


tmp_df = df[(df['date']>'2017-01-01') & (df['date']<'2024-01-01')]
tmp_year = 2021

years = np.arange(2017,2023+1)
future_years = np.arange(2024,2028+1)
all_years = np.concatenate([years,future_years])

if CONST_STD:
    # single std: mean of the yearly stds from tmp_year onwards
    dist_std = STD_log_compute.loc[tmp_year:].mean()
else:
    # per-rollout, per-year std drawn uniformly from this range
    std_bounds = [1.1,1.6]


stt_bin,stop_bin,num = 23,30,1000
x=np.linspace(start=stt_bin,stop=stop_bin,num=num)
bin_edges = np.arange(stt_bin,stop_bin+1)  # log10 FLOP thresholds 23..30

rollouts = 1000

# Sample the exponential-count parameters (a, b) around the fitted values.
sampled_count_params = multivariate_normal.rvs(mean=popt_counts,cov=cov_counts,size=rollouts) #assume flat prior, normal posterior

mapped_future_years = future_years-years[0]
count_predictions = np.array([exp_fit(mapped_future_years,a,b) for a,b in sampled_count_params])
# A sampled `a` can be negative, which would give a negative sample size and
# make norm.rvs raise; treat such rollouts as producing zero models.
count_predictions = np.clip(count_predictions,0,None)

SAMPLES = []

if SET_G_MEAN:
  future_means = np.array([mean_2023 + (year-2023)*g_mean for year in future_years]) #grow at g_mean OOMs per year
else:
  future_means = mean_log_compute_model.predict(future_years.reshape(-1,1))[:,1] #direct mean extrap

if not CONST_STD:
  # One std per (rollout, year). Sized by `rollouts` (was a hard-coded 1000,
  # which broke indexing as soon as `rollouts` was changed).
  rollout_stds = np.random.uniform(*std_bounds,size=(rollouts,len(future_years))) #pretty hacky way to model stds


for rollout_idx in range(rollouts):
  rollout_count_predictions = count_predictions[rollout_idx]
  if CONST_STD:
    future_stds = np.array([dist_std]*len(future_years))
  else:
    future_stds = rollout_stds[rollout_idx]

  # one array of sampled log10(compute) values per future year
  sample = [norm.rvs(loc=mean,scale=std,size=int(n)) for mean,std,n in zip(future_means,future_stds,rollout_count_predictions)]
  SAMPLES.append(sample)


bin_strs = [f'>{b} lF' for b in bin_edges[:-1]]

# Per-year percentiles (over rollouts) of the number of models above each threshold.
tmp_df_50 = pd.DataFrame(index=bin_strs,columns=future_years)
tmp_df_95 = tmp_df_50.copy()
tmp_df_5 = tmp_df_50.copy()

for year_idx,year in enumerate(future_years):
  year_samples = [sample[year_idx] for sample in SAMPLES]

  for threshold in bin_edges[:-1]:
    exceed_thresh = [(sample>threshold).sum() for sample in year_samples] #count how many exceed threshold
    p5,p50,p95 = np.percentile(exceed_thresh,[5,50,95]).astype(int)
    tmp_df_50.at[f'>{threshold} lF',year] = p50
    tmp_df_95.at[f'>{threshold} lF',year] = p95
    tmp_df_5.at[f'>{threshold} lF',year] = p5

# NOTE(review): these are running sums of per-year percentiles. Percentiles are
# not additive, so this is not the percentile of the cumulative count within a
# rollout -- confirm whether that distinction matters for the reported ranges.
cumulative_df_50 = tmp_df_50.astype(int).cumsum(axis=1)
cumulative_df_95 = tmp_df_95.astype(int).cumsum(axis=1)
cumulative_df_5 = tmp_df_5.astype(int).cumsum(axis=1)


index = cumulative_df_50.index
cols = cumulative_df_50.columns

# Display table "median  (5th:95th)"; object dtype because the cells are strings.
tmp_df = pd.DataFrame(index=index,columns=cols,dtype=object)

for idx in index:
  for col in cols:
    n_50,n_5,n_95 = cumulative_df_50.at[idx,col], cumulative_df_5.at[idx,col], cumulative_df_95.at[idx,col]
    tmp_df.at[idx,col] = f'{n_50}  ({n_5}:{n_95})'

In [None]:
#replace with UB/LB cumulative df if need

# One line per threshold row of the cumulative table.
fig,ax=plt.subplots(figsize=(10,6))
for label,values in cumulative_df_50.iterrows():
  # row labels look like '>23 lF'; recover the integer threshold
  threshold = int(label.split(' ')[0].split('>')[-1])
  ax.plot(
      values.index.values,
      values.values,
      label=f'> {threshold} log FLOP',
      marker='x',
  )

ax.grid()
ax.legend(fontsize=12)
ax.set_title('Cumulative model count exceeding thresholds')
yticks = np.arange(0,700,100)
ax.set_yticks(yticks,labels=yticks,fontsize=12)
ax.set_xticks(future_years,labels=future_years,rotation=45,fontsize=12)
#ax.set_yscale('log')

In [None]:
# Yearly stds of log compute up to 2024 (blue) next to uniformly drawn
# illustrative future stds (red) and the range they are drawn from.
observed_std = STD_log_compute.loc[:2024]
plt.scatter(observed_std.index,observed_std.values)
plt.xlim([2016,2030])
plt.xticks(np.arange(2016,2030),rotation=45)
plt.ylim([0.5,2.0])

# NOTE(review): the rollout cell draws stds from std_bounds = [1.1, 1.6], while
# this illustration uses [1.0, 1.6] -- confirm which range is intended.
bounds = [1.0,1.6]
future_std = np.random.uniform(*bounds,size=(5,1))
plt.scatter(np.arange(2025,2030),future_std,color='red')
for level in (bounds[0],bounds[-1]):
  plt.axhline(y=level,color='r',linestyle='--',alpha=0.6)

### Backtesting

In [None]:
# Backtest: compare observed cumulative counts of models above 1e23 / 1e24 FLOP
# in 2020-2023 with what the fitted count curve and per-year normal
# distributions would have predicted.
backtest_years = np.arange(2020,2023+1)
tmp_df = df[df['date'].dt.year.isin(backtest_years)]
tmp_df = tmp_df[~tmp_df['compute'].isna()]

# log10(compute) indexed by publication year. Built as a new Series so that
# tmp_df's own column keeps its original index.
log_compute_data = pd.Series(
    tmp_df['log_compute'].to_numpy(),
    index=pd.Index(tmp_df['year'].to_numpy(),name='year'),
    name='log_compute',
)

thresholds = [23,24]
index = [f'>{thr} lF' for thr in thresholds] #same labels as the first two rows of cumulative_df_50
obs_df = pd.DataFrame(0,index=index,columns=backtest_years)
pred_df = pd.DataFrame(0.0,index=index,columns=backtest_years)

#populate obs df
for thr in thresholds:
  threshold_count = log_compute_data[log_compute_data>thr].groupby(level='year').size()
  # Years with no model above the threshold must count as 0 *before* the
  # running sum; filling NaN with 0 afterwards reset the cumulative total.
  cum_threshold_count = threshold_count.reindex(backtest_years,fill_value=0).cumsum()
  obs_df.loc[f'>{thr} lF'] = cum_threshold_count.to_numpy()

#populate pred df
for year in backtest_years:
  mean,sigma = MEAN_log_compute.loc[year],STD_log_compute.loc[year]
  # pred_counts comes from the counts-extrapolation cell; all_years starts at
  # the same first year, so positions line up for the backtest years.
  model_count = int(pred_counts[np.where(all_years==year)[0][0]])
  for thr in thresholds:
    # Expected number of models above the threshold = count * P(X > thr).
    # (Previously the mass was split into unit-wide bins and each bin was
    # truncated to an integer, which systematically lost fractional models.)
    pred_df.loc[f'>{thr} lF',year] = model_count*norm.sf(thr,loc=mean,scale=sigma)

# Observed counts are cumulative over the backtest window, so the predictions
# must be accumulated the same way to be comparable.
pred_df = pred_df.cumsum(axis=1)


fig,ax=plt.subplots()
fig2,ax2=plt.subplots()
palette=['tab:blue','tab:red']

for idx,(k,v) in enumerate(pred_df.transpose().items()):
  ax.plot(pred_df.columns.values,v,label=f'{k}(predicted)',linestyle='--',color=palette[idx])

  # second figure: backtest predictions followed by the forward projection
  ax2.plot(pred_df.columns.values,v,label=f'{k}(predicted)',linestyle='--',color=palette[idx])
  ax2.plot(cumulative_df_50.columns.values,cumulative_df_50.loc[k].values,linestyle='--',color=palette[idx])

for idx,(k,v) in enumerate(obs_df.transpose().items()):
  ax.plot(obs_df.columns.values,v,label=f'{k}(observed)',color=palette[idx])
  ax2.plot(obs_df.columns.values,v,label=f'{k}(observed)',color=palette[idx])


ax.legend()
ax.set_ylabel('Cumulative model count')
ax.grid()

ax2.legend()
ax2.set_ylabel('Cumulative model count')
ax2.grid()

In [None]:
### on backtest, current model is underestimating number of models > 1e23 FLOP
### exp. counts fit seems fine, so this is in the compute distribution assumptions

# One panel per backtest year: KDE of the observed log compute (solid) against
# the normal with that year's mean / std (dashed).
fig,axs = plt.subplots(nrows=2,ncols=2)
axs_ravel = axs.ravel()

palette = ['tab:blue','tab:red','tab:green','tab:orange']

for idx,year in enumerate(backtest_years):
  ax = axs_ravel[idx]
  colour = palette[year-backtest_years[0]]

  mean,sigma = MEAN_log_compute.loc[year],STD_log_compute.loc[year]
  x = np.linspace(mean-3*sigma,mean+3*sigma,num=1000)

  observed = tmp_df[tmp_df['year']==year]['log_compute']
  sns.kdeplot(observed,label=f'{year}',alpha=0.5,linewidth=2,color=colour,ax=ax)
  ax.plot(x,norm.pdf(x,mean,sigma),alpha=0.5,linestyle='--',linewidth=2,color=colour)

  ax.legend()
  ax.grid()

fig.tight_layout()

### Verify with investment/training budget

In [None]:
# Copy the 2017-2023 window: columns are assigned on tmp_df below, and writing
# into a slice of df triggers SettingWithCopyWarning.
tmp_df = df[(df['date']>'2017-01-01') & (df['date']<'2024-01-01')].copy()


## fill na values with samples from the group's empirical (non-missing) distribution
def fill_na_with_sample(group):
  """Replace missing values in ``group`` with random draws from its observed values.

  Parameters
  ----------
  group : pandas.Series
      Values of one group (e.g. one year's compute), possibly containing NaN.

  Returns
  -------
  pandas.Series
      Same index as ``group``; NaNs replaced by values drawn with replacement
      (global NumPy RNG) from the non-missing entries. Returned unchanged when
      nothing is missing or when there is nothing to sample from.
  """
  missing = group.isna()
  observed = group[~missing].to_numpy()
  if not missing.any() or observed.size == 0:
    return group
  filled = group.copy()
  # np.random.choice draws from the observed values. The original called
  # np.random.sample, which treats its argument as an output *shape* and
  # returns uniform numbers in [0, 1).
  filled[missing] = np.random.choice(observed,size=int(missing.sum()))
  return filled

# Fill missing compute per year, then regress log10(total yearly compute) on the year.
year_grouped_df = tmp_df.groupby('year')
tmp_df['compute'] = year_grouped_df['compute'].transform(fill_na_with_sample)
tmp_df['log_compute'] = np.log10(tmp_df['compute'])

total_log_compute = np.log10(tmp_df.groupby('year')['compute'].sum())

# Linear fit in log space, evaluated at future_years (from the rollout cell).
log_compute_model = LinearRegression().fit(
    total_log_compute.index.values.reshape(-1,1),
    total_log_compute.values,
)
predicted_total_log_compute = log_compute_model.predict(future_years.reshape(-1,1))


#from distributions
def get_total_log_compute(percentile=50):
  """Return log10 of a percentile of total yearly compute across the rollouts.

  Reads the module-level ``SAMPLES`` (one list per rollout, each holding one
  array of log10-compute draws per future year), sums ``10**draw`` within each
  year, and takes the requested percentile over rollouts for every year.
  """
  totals_per_rollout = np.array([
      np.array([sum(10**year_draws) for year_draws in rollout])
      for rollout in SAMPLES
  ])
  return np.log10(np.percentile(totals_per_rollout,q=percentile,axis=0))

dist_total_log_compute = get_total_log_compute(percentile=50)

# Observed yearly totals (solid), their direct log-linear extrapolation (blue
# dashed) and the median total implied by the sampled distributions (red dashed).
fig,ax=plt.subplots(figsize=(10,6))
dashed = dict(marker='x',linestyle='--',linewidth=3)
ax.plot(total_log_compute.index.values,10**total_log_compute.values,color='tab:blue',marker='.',linewidth=3)
ax.plot(future_years,10**predicted_total_log_compute,color='tab:blue',label='Total training compute direct extrapolation',**dashed)
# NOTE(review): the 1.05 factor scales the plotted curve by 5% without an
# explanation in this cell -- confirm it is intentional.
ax.plot(future_years,(10**dist_total_log_compute)*1.05,color='tab:red',label='Total predicted training compute',**dashed)
ax.legend(fontsize=12)
ax.set_yscale('log')
ax.set_ylabel('Total compute',fontsize=12)
ax.tick_params(axis='both',labelsize=12)
ax.grid()


# Ratio and difference between the two projections, both in linear units.
total_compute_ratio = (10**dist_total_log_compute)/(10**predicted_total_log_compute)
delta_total_log_compute = (10**dist_total_log_compute) - (10**predicted_total_log_compute)

# Hardware capacity check

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# source: https://www.macrotrends.net/stocks/charts/NVDA/nvidia/revenue

# Quarterly revenue keyed by quarter-end date string. Values are in units of
# 1e6 (they are multiplied by 1e6 when the DataFrame is built below);
# currency presumably USD -- TODO confirm against the source.
temp_dict = {
    "2017-01-31": 2173.0,
    "2017-04-30": 1937.0,
    "2017-07-31": 2230.0,
    "2017-10-31": 2636.0,
    "2018-01-31": 2911.0,
    "2018-04-30": 3207.0,
    "2018-07-31": 3123.0,
    "2018-10-31": 3181.0,
    "2019-01-31": 2205.0,
    "2019-04-30": 2220.0,
    "2019-07-31": 2579.0,
    "2019-10-31": 3014.0,
    "2020-01-31": 3105.0,
    "2020-04-30": 3080.0,
    "2020-07-31": 3866.0,
    "2020-10-31": 4726.0,
    "2021-01-31": 5003.0,
    "2021-04-30": 5661.0,
    "2021-07-31": 6507.0,
    "2021-10-31": 7103.0,
    "2022-01-31": 7643.0,
    "2022-04-30": 8288.0,
    "2022-07-31": 6704.0,
    "2022-10-31": 5931.0,
    "2023-01-31": 6051.0,
    "2023-04-30": 7192.0,
    "2023-07-31": 13507.0,
    "2023-10-31": 18120.0,
    "2024-01-31": 22103.0,
    "2024-04-30": 26044.0
} ## (1e6)

#add predicted from next two quarters: https://finance.yahoo.com/quote/NVDA/analysis/?guccounter=1
# These two entries are forecasts, not reported figures; they complete 2024 to
# four quarters.
temp_dict["2024-07-31"]=28520.0
temp_dict["2024-10-31"]=31400.0


##predicted future annual revenues: https://finance.yahoo.com/quote/NVDA/analysis/?guccounter=1
# Annual totals keyed by year string, in the same 1e6 units; split into
# quarters further down using the average historical quarterly shares.
temp_dict_2 = {}
temp_dict_2['2025'] = 120750.0
temp_dict_2['2026'] = 166250.0

# Quarterly revenue table built from temp_dict (values are in units of 1e6).
quarterly_revenue_data = pd.DataFrame()
quarterly_revenue_data['date'] = pd.to_datetime(np.array(list(temp_dict.keys())))
quarterly_revenue_data['revenue'] = np.array(list(temp_dict.values()))*1e6
quarterly_revenue_data['log_revenue'] = np.log10(quarterly_revenue_data['revenue'])


# Share of each year's revenue earned in each of its quarters.
quarterly_revenue_data['year'] = quarterly_revenue_data['date'].dt.year
year_grouped_df = quarterly_revenue_data.groupby('year')
quarterly_revenue_data['revenue fraction'] = quarterly_revenue_data['revenue']/year_grouped_df['revenue'].transform('sum')

# Average share per quarter position across years. This stacks one list per
# year, so every year must have the same number of quarters (four here).
revenue_fractions = quarterly_revenue_data.groupby('year')['revenue fraction'].apply(list).to_dict()
avg_revenue_fractions = np.array([np.array(v) for k,v in revenue_fractions.items()]).mean(axis=0)

quarters = ['01-31','04-30','07-31','10-31']

# Split each forecast annual total (temp_dict_2) into quarters using the
# average historical shares.
new_rows=[]
for year,v in temp_dict_2.items():
  for idx,q in enumerate(quarters):
    new_date = pd.to_datetime(f'{int(year)}-'+q)
    new_revenue = v*1e6*avg_revenue_fractions[idx]
    # The original also executed
    #   quarterly_revenue_data['date'].loc[new_date] = <revenue>
    # which wrote a revenue number into the date column through chained
    # indexing; the row is added via new_rows, so that statement is dropped.
    new_rows.append({
        'date': new_date,
        'revenue': new_revenue,
        'log_revenue': np.log10(new_revenue),
        'year': int(year),  # keys of temp_dict_2 are strings; keep the column numeric
    })

quarterly_revenue_data = pd.concat([quarterly_revenue_data, pd.DataFrame(new_rows)], ignore_index=True)
quarterly_revenue_data['date'] = pd.to_datetime(quarterly_revenue_data['date'])


### FLOP/$ extrap
#hardware (FLOP/$)
# Log-linear extrapolation of FLOP per dollar from a 2014 anchor value.
start_year=2014
log_FLOP_per_dollar_2014 = np.log10(2.2e17)
OOM_growth_rate = 0.125 #figure 6 here https://epochai.org/blog/trends-in-machine-learning-hardware says 10x every 8 years --> 0.125 OOMs every year
years = np.arange(start_year,2031)
years_since_2014 = years-2014
log_FLOP_per_dollar = log_FLOP_per_dollar_2014+OOM_growth_rate*years_since_2014
FLOP_per_dollar = 10**log_FLOP_per_dollar
# year -> FLOP per dollar lookup used when converting revenue to FLOP
temp_dict_3 = dict(zip(years,FLOP_per_dollar))


In [None]:
PLOT_REVENUE_EXTRAP=False

## regression
from sklearn.linear_model import LinearRegression

# Time axis for the regression: days elapsed since ref_date.
ref_date = pd.to_datetime('2000-01-01')
quarterly_revenue_data['date_float'] = (quarterly_revenue_data['date']-ref_date).dt.days

# Linear fit of log10(revenue) against time, i.e. exponential revenue growth.
X = quarterly_revenue_data['date_float'].values.reshape(-1,1)
Y = quarterly_revenue_data['log_revenue'].values
log_revenue_model = LinearRegression() #assume growing exponentially
log_revenue_model.fit(X,Y)

# Quarter-end dates from 2027 through 2030, predicted with the fitted model.
future_dates = pd.to_datetime(pd.date_range(start='2027-01-01',end='2031-01-01',freq='Q'))
future_dates_float = (future_dates-ref_date).days
future_log_revenue_preds = log_revenue_model.predict(future_dates_float.values.reshape(-1,1))

future_data = pd.DataFrame({
    'date': future_dates,
    'log_revenue': future_log_revenue_preds,
    'revenue': 10**future_log_revenue_preds,
    'date_float': future_dates_float
})

quarterly_revenue_data['date'] = pd.to_datetime(quarterly_revenue_data['date'])

#plot quarterly revenue
if PLOT_REVENUE_EXTRAP:
  fitted_revenue = 10**(log_revenue_model.predict(X))
  fig,ax = plt.subplots(figsize=(10,6))
  ax.scatter(quarterly_revenue_data['date'],quarterly_revenue_data['revenue'],color='tab:blue')
  ax.plot(quarterly_revenue_data['date'],fitted_revenue,alpha=0.8,linestyle='--',color='tab:blue',linewidth=2)
  ax.scatter(future_data['date'],future_data['revenue'],color='tab:red',label='pred')
  ax.set_xlabel('Date')
  ax.tick_params(axis='both',labelsize=12)
  ax.set_ylabel('Revenue',fontsize=12)
  ax.set_yscale('log')
  ax.set_title('NVIDIA Log revenue extrapolation')
  ax.grid()


#plot cumulative FLOP
chip_lifetime = 3
t_lag = 1 #number of years from chip release to commericialisation

# Annual revenue (historical + extrapolated quarters) converted to FLOP with
# the year -> FLOP-per-dollar lookup in temp_dict_3.
combined_df = pd.concat([quarterly_revenue_data,future_data])
combined_df['year'] = combined_df.date.dt.year
year_grouped_df = combined_df.groupby('year')
annual_revenue = year_grouped_df['revenue'].sum()
annual_FLOP_production = pd.DataFrame(index=annual_revenue.index)
annual_FLOP_production['FLOP produced'] = annual_revenue.index.map(temp_dict_3)*annual_revenue.values

# FLOP summed over a window of chip_lifetime years, shifted by t_lag years;
# years without a full window are dropped.
running_FLOP_total = (
    annual_FLOP_production['FLOP produced']
    .rolling(window=chip_lifetime,min_periods=chip_lifetime)
    .sum()
    .shift(t_lag)
).dropna()

# Average growth in OOMs per year between the extremes of the running total.
log_running_total = np.log10(running_FLOP_total)
running_years = running_FLOP_total.index.values
m = round((log_running_total.max()-log_running_total.min())/(running_years.max() - running_years.min()),2)





FRAC=False

# Left panel: estimated FLOP available vs. FLOP used for ML training
# (observed totals followed by their direct extrapolation).
# Right panel: the ratio of the two over the years they share.
fig2,(ax2,ax3) = plt.subplots(ncols=2,figsize=(12,6))

# The extrapolated totals are assumed to follow the last observed year one year at a time.
xs_temp = total_log_compute.index.values
xs_temp_2 = np.arange(xs_temp[-1]+1,xs_temp[-1]+1+len(predicted_total_log_compute))
xs = np.concatenate([xs_temp,xs_temp_2])
ys = 10**(np.concatenate([total_log_compute.values,predicted_total_log_compute]))

#plotting direct FLOP extrap

ax2.plot(running_FLOP_total.index.values,running_FLOP_total.values,marker='x',label=f'GPU FLOP available')
ax2.plot(xs,ys,marker='x',color='tab:red',label='GPU FLOP used for ML training')
ax2.grid()
ax2.set_ylabel('FLOP',fontsize=14)
#ax2.set_title(f'Total FLOP (chip lifetime={chip_lifetime})',fontsize=12)
ax2.tick_params(axis='both',labelsize=12)
#ticks = np.arange(2019,2030+1)
#ax2.set_xticks(ticks,labels=ticks,rotation=45)
ax2.set_yscale('log')
ax2.set_xlim([2020,2030])
ax2.legend(fontsize=12)
ax2.set_ylim([1e23,1e32])
# (a bare `ax2.set_xlim` expression stood here; it was never called and did nothing)

# Restrict both series to the years they have in common.
x_lb= max(xs.min(),running_FLOP_total.index.min())
x_ub = min(xs.max(),running_FLOP_total.index.max())
total_compute_bounded = (running_FLOP_total.loc[x_lb:x_ub]).values
ML_flop_bounded = ys[np.where(xs==x_lb)[0][0]:np.where(xs==x_ub)[0][0]+1]
assert(len(total_compute_bounded)==len(ML_flop_bounded))

ax3.plot(np.arange(x_lb,x_ub+1),ML_flop_bounded/total_compute_bounded,color='tab:blue',marker='x',linestyle='--')
ax3.set_yscale('log')
ax3.set_title('Fraction of GPU FLOP spent on ML training')
ax3.grid()
ax3.tick_params(axis='both',labelsize=12)


# Lay out the figure created in this cell; the original called
# fig.tight_layout(), where `fig` is a figure left over from an earlier cell.
fig2.tight_layout()
