In [2]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy

In [3]:
# reading PPLX over batch size ablations

df = pd.read_csv('bs_pplx.csv')
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

In [5]:
# path to a reference model eval results (rohith's pplx 0)
df_ref = pd.read_csv('rohith_data.csv')
df['reference'] = df_ref['pplx_0']

In [6]:
# number of eval samples
sizes = [1000, 2000, 3000,5000, 8000, 10000]

In [7]:
# doing some normalization for each column individually because pplx across diff batch sizes is very big
# normalize to mean 0, std 1

for col in df.keys():
    normalized = (df[col] - np.mean(df[col])) / np.std(df[col])
    df[f'{col}-norm'] = normalized

In [8]:
# adding index column that was the training order

df['index'] = np.arange(len(df))
df = df.sample(frac=1)
df

Unnamed: 0,pplx-bs-64,pplx-bs-32,pplx-bs-16,pplx-bs-8,pplx-bs-4,pplx-bs-2,pplx-bs-1,reference,pplx-bs-64-norm,pplx-bs-32-norm,pplx-bs-16-norm,pplx-bs-8-norm,pplx-bs-4-norm,pplx-bs-2-norm,pplx-bs-1-norm,reference-norm,index
9551,6.512421,5.355885,4.471481,4.029378,3.728558,3.549416,3.317615,5.305233,1.289747,1.344630,1.389082,1.609684,1.649939,1.780699,1.639797,-1.620719,9551
5312,6.233234,4.889270,3.996078,3.634403,3.244037,3.051197,2.797729,5.728055,0.129783,0.088843,0.281299,0.702185,0.529002,0.629575,0.448325,-0.157340,5312
147,5.971060,4.483189,3.458138,2.956741,2.619292,2.396329,2.241204,6.046747,-0.959497,-1.004030,-0.972205,-0.854819,-0.916342,-0.883481,-0.827117,0.945649,147
3904,6.113346,4.593090,3.454701,2.965214,2.733281,2.563173,2.347149,5.896775,-0.368327,-0.708255,-0.980215,-0.835350,-0.652628,-0.497992,-0.584311,0.426598,3904
9549,6.043230,4.539887,3.596963,3.124154,2.829440,2.585170,2.409808,5.249019,-0.659646,-0.851440,-0.648718,-0.470167,-0.430164,-0.447169,-0.440709,-1.815276,9549
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6642,6.189918,4.855064,3.724449,3.116610,2.842768,2.502418,2.258257,5.266968,-0.050188,-0.003214,-0.351648,-0.487502,-0.399330,-0.638366,-0.788035,-1.753153,6642
2174,6.097157,4.736269,3.710368,3.218757,2.906806,2.730680,2.553706,5.530777,-0.435587,-0.322924,-0.384460,-0.252807,-0.251179,-0.110970,-0.110925,-0.840115,2174
2054,6.321955,5.110437,4.094315,3.518581,3.243840,3.039016,2.864700,5.442965,0.498402,0.684064,0.510210,0.436072,0.528546,0.601431,0.601808,-1.144029,2054
615,6.601558,5.349301,4.555935,4.115333,3.670155,3.408111,3.223093,6.056595,1.660093,1.326910,1.585878,1.807175,1.514825,1.454218,1.423171,0.979733,615


In [9]:
batch_sizes = [1, 2, 4, 8, 16, 32, 64]

table = pd.DataFrame({}) # table to display results
table['batch_size'] = batch_sizes

for size in sizes:
    temp = []
    for bs in batch_sizes:
        temp.append(f'{scipy.stats.spearmanr(df['index'][:size], 
                                       df[f'pplx-bs-{bs}-norm'][:size] - df['reference-norm'][:size])[1]:.4e}')
    table[f'{size} samples'] = temp

table

Unnamed: 0,batch_size,1000 samples,2000 samples,3000 samples,5000 samples,8000 samples,10000 samples
0,1,0.0047,1.3661e-05,1.352e-06,6.174e-06,3.864e-09,5.2063e-10
1,2,0.0043589,1.291e-05,2.019e-06,1.2277e-05,7.3688e-09,8.3368e-10
2,4,0.0082916,4.7661e-05,6.2042e-06,2.6675e-05,1.2781e-08,5.9167e-10
3,8,0.008099,6.3092e-05,9.3169e-06,3.6193e-05,1.3012e-08,6.297e-10
4,16,0.018754,0.00017485,1.478e-05,6.6082e-05,1.7645e-08,3.6146e-10
5,32,0.025286,0.00023761,1.7188e-05,0.00011017,2.5585e-08,2.8004e-10
6,64,0.014282,0.00020234,1.4651e-05,0.0001319,2.7971e-08,1.3382e-10
