In [1]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pickle as pkl
import time

from IPython.core.display import display, HTML
from IPython.display import clear_output
display(HTML("<style>.container { width:100% !important; }</style>"))

np.set_printoptions(linewidth=150)
pd.set_option('display.max_columns', 500)

# Parse Helpers

In [2]:
def line_format(line,add_quotes=True):
    if ("INFO" not in line or 'cell:' in line) and "[" in line and "]" in line:
        if "," not in line:
            if "." in line:
                line = re.sub('(?<=\d)\.+',",",line)
            else:
                line = re.sub('(?<=\d)\s+',",",line)
    if "torch.utils.data" in line:
        line = line.replace("<","'").replace(">","'")
            
            
    line = line.replace("\n","").replace(" ","").replace("array","np.array").replace("\e[1m","").replace("\e[21mN","")
    if add_quotes:
        if ":" in line:
            name,val = line.split(":",1)
            if name=='prefix':
                return '"{}":"{}"'.format(name.strip(),val)
            else:
                return '"{}":{}'.format(name.strip(),val)
    return line


def general_stats(line,current,start_cond,end_cond,stats_str):
    if start_cond in line:
        current = start_cond
        stats_str = line_format(line.split("stats:")[-1],False)
    elif current == start_cond:
        if end_cond in line:
            current = ''
            return True,"{}".format(stats_str),current

        else:
            stats_str += '{}'.format(line_format(line,False))
    return False,stats_str,current
            
def specific_stats(line,current,start_cond,end_cond,stats_str=""):
    #print(current,[(x,line,x in line) for x in end_cond])
    if start_cond in line:
        current = start_cond
        stats_str = ""
    
    elif current == start_cond:
        if any([x in line for x in end_cond]):
            current = ''
            return True,"{{{}}}".format(stats_str.rsplit(",",1)[0]),current
        elif line[-1]==",":
            #print("pre add ",stats_str)
            stats_str += '{}'.format(line_format(line.split("run:")[-1]))
            #print("post add",stats_str)
        else:
            #print("pre add ",stats_str)
            stats_str += '{}, '.format(line_format(line.split("run:")[-1]))
            #print("post add",stats_str)
    return False,stats_str,current

def local_exec(exec_str):
    ldict = {}
    exec("s={}".format(exec_str),globals(),ldict)
    return ldict['s']

def macro_process(predictor_df):
    losses = predictor_df[predictor_df['epoch']>=500].append(predictor_df.iloc[-1])
    losses['curve']=losses['curve'].apply(lambda x: x+[max(x)]*12 if len(x)==500 else x)
    losses['curve']=losses['curve'].apply(lambda x: x[:512] if len(x)>512 else x)
    losses['reductions']=losses['cell_types'].apply(sum)
    losses['parallel']=losses['cell_matrices'].apply(lambda x: len(x) if len(x)<3 else 1)
    losses['cells']=losses['cell_types'].apply(len)
    losses['nodes']=losses['cell_matrices'].apply(lambda x: len(x[0]) if len(x)<3 else len(x))
    losses['cell']=losses['cell_matrices'].apply(lambda x: np.array(x[0]) if len(x)<3 else np.array(x))
    losses['lr_max']=losses['lr_schedule'].apply(lambda x: x['lr_max'])
    losses['fill']=losses['cell'].apply(lambda x: len(np.triu(x).nonzero()[0])/x.size)

    losses = losses[losses['reductions']>4]
    for i in range(512):
        if i==0:
            losses['max_curve_{}'.format(i)]=losses['curve'].apply(lambda x:x[0])
        else:
            losses['max_curve_{}'.format(i)]=losses['curve'].apply(lambda x: max(x[:i+1]))
    return losses

In [3]:
def parse(last=False):
    log1 = open("model_testbed.log","r").read()
    log2 = open("logs/model_testbed.log","r").read()
    log = log1+log2
    outs = log.split("NEW MODEL ")[1:]
    
    runs = []
    if last:
        outs = outs[-1:]
    for i,out in enumerate(outs):
        if not last:
            print(i,end="\r")
        curve = []
        predictions = []
        current = None
        time_taken = None
        max_val = None
        early_terminate=False
        stats_str = ""
        epoch_time = None
        run_date = None
        curve_update = False

        general_model_stats,general_run_stats,specific_model_stats,specific_run_stats = {},{},{},{}
        #print("--LOOP ENTER--")
        for line in out.split('\n'):
            if line=="" or ('Loss' in line and 'Corrects' not in line and 'Epoch: 0' not in line):
                curve_update=False
                pass
            else:
                finished,stats_str,current = general_stats(line,current,'Model stats:','Run stats:',stats_str)
                if finished:
                    general_model_stats = local_exec(stats_str)
                    stats_str, finished = "",False

                finished,stats_str,current = general_stats(line,current,'Run stats:','=====',stats_str)
                if finished:
                    general_run_stats= local_exec(stats_str)
                    stats_str, finished = "",False

                finished,stats_str,current = specific_stats(line,current,'-- Model stats',[' Run stats '],stats_str)
                if finished:
                    specific_model_stats= local_exec(stats_str)
                    stats_str, finished = "",False

                if not specific_run_stats:
                    finished,stats_str,current = specific_stats(line,current,' Run stats ',['====',"Per epoch","Run finished","Train Epoch: 0"],stats_str)
                    if finished:
                        specific_run_stats= local_exec(stats_str)
                        stats_str, finished = "",False

                #print(line,curve_update)
                if "Corrects" in line and not curve_update:
                    curve.append(int(line.split(":")[-1].split("/")[0]))
                    curve_update = True
                if "Prediction" in line:
                    if "[[" in line:
                        predictions.append(float(line.split("[[")[-1].split("]]")[0]))
                    elif "," in line:
                        predictions.append(float(line.split(":")[-1].split(",")[0]))
                    else:
                        predictions.append(float(line.split(":")[-1]))
                if "Time taken" in line:
                    time_taken = line.split(":")[-1]
                if "terminated" in line.lower():
                    early_terminate=True
                if 'Run started' in line:
                    run_date = line.split("at ")[-1]
                if 'Max corrects' in line:
                    max_val = int(line.split(":")[-1].split("/")[0])
                if 'Per epoch time' in line:
                    epoch_time = line.split(":")[-1].strip()
        #print("--LOOP EXIT--")

        stats = {}
        if general_run_stats:
            stats.update(general_run_stats)
        if general_model_stats:
            stats.update(general_model_stats)
        if specific_run_stats:
            stats.update(specific_run_stats)
        if specific_model_stats:
            stats.update(specific_model_stats)

        for key in stats.keys():
            new_key = key.strip().replace('matrix','matrices')
            #print(key)
            if key=='cell':
                new_key='cell_matrices'
            if key=='momemtum':
                new_key = 'momentum'
            if new_key!=key:
                stats[new_key] = stats.pop(key)

        stats['curve']=curve
        stats['predictions']=predictions
        stats['epoch']=len(curve)
        stats['time_taken']=time_taken
        stats['early_terminate']=early_terminate
        stats['per_epoch_time']=epoch_time
        if not max_val:
            stats['max']=max(curve) if len(curve)>0 else None
        else:
            stats['max']=max_val
        stats['run_date']=run_date
        runs.append(stats)

    run_stats = pd.DataFrame(runs)
    run_stats.to_pickle('run_stats.pkl')
    return run_stats
run_stats=parse()
record = max(run_stats['max'])
run_stats.iloc[-10:]

4001

Unnamed: 0,auxiliaries,auxillaries,cell_matrices,cell_types,curve,data,drop_path,early_terminate,epoch,epochs,log,lr,lr_schedule,max,momentum,params,per_epoch_time,predictions,prefix,residual_cells,run_date,scale,scales,time_taken,track_progess,track_progress,verbose,weight_decay
3992,,,"[[[0.0, 1.0, 7.0, 10.0, 0.0, 10.0, 0.0], [0.0,...","[1, 1]","[4429, 4493, 4933, 4999, 5045, 5270, 5373, 5454]",,False,False,8,8.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",5454.0,0.9,7458.0,9.46 s,"[3957.0, 5095.0, 5428.0, 5376.0, 5408.0, 5497....",Micro,,2019-02-03 22:44:13,2.0,"(2.0, 0.5)","1 min, 25 s",,True,False,0.0001
3993,,,"[[[0.0, 11.0, 10.0, 6.0, 13.0, 0.0], [0.0, 0.0...","[1, 1]","[4230, 4981, 4990, 5392, 5333, 5397, 5505, 5828]",,False,False,8,8.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",5828.0,0.9,9234.0,10.79 s,"[4132.0, 5574.0, 5498.0, 5720.0, 5748.0, 5664....",Micro,,2019-02-04 11:03:43,2.0,"(2.0, 0.5)","1 min, 35 s",,True,False,0.0001
3994,,,"[[[0.0, 4.0, 1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0...","[1, 1]","[4014, 4349, 4466, 4599, 4529, 4507, 4573, 4728]",,False,False,8,8.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",4728.0,0.9,5226.0,7.12 s,"[3934.0, 4901.0, 4912.0, 4909.0, 4905.0, 4811....",Micro,,2019-02-04 11:13:09,2.0,"(2.0, 0.5)","1 min, 4 s",,True,False,0.0001
3995,,,"[[[0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0,...","[1, 1]","[3532, 3602, 3482, 3628, 3631, 3502, 3698, 3722]",,False,False,8,8.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",3722.0,0.9,5226.0,6.88 s,"[4065.0, 4172.0, 4008.0, 3898.0, 3875.0, 3786....",Micro,,2019-02-04 11:16:11,2.0,"(2.0, 0.5)","1 min, 2 s",,True,False,0.0001
3996,,[2],"[[[0.0, 1.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0,...","[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...","[1734, 1687, 1661, 1693, 1842, 1744, 1727, 185...",,True,False,512,512.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",1900.0,0.9,2105226.0,38.02 s,"[6816.0, 6853.0, 6183.0, 6252.0, 6121.0, 5966....",Macro,,2019-02-04 17:07:35,6.0,"(2.0, 0.5)","5 hrs, 51 mins, 20 s",,True,False,0.0001
3997,,,"[[[0.0, 1.0, 0.0, 0.0, 7.0, 14.0, 8.0, 0.0], [...","[1, 1]","[4233, 4734, 4707, 5200, 5039, 5304, 5466, 5622]",,False,False,8,8.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",5622.0,0.9,13390.0,13.47 s,"[4334.0, 5462.0, 5315.0, 5550.0, 5571.0, 5572....",Micro,,2019-02-04 17:09:37,2.0,"(2.0, 0.5)","1 min, 58 s",,True,False,0.0001
3998,,[2],"[[[0.0, 1.0, 0.0, 0.0, 7.0, 14.0, 8.0, 0.0], [...","[1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1]","[3776, 4401, 4485, 5820, 5769, 6009, 6256, 676...",,True,False,512,512.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",9019.0,0.9,16877002.0,"2 min, 36 s","[8490.0, 8654.0, 8355.0, 8718.0, 8640.0, 8710....",Macro,,2019-02-05 16:23:17,4.0,"(2.0, 0.5)","23 hrs, 13 mins, 20 s",,True,False,0.0001
3999,,,"[[[0.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 5.0], ...","[1, 1]","[3955, 4264, 4355, 4504, 4282, 4472, 4610, 4658]",,False,False,8,8.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",4658.0,0.9,5226.0,6.95 s,"[3976.0, 4794.0, 4780.0, 4803.0, 4803.0, 4711....",Micro,,2019-02-05 16:24:25,2.0,"(2.0, 0.5)","1 min, 3 s",,True,False,0.0001
4000,,[2],"[[[0.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 5.0], ...","[1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1]","[3633, 4097, 4059, 4463, 4198, 4126, 4838, 501...",,True,False,512,512.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",6229.0,0.9,2105226.0,37.34 s,"[7820.0, 7310.0, 6969.0, 7011.0, 6988.0, 6851....",Macro,,2019-02-05 22:09:29,6.0,"(2.0, 0.5)","5 hrs, 44 mins, 59 s",,True,False,0.0001
4001,,,"[[[0.0, 1.0, 1.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0...","[1, 1]","[4212, 4281, 4470, 4744, 4763, 4643, 4943, 5055]",,False,False,8,8.0,True,0.01,"{'type': 'cosine', 'lr_min': 1e-09, 'lr_max': ...",5055.0,0.9,5722.0,8.47 s,"[3907.0, 4856.0, 4930.0, 5067.0, 5103.0, 5010....",Micro,,2019-02-05 22:10:49,2.0,"(2.0, 0.5)","1 min, 15 s",,True,False,0.0001


# See Current Run Predictions

In [4]:
def pred_plot(row,solo=True):
    funcs = pkl.load(open('macro_loss_predictors.pkl',"rb"))
    curve_len = len(row['curve'])
    print("Epoch",curve_len)
    print("Current Score: {}, Max Score: {}, Record Score: {:.0f}".format(row['curve'][-1],max(row['curve']),record))
    if solo:
        plt.figure(figsize=(30,15))
    plt.plot(row['curve'],color='black',label='Latest Curve')
    preds = []
    
    for i in range(curve_len):
        X = row[['max_curve_{}'.format(i),'params','parallel','cells','nodes','scale','lr_max']]
        preds.append(funcs[i]['b']+np.dot(X,funcs[i]['m']))
    preds = np.array(preds)
    c95s = np.array([value['95'] for (key,value) in list(funcs.items())[:curve_len]])
    
    if curve_len==512:
        plt.plot([max(row['curve'])]*curve_len,label="Actual Max",color='black',linestyle='dashed')
        plt.plot(preds,label="Predicted Max",color='red')
    else:
        if solo:
            pass
            plt.fill_between(range(curve_len),(preds+c95s)[-1],(preds-c95s)[-1],color='blue',alpha=.1,label="Latest Bounds")
            plt.plot(preds,label="Predicted Max",color='red')
        else:
            pass
            #plt.fill_between(range(curve_len-1,512),(preds+c95s)[-1],(preds-c95s)[-1],color='pink',alpha=.2,label="Latest Bounds")
            #plt.plot(list(preds)+[preds[-1]]*(512-curve_len-1),label="Predicted Max",color='red',alpha=.3)
    if solo:
        plt.fill_between(range(curve_len),preds+c95s,preds-c95s,color='red',alpha=.2)
    
    
    if solo:
        plt.title(row['run_date'])
        plt.legend(loc='lower right')
        plt.show()
    
last = macro_process(parse(last=True)).iloc[0]
pred_plot(last)

IndexError: single positional indexer is out-of-bounds

# See Current Run Compared to Best Macro Runs

In [None]:
def next_powers(x):
    return [2**i for i in range(1,10) if (2**i)>x]

In [None]:
while 1:
    plt.figure(figsize=(30,15))
    last = macro_process(parse(last=True)).iloc[0]

    curr_epoch = last['epoch']

    # plot top runs
    best_runs = []
    for i,row in run_stats.iterrows():
        if 256<len(row['curve']) and max(row['curve'])>=8000:
            plt.plot(row['curve'],alpha=.5)
            best_runs.append(row['curve'])

    # plot benchmarks
    plt.plot(range(512),[max(run_stats['max'])]*512,linestyle="dotted",label="PB")
    plt.plot(range(512),[9000]*512,linestyle="dotted",label="90%")     
    plt.plot(range(512),[9852]*512,linestyle="dotted",label="WR")     

    # plot latest run
    pred_plot(last,solo=False)
    delta = last['curve'][curr_epoch-1]-run_stats.sort_values(by='max',ascending=False)['curve'].iloc[0][curr_epoch-1]
    if curr_epoch>10:
        best_run = run_stats.sort_values(by='max',ascending=False)['curve'].iloc[0]
        
        powers = next_powers(curr_epoch)
        average_delta=np.mean(np.array(last['curve'][curr_epoch-10:curr_epoch])-best_run[curr_epoch-10:curr_epoch])
        deltas_to_restart = [np.mean([max(x[:power])-max(x[:curr_epoch]) for x in best_runs]) for power in powers]
        best_run_deltas = [max(best_run[:power])-max(best_run[:curr_epoch]) for power in powers]
        
        print("Current Record Delta: {}, Average T-10 Delta: {:.0f}".format(delta,average_delta))
        print("Average Delta to Epoch Restart:",", ".join(["{}: {:.1f}".format(power,deltas_to_restart[i],) for (i,power) in enumerate(powers)]))
        print("Record  Delta to Epoch Restart:",", ".join(["{}: {:.1f}".format(power,best_run_deltas[i]) for (i,power) in enumerate(powers)]))
    else:
        print("Current Record Delta: {}".format(delta))

    # label
    plt.title("CIFAR-10 Loss History, Model Testbed")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend(loc='lower right')
    plt.show()
    
    while parse(last=True).iloc[0]['epoch'] == curr_epoch:
        time.sleep(5)
    clear_output()