In [8]:
import matplotlib
import pandas as pd
import numpy as np
import numpy.linalg as LA
from scipy import stats
import math
import numpy.ma as ma
import matplotlib.pyplot as plt
import numpy.ma as ma
from pathlib import Path
import sys

def agg(x, L, a=None):
    """Collapse every row of a masked 2-D array into a single aggregate value.

    Parameters
    ----------
    x : numpy.ma.MaskedArray
        Values to aggregate along axis 1. Entries that are masked in ``x`` get
        a weight / exponent of zero.
    L : int
        Aggregation mode:

        * 1 -- average of ``|x|`` weighted by the "not masked" indicator,
        * 2 -- ``LA.norm`` of each row divided by ``sqrt`` of the number of
          unmasked entries,
        * 3 -- product of the entries, each raised to
          ``1 / (number of unmasked entries in the row)``,
        * 4 -- like mode 1, but the indicator is multiplied by ``a``,
        * 5 -- like mode 3, but the exponents are ``a`` (restricted to the
          unmasked entries) normalised by their row sum.
    a : array-like, optional
        Per-entry weights, same shape as ``x``. Only read by modes 4 and 5;
        ignored by modes 1-3.

    Returns
    -------
    array-like
        One aggregate per row of ``x``.

    Raises
    ------
    ValueError
        If ``L`` is not one of 1-5, or if mode 4/5 is requested without ``a``.
        (Previously an unsupported ``L`` silently returned ``None``.)
    """
    # Indicator of the entries that are actually present in x.
    present = np.logical_not(ma.getmaskarray(x))

    if L in (1, 2, 3):
        weights = present
    elif L in (4, 5):
        if a is None:
            raise ValueError("agg: modes 4 and 5 require the weight array 'a'")
        weights = a * present
    else:
        raise ValueError("agg: unsupported aggregation mode L=%r (expected 1-5)" % (L,))

    if L in (1, 4):
        return np.average(np.abs(x), weights=weights, axis=1)

    if L == 2:
        # NOTE(review): LA.norm is applied to x as-is; this presumably relies
        # on masked entries holding zeros so they do not contribute -- confirm
        # for callers that mask non-zero values.
        return LA.norm(x, axis=1) / np.sqrt(np.sum(weights, axis=1))

    # Modes 3 and 5: each row's exponents sum to one. The (rows, 1) row sums
    # broadcast across the columns.
    exponents = weights / np.expand_dims(np.sum(weights, axis=1), axis=1)
    return np.prod(np.power(x, exponents), axis=1)

def all_aggs(in_chan, out_chan, in_weight, out_weight):
    """Run every aggregation mode of ``agg`` on the joined in/out channels.

    Parameters
    ----------
    in_chan, out_chan : masked 2-D arrays
        Joined column-wise (axis 1) to form the values passed to ``agg``.
    in_weight, out_weight : masked 2-D arrays
        Joined the same way to form the weight array passed to ``agg``.

    Returns
    -------
    numpy.ndarray
        ``np.asarray`` of the five per-row results, for modes ``L = 1 .. 5``
        in that order.
    """
    # Both concatenations are the same for every mode, so build them once.
    values = np.ma.concatenate((in_chan, out_chan), axis=1)
    weights = np.ma.concatenate((in_weight, out_weight), axis=1)
    return np.asarray([agg(values, L=mode, a=weights) for mode in range(1, 6)])

def sqrtlog(chans, weights):
    """Expand every row of ``chans`` into eight transformed variants.

    Parameters
    ----------
    chans : sequence of 1-D arrays
        One array per aggregation mode.
    weights : masked 2-D array
        Only its mask is read: the number of unmasked entries per row is used
        as a scaling factor.

    Returns
    -------
    list of list
        ``result[i]`` holds, in this order, for ``c = chans[i]`` and
        ``d = c * (unmasked count per row of weights)``::

            [c, d, sqrt(c), sqrt(d), log(c), log(d), log(sqrt(c)), log(sqrt(d))]
    """
    present = np.logical_not(ma.getmaskarray(weights))
    # The per-row count does not depend on the channel, so compute it once.
    counts = np.sum(present, axis=1)

    variants = []
    for index in range(len(chans)):
        plain = chans[index]
        scaled = plain * counts
        plain_root = np.sqrt(plain)
        scaled_root = np.sqrt(scaled)
        variants.append([
            plain,
            scaled,
            plain_root,
            scaled_root,
            np.log(plain),
            np.log(scaled),
            np.log(plain_root),
            np.log(scaled_root),
        ])
    return variants

def get_data(filename):
    """Load one results CSV and reduce it to per-model aggregate measures.

    The file is read with blank lines kept. Rows whose ``model_id`` is NaN are
    treated as separators between consecutive models. Every column is split at
    those separators, each piece is zero-padded to the longest piece, models
    that end up all-zero in any column are dropped, and the remaining zeros
    are masked before aggregation.

    Parameters
    ----------
    filename : str or path-like
        CSV file handed to ``pandas.read_csv``.

    Returns
    -------
    dict
        * ``'QS_BE'``, ``'QS_AE'``, ``'QE_BE'``, ``'QE_AE'``, ``'spec_BE'``,
          ``'spec_AE'``, ``'fro_BE'``, ``'fro_AE'`` -- nested lists
          ``[mode][variant]`` as produced by ``sqrtlog(all_aggs(...))``.
        * ``'path'``, ``'test_acc'``, ``'train_acc'``, ``'test_loss'``,
          ``'train_loss'``, ``'gap'`` -- row-wise means of the matching column.

    Side effects
    ------------
    Prints the number of models that were dropped.
    """
    df = pd.read_csv(filename, skip_blank_lines=False)

    # Drop the last row when its second column is NaN. Positional access goes
    # through .iloc[row, col]; Series[int] positional lookup is deprecated.
    if pd.isna(df.iloc[-1, 1]):
        df = df.drop(labels=df.shape[0]-1, axis=0)

    # Row positions of the separator rows (integer dtype even when empty, so
    # the padding sizes below stay integers for files with a single model).
    separator_rows = np.flatnonzero(pd.isna(df["model_id"]).to_numpy())
    # Split positions once the separator rows themselves have been removed.
    split_points = separator_rows - np.arange(0, len(separator_rows), 1)
    boundaries = np.append(split_points, len(df["model_id"]) - len(separator_rows))
    boundaries = np.insert(boundaries, 0, 0)
    max_length = np.max(np.abs(np.diff(boundaries)))

    # A model is discarded when some column has fewer non-zero entries than this.
    min_non_zero = 1

    data = dict()
    zero_models = set()
    for key in list(df.keys()):
        nan_rows = np.flatnonzero(pd.isna(df[key]).to_numpy())
        # Rows where exactly one of "separator row" / "NaN in this column"
        # holds; they are filled with 0 so that dropna() removes only the
        # remaining NaN rows.
        fill_rows = np.setxor1d(separator_rows, nan_rows)

        # Work on a copy so the frame is not modified through an alias.
        column = df[key].copy()
        column.loc[fill_rows] = 0
        values = column.dropna(axis=0).to_numpy()
        chunks = np.array_split(values, split_points)

        for i in range(len(chunks)):
            # equalize all model sizes
            chunks[i] = np.append(chunks[i], np.zeros(max_length - len(chunks[i])))
            # remember models that carry no non-zero entry in this column
            if np.sum(chunks[i] != 0) < min_non_zero:
                zero_models.add(i)
        data[key] = np.asarray(chunks)

    zero_models = sorted(zero_models)
    print("zero models deleted: "+str(len(zero_models)))
    for key in list(data.keys()):
        data[key] = np.delete(data[key], zero_models, axis=0)
        data[key] = ma.masked_array(data[key], mask=(data[key]==0.))

    stages = ('BE', 'AE')

    # Derived measure: arctan2(S, 1 - 1/C) for each side and stage.
    for side in ('in', 'out'):
        for stage in stages:
            data[side+'_QS_'+stage] = np.arctan2(
                data[side+'_S_'+stage], (1-1/data[side+'_C_'+stage]))

    # Output key prefix -> column infix the measure is read from.
    measure_columns = (('QS', 'QS'), ('QE', 'ER'), ('spec', 'spec'), ('fro', 'fro'))

    aggregates = dict()
    for name, column_name in measure_columns:
        for stage in stages:
            in_weight = data['in_weight_'+stage]
            out_weight = data['out_weight_'+stage]
            per_mode = all_aggs(
                data['in_'+column_name+'_'+stage],
                data['out_'+column_name+'_'+stage],
                in_weight,
                out_weight,
            )
            aggregates[name+'_'+stage] = sqrtlog(
                per_mode, np.ma.concatenate((in_weight, out_weight), axis=1))

    for key in ('path', 'test_acc', 'train_acc', 'test_loss', 'train_loss', 'gap'):
        aggregates[key] = np.mean(data[key], axis=1)

    return aggregates

In [47]:
import glob

# Configuration for the combined (all optimizers) plots.
dataset = 'CIFAR10'
optimizer = ['AdaM','AdamP','AdaBound']

# One list of result files per optimizer.
# NOTE(review): the project root is obtained by chopping the last 8 characters
# off sys.path[0]; this only works while the notebook lives in a directory
# whose name has exactly that length -- confirm before moving the notebook.
raw = [glob.glob(str(sys.path[0][0:-8])+"/outputs/LilJon-"+optimize+"-"+dataset+"/*") for optimize in optimizer]

# Epochs 0, 5, ..., 65 plus the final epoch 69.
wanted_epochs = np.arange(0,70,5)
wanted_epochs = np.append(wanted_epochs, 69)

# epochs[k] holds the files (in optimizer order) whose trailing "-<n>.<ext>"
# epoch number equals wanted_epochs[k].
epochs = [
    [
        rawe
        for optimizer_files in raw
        for rawe in optimizer_files
        if int((rawe.split('-')[-1]).split('.')[0]) == wanted
    ]
    for wanted in wanted_epochs
]

# Measure key -> output sub-directory name.
translation = {'QS':'SQ','QE':'E','spec':'S','fro':'F'}
# Measure key -> mathtext axis label. Raw strings keep the backslash of
# \widehat literal without relying on an invalid "\w" escape sequence.
fancytranslation = {
    'QS_AE': r'${\widehat{Q}_{SQ}^{p}}$',
    'QE_AE': r'${\widehat{Q}_{E}^{L2}}$',
    'spec_AE': r'${\widehat{Q}_{S}^{p}}$',
    'fro_AE': r'${\widehat{Q}_{F}^{p}}$',
    'QS_BE': '${{Q}_{SQ}^{p}}$',
    'QE_BE': '${{Q}_{E}^{L2}}$',
    'spec_BE': '${{Q}_{S}^{p}}$',
    'fro_BE': '${{Q}_{F}^{p}}$',
}
# Measure key -> [aggregation-mode index, variant index] of the series to plot.
types = {'QS':[2,0],'QE':[1,6],'spec':[2,7],'fro':[2,7]}
# Stage key -> file-name prefix.
lrfdict = {'AE':'LRF','BE':''}
# Generalisation metric -> output sub-directory name / axis label.
gendict = {'test_acc':'test','gap':'gap'}
fancygendict = {'test_acc':'Test Acc.','gap':'Gen. Gap'}
# From here on `optimizer` is the joined name used in output paths.
optimizer = '-'.join(optimizer)

In [48]:
# Inspect the grouped result files: one inner list per entry of wanted_epochs.
print(epochs)

[['c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdaM-CIFAR10\\results-07-05-2021_19-48-38-LilJon-CIFAR10-0.csv', 'c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdamP-CIFAR10\\results-07-08-2021_14-52-09-LilJon-CIFAR10-0.csv', 'c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdaBound-CIFAR10\\results-07-08-2021_20-19-45-LilJon-CIFAR10-0.csv'], ['c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdaM-CIFAR10\\results-07-05-2021_21-10-08-LilJon-CIFAR10-5.csv', 'c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdamP-CIFAR10\\results-07-08-2021_16-10-58-LilJon-CIFAR10-5.csv', 'c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdaBound-CIFAR10\\results-07-08-2021_21-46-38-LilJon-CIFAR10-5.csv'], ['c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdaM-CIFAR10\\results-07-05-2021_20-01-29-LilJon-CIFAR10-10.csv', 'c:\\Users\\jjaeg\\Desktop\\QC-Bench\\/outputs/LilJon-AdamP-CIFAR10\\results-07-08-2021_14-55-48-LilJon-CIFAR10-10.csv', 'c:\\Users\\jjaeg\\Desktop\\Q

In [50]:

# Running count of figures written so far; printed once per epoch group.
i = 0
for epoch in epochs:
    # A wanted epoch without any result file would otherwise fail on epoch[0].
    if not epoch:
        print("no result files for this epoch group; skipping")
        continue

    print(epoch[0])
    # Start from the first file of the group and append every further file's
    # values to the same series.
    data = get_data(epoch[0])
    for combine in epoch[1:]:
        datac = get_data(combine)
        for keye in data.keys():
            if np.ndim(data[keye]) < 2:
                # Flat per-model series (e.g. accuracies): plain concatenation.
                data[keye] = np.append(data[keye], datac[keye])
            else:
                # Nested [mode][variant] lists: concatenate each leaf series.
                for agge in range(len(data[keye])):
                    for proe in range(len(data[keye][agge])):
                        data[keye][agge][proe] = np.append(data[keye][agge][proe], datac[keye][agge][proe])
    print(i)

    # Trailing "-<n>.<ext>" part of the first file name, i.e. the epoch number.
    epoch_tag = (epoch[0].split('-')[-1]).split('.')[0]

    for key in translation.keys():
        mode_index, variant_index = types[key][0], types[key][1]
        for lrf in lrfdict.keys():
            measure = key+"_"+lrf
            for gen in gendict.keys():
                x_values = data[measure][mode_index][variant_index]
                y_values = data[gen]

                plt.figure(figsize=(8.5, 8.5))
                plt.locator_params(axis='y', nbins=6)
                plt.locator_params(axis='x', nbins=6)
                plt.plot(x_values, y_values, 'ro', alpha=0.5)

                # Quadratic fit, drawn on a 0.001-spaced grid over the x range.
                coefficients = np.polyfit(x_values, y_values, 2)
                fit_grid = np.arange(min(x_values), max(x_values), 0.001)
                plt.plot(fit_grid, np.polyval(coefficients, fit_grid), c='black', lw=4, alpha=0.85)

                plt.xlabel("Quality Measure "+fancytranslation[measure],fontsize=34)
                plt.ylabel(fancygendict[gen],fontsize=34)
                plt.xticks(fontsize=28)
                plt.yticks(fontsize=28)
                # File names carry the 0-based epoch, the title shows it 1-based.
                plt.title(fancygendict[gen]+" over "+fancytranslation[measure]+" at Epoch "+str(int(epoch_tag)+1),fontsize=34)
                plt.tight_layout()
                plt.savefig(str(sys.path[0][0:-9])+"/results/"+dataset+"/"+optimizer+"/"+translation[key]+"/"+gendict[gen]+"/"+lrfdict[lrf]+epoch_tag+".png", bbox_inches="tight")
                plt.clf()
                plt.close()
                i+=1
        


c:\Users\jjaeg\Desktop\QC-Bench\/outputs/LilJon-AdaM-CIFAR10\results-07-05-2021_19-48-38-LilJon-CIFAR10-0.csv
zero models deleted: 15
zero models deleted: 15
zero models deleted: 15
0
c:\Users\jjaeg\Desktop\QC-Bench\/outputs/LilJon-AdaM-CIFAR10\results-07-05-2021_21-10-08-LilJon-CIFAR10-5.csv
zero models deleted: 15
zero models deleted: 15
zero models deleted: 15
16
c:\Users\jjaeg\Desktop\QC-Bench\/outputs/LilJon-AdaM-CIFAR10\results-07-05-2021_20-01-29-LilJon-CIFAR10-10.csv
zero models deleted: 15
zero models deleted: 15
zero models deleted: 15
32
c:\Users\jjaeg\Desktop\QC-Bench\/outputs/LilJon-AdaM-CIFAR10\results-07-06-2021_00-45-05-LilJon-CIFAR10-15.csv
zero models deleted: 15
zero models deleted: 15
zero models deleted: 15
48
c:\Users\jjaeg\Desktop\QC-Bench\/outputs/LilJon-AdaM-CIFAR10\results-07-05-2021_20-52-43-LilJon-CIFAR10-20.csv
zero models deleted: 15
zero models deleted: 15
zero models deleted: 15
64
c:\Users\jjaeg\Desktop\QC-Bench\/outputs/LilJon-AdaM-CIFAR10\results-07-0

In [51]:
# Lookup tables shared by every dataset/optimizer combination below.
# Epochs 0, 5, ..., 65 plus the final epoch 69.
wanted_epochs = np.arange(0,70,5)
wanted_epochs = np.append(wanted_epochs, 69)

# Measure key -> output sub-directory name.
translation = {'QS':'SQ','QE':'E','spec':'S','fro':'F'}
# Measure key -> mathtext axis label. Raw strings keep the backslash of
# \widehat literal without relying on an invalid "\w" escape sequence.
fancytranslation = {
    'QS_AE': r'${\widehat{Q}_{SQ}^{p}}$',
    'QE_AE': r'${\widehat{Q}_{E}^{L2}}$',
    'spec_AE': r'${\widehat{Q}_{S}^{p}}$',
    'fro_AE': r'${\widehat{Q}_{F}^{p}}$',
    'QS_BE': '${{Q}_{SQ}^{p}}$',
    'QE_BE': '${{Q}_{E}^{L2}}$',
    'spec_BE': '${{Q}_{S}^{p}}$',
    'fro_BE': '${{Q}_{F}^{p}}$',
}
# Measure key -> [aggregation-mode index, variant index] of the series to plot.
types = {'QS':[2,0],'QE':[1,6],'spec':[2,7],'fro':[2,7]}
# Stage key -> file-name prefix.
lrfdict = {'AE':'LRF','BE':''}
# Generalisation metric -> output sub-directory name / axis label.
gendict = {'test_acc':'test','gap':'gap'}
fancygendict = {'test_acc':'Test Acc.','gap':'Gen. Gap'}

for dataset in ["CIFAR10"]:
    for optimizer in ["AdaBound"]:
        input_path = str(sys.path[0][0:-8])+"/outputs/LilJon-"+optimizer+"-"+dataset
        epochs = glob.glob(input_path+"/*")

        for epoch in epochs:
            # Trailing "-<n>.<ext>" part of the file name, i.e. the epoch number.
            epoch_tag = (epoch.split('-')[-1]).split('.')[0]
            epoch_number = int(epoch_tag)
            if epoch_number not in wanted_epochs:
                continue

            print(epoch_number)
            data = get_data(epoch)

            for key in translation.keys():
                mode_index, variant_index = types[key][0], types[key][1]
                for lrf in lrfdict.keys():
                    measure = key+"_"+lrf
                    for gen in gendict.keys():
                        x_values = data[measure][mode_index][variant_index]
                        y_values = data[gen]

                        plt.figure(figsize=(8.5, 8.5))
                        plt.locator_params(axis='y', nbins=6)
                        plt.locator_params(axis='x', nbins=6)
                        plt.plot(x_values, y_values, 'ro', alpha=0.5)

                        # Quadratic fit, drawn on a 0.001-spaced grid over the x range.
                        coefficients = np.polyfit(x_values, y_values, 2)
                        fit_grid = np.arange(min(x_values), max(x_values), 0.001)
                        plt.plot(fit_grid, np.polyval(coefficients, fit_grid), c='black', lw=4, alpha=0.85)

                        plt.xlabel("Quality Measure "+fancytranslation[measure],fontsize=34)
                        plt.ylabel(fancygendict[gen],fontsize=34)
                        plt.xticks(fontsize=28)
                        plt.yticks(fontsize=28)
                        # File names carry the 0-based epoch, the title shows it 1-based.
                        plt.title(fancygendict[gen]+" over "+fancytranslation[measure]+" at Epoch "+str(epoch_number+1),fontsize=34)
                        plt.tight_layout()
                        plt.savefig(str(sys.path[0][0:-9])+"/results/"+dataset+"/"+optimizer+"/"+translation[key]+"/"+gendict[gen]+"/"+lrfdict[lrf]+epoch_tag+".png", bbox_inches="tight")
                        plt.clf()
                        plt.close()

0
zero models deleted: 15
10
zero models deleted: 15
20
zero models deleted: 15
30
zero models deleted: 15
40
zero models deleted: 15
50
zero models deleted: 15
60
zero models deleted: 15
25
zero models deleted: 15
45
zero models deleted: 15
5
zero models deleted: 15
65
zero models deleted: 15
35
zero models deleted: 15
15
zero models deleted: 15
55
zero models deleted: 15
69
zero models deleted: 15
