In [5]:
import itertools
import pandas as pd
from sklearn import linear_model
import numpy as np
import matplotlib.pyplot as plt

from graph2net.trainers import gen_and_validate,generate,model_validate,full_model_run
from graph2net.data_loaders import *
from graph2net.graph_generators import *
from graph2net.helpers import max_model_size
import torch

import functools

from scipy.stats import spearmanr, pearsonr, rankdata
from sklearn import linear_model
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split
import time

# `display` and `HTML` are part of the public `IPython.display` API. Importing them
# from `IPython.core.display` is deprecated and is not supported by newer IPython releases.
from IPython.display import display, HTML

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Wider console rows for numpy arrays and up to 500 visible DataFrame columns,
# so wide result tables are not wrapped or elided.
np.set_printoptions(linewidth=150)
pd.set_option('display.max_columns', 500)

# Enable IPython's autoreload extension in mode 2: imported modules are re-loaded
# before each cell executes, so edits to imported source files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# Batch size handed to the data loader; `load_data` comes from one of the
# star-imported project modules above.
BATCH_SIZE = 256
data = load_data(batch_size=BATCH_SIZE)

In [9]:
# Time `max_model_size` on ten randomly generated cells and report the mean duration.
results = []   # declared here but not filled by this loop
ts = []        # elapsed wall-clock seconds, one entry per trial
configs = []   # value returned by `max_model_size`, one entry per trial
for test in range(10):
    t_start = time.time()
    print(f"Test {test:<3} ==========")
    # Draw the node count first (3..9, upper bound exclusive), then the connectivity in [0.3, 0.7).
    nodes = np.random.randint(3,10)
    connectivity = np.random.uniform(.3,.7)
    cell = gen_cell(nodes,connectivity,concat=.5)
    configs += [max_model_size(cell,data)]
    # The timed span covers the print, cell generation and the size search.
    ts += [time.time()-t_start]

print(np.mean(ts))

6.922346067428589


In [10]:
# Display the value `max_model_size` returned for each of the timed trials above.
configs

[[5, 3, True, 51825098],
 [5, 5, False, 14531914],
 [6, 5, False, 1406986],
 [5, 4, False, 19962506],
 [5, 5, False, 12997066],
 [5, 2, True, 23967818],
 [6, 5, False, 6890058],
 [6, 4, True, 12007818],
 [6, 5, False, 20245770],
 [5, 2, False, 16494410]]

In [None]:
# Build a DataFrame from the collected `results` records and display it.
# NOTE(review): no visible cell appends to `results`, so this frame is empty
# unless `results` is populated elsewhere — confirm before relying on it.
result_df = pd.DataFrame(results)
result_df
# Disabled snippet kept for reference: it keeps the rows flagged `valid` and, per `test`,
# selects those whose `params` equals the group maximum. TODO: re-enable or delete.
#valids = result_df[result_df['valid']]
#idx = valids.groupby(['test'])['params'].transform(max)==valids['params']
#valids[idx]

In [None]:
# Rebuild the frame from the raw records so this cell can be re-run without
# stacking derived columns on top of a previously modified frame.
result_df = pd.DataFrame(results)

# Baseline ("micro") rows: reductions == 2, spacing == 0, scale == 2.
is_micro = (result_df['reductions']==2) & (result_df['spacing']==0) & (result_df['scale']==2)
micros = result_df[is_micro]

# Map each test id to the parameter count of its baseline row. Built column-wise
# (no `iterrows`, which is slow and upcasts mixed dtypes); if a test has several
# baseline rows the last one wins, as with the row-wise dict it replaces.
micro_params = dict(zip(micros['test'], micros['params']))

# Fail fast with a KeyError when a test has no baseline row, instead of letting
# the vectorised lookup below silently produce NaN.
missing = set(result_df['test']) - set(micro_params)
if missing:
    raise KeyError("no micro baseline row for test(s): {}".format(list(missing)))

# Parameter count relative to the baseline of the same test.
result_df['micro_params']=result_df['test'].map(micro_params)
result_df['param_scale']=result_df['params']/result_df['micro_params']
# Combined depth measure: reductions plus reductions * spacing.
result_df['reduction_spacing']=result_df['reductions']+result_df['reductions']*result_df['spacing']
result_df

In [None]:
def plot_fixed_pair(tests,x):
    """Plot ``param_scale`` against column ``x`` for each combination of fixed values.

    For every combination of values of the columns named in ``tests`` (the
    candidate values are read from the global ``test_vars``), the matching rows
    of the global ``result_df`` are selected and drawn in a figure of their own,
    one line per ``test`` id. Each combination is printed before it is plotted.

    Args:
        tests: sequence of ``result_df`` column names to hold fixed; every name
            must be a key of ``test_vars``.
        x: name of the ``result_df`` column used for the x axis.
    """
    ranges = [list(test_vars[name]) for name in tests]
    for values in itertools.product(*ranges):
        fixed = list(zip(tests,values))
        print(fixed)

        # AND together one equality mask per fixed column.
        masks = [result_df[name]==value for name,value in fixed]
        example = result_df[functools.reduce(lambda a,b: a&b, masks)]
        if example.empty:
            # Nothing was recorded for this combination: skip the empty figure.
            continue

        # Open a fresh figure per combination. A single figure created before the
        # loop is only current until the first plt.show(), so later plots would
        # end up on new default-sized figures.
        plt.figure(figsize=(60,30))
        # Draw every run (test id) once as its own line, ordered along the x axis,
        # instead of re-plotting the whole selection repeatedly.
        for _,run in example.groupby('test'):
            run = run.sort_values(x)
            plt.plot(run[x],run['param_scale'])
        plt.xlabel(x)
        plt.ylabel('param_scale')
        plt.title(str(fixed))
        plt.show()


# One sub-frame per distinct test id, in order of first appearance.
runs = [
    result_df.loc[result_df['test']==test_id]
    for test_id in result_df['test'].unique()
]

# Candidate values for each swept variable; read by `plot_fixed_pair`.
test_vars = {
    'scale': range(2,6),
    'reduction_spacing': range(2,20),
}

# Hold all variables but one fixed; the variable left out becomes the x axis.
for element in itertools.combinations(test_vars,r=len(test_vars)-1):
    x_axis = next(name for name in test_vars if name not in element)
    plot_fixed_pair(element,x_axis)

In [None]:
# Fit log(param_scale) as a linear function of the features below, then compare
# back-transformed predictions with the actual values on a held-out split.
SPLIT_SEED = 0   # fixed seed so the split, and every number reported below, is reproducible
Z_95 = 1.96      # two-sided 95% z-score; the band assumes roughly normal, zero-mean residuals


def _plot_pred_vs_actual(position,predicted,actual,band,title):
    """Draw one predicted-vs-actual panel with the identity line and the +/- band lines."""
    plt.subplot(position)
    plt.plot(predicted,actual,".")
    plt.plot(predicted,predicted,alpha=.5)
    plt.plot(predicted,predicted+band,alpha=.5)
    plt.plot(predicted,predicted-band,alpha=.5)
    plt.xlabel("Predicted Score")
    plt.ylabel("Actual Score")
    plt.title(title)


# Model features: scale and reductions enter unchanged, spacing enters as exp(spacing).
result_df['scale_mod']=result_df['scale']
result_df['reductions_mod']=result_df['reductions']
result_df['spacing_mod']=np.exp(result_df['spacing'])

# The regression target is the log of param_scale; predictions are mapped back with exp().
result_df['y_mod']=np.log(result_df['param_scale'])
selector = ['scale_mod','reductions_mod','spacing_mod','micro_params','nodes']

train,test = train_test_split(result_df,test_size=.25,random_state=SPLIT_SEED)
train_X,test_X = train[selector],test[selector]
train_Y,test_Y = train['y_mod'],test['y_mod']
train_act_Y, test_act_Y = train['param_scale'],test['param_scale']

regr = linear_model.LinearRegression()
regr.fit(train_X,train_Y)

# Back-transform the fitted log-values to the param_scale domain.
x_func = np.exp(regr.predict(train_X))   # predictions on the training rows
pred   = np.exp(regr.predict(test_X))    # predictions on the held-out rows
# Half-width of the band: z-score times the spread of the held-out residuals.
c95  = np.std(test_act_Y-pred)*Z_95
func ={'b':regr.intercept_,'m':regr.coef_,'95':c95,'coef_names':selector}
# Correlation is measured between training predictions and training targets.
pearson = pearsonr(x_func,train_act_Y)

print("Train/Test:",len(train),len(test))
print('Intercept:', regr.intercept_)
print('Coefficients: \n', *["{}: {:.4f}".format(name,coef) for (name,coef) in zip(list(train_X),regr.coef_)])

_plot_pred_vs_actual(121,pred,test_act_Y,c95,"Test Micro/Macro")
_plot_pred_vs_actual(122,x_func,train_act_Y,c95,"Train Micro/Macro")
plt.tight_layout()

plt.show()

print("Pearson:  {:.2f},p={}".format(*pearson))
print("Spearman: {:.2f},p={}".format(*spearmanr(x_func,train_act_Y)))
print("95%: ±{:.2f}".format(c95))