In [131]:
import ray
from ray import tune
import os
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
import  seaborn as sns
import learning_lidar.utils.global_settings as gs
import matplotlib as mpl
sns.set_palette(sns.color_palette("tab10"))

plt.rcParams['figure.dpi'] = gs.FIGURE_DPI
plt.rcParams['savefig.dpi'] = gs.SAVEFIG_DPI

sns.set_theme()
sns.set_context("paper", font_scale=1.5, rc={"lines.linewidth": 2.5})

In [132]:
base_folder = os.path.dirname(os.path.dirname(os.path.abspath(os.curdir)))
results_folder = os.path.join(base_folder, 'results')

# postprocessing LCNET results from jason state files saved in  `runs_board.xlsx`

In [133]:
runs_df = pd.read_excel(os.path.join(results_folder,'runs_board.xlsx'))
for idx,row in runs_df.iterrows():
    #row = runs_df.iloc[idx]
    try:
        state_fname = glob.glob(os.path.join( row.experiment_folder,r'experiment_state*.json'))[0]
        analysis = tune.ExperimentAnalysis(state_fname)
        ignore_MARELoss = "MARELoss" in [row.field_to_ignore]
        analysis.default_metric = "MARELoss" if ignore_MARELoss else "MARELoss"
        analysis.default_mode="min"
        results_df = analysis.dataframe(metric="MARELoss", mode="min",)

        # update fields:
        if ignore_MARELoss:
            results_df["MARELoss"]=None

        # rename column names:
        cols = results_df.columns.values.tolist()
        new_cols = [col.replace('config/',"") for col in cols]
        dict_cols ={}
        for col,new_col in zip(cols, new_cols):
            dict_cols.update({col:new_col})
        results_df = results_df.rename(columns= dict_cols)

        # update power values:
        len_pow = len(results_df[results_df.use_power==True])
        len_no_pow = len(results_df[results_df.use_power==False])
        len_pows = len(results_df[results_df.use_power !=False])
        if len_no_pow>0:
            results_df.loc[results_df[results_df.use_power==False].index,'powers']= ''
        if len_pows!=len_pow:
            results_df.loc[results_df[results_df.use_power!=False].index,'powers']=results_df.use_power
            results_df.loc[results_df[results_df.use_power!=False].index,'use_power']=True
        else:
            results_df.loc[results_df[results_df.use_power==True].index,'powers']= '([0.5,0.5],[0.5])'

        # Update Notes
        note = row['note']
        results_df['note']= note if type(note)==str else 'ok'

        # drop irrelevant columns:
        drop_cols = [ 'time_this_iter_s', 'should_checkpoint', 'done',
                   'timesteps_total', 'episodes_total',
                   'experiment_id',  'timestamp',  'pid', 'hostname',
                   'node_ip', 'time_since_restore', 'timesteps_since_restore',
                   'iterations_since_restore']
        results_df.drop(columns=drop_cols,inplace=True)


        # reorganize columns:
        new_order = ['trial_id', 'date','time_total_s','training_iteration',
                     'loss', 'MARELoss',
                     'bsize', 'dfilter', 'dnorm','fc_size', 'hsizes', 'lr',
                     'ltype', 'source', 'use_bg', 'use_power','powers','note','logdir']
        results_df = results_df.reindex(columns=new_order)

        # keep index trial (especially for cases when trails are ignored)
        #results_df['idx']=results_df.index
        #new_cols = ['idx']
        #new_cols.extend(cols)
        #results_df = results_df.reindex(columns=new_cols)

        # remove irrelevant trials (e.g. when dnorm had wrong calculation)
        if row.trial_to_ignore is not np.nan:
            key,cond = eval(row.trial_to_ignore)
            results_df.drop(index=results_df[results_df[key]==cond].index,inplace=True)

        # save csv
        results_csv = os.path.join(analysis._experiment_dir, f'experiment_results.csv')
        results_df.to_csv(results_csv, index=False)

        # update csv path in main runs_board
        runs_df.loc[idx,'results_csv']=results_csv
        print(results_csv)
        #runs_df.loc[idx,'best_trial']=analysis.best_trial
        #runs_df.loc[idx,'best_config']=analysis.best_config
        #runs_df.loc[idx,'best_logdir']=analysis.best_logdir
        #runs_df.loc[idx,'best_checkpoint']=analysis.best_checkpoint
        #runs_df.loc[idx,'best_result']=analysis.best_result
    except:
        continue
# TODO: save runs_df with results_csv paths

No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.
No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.
No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.
No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.
No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.
No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of sync, as checkpointing is periodic.
Couldn't read config from 5 paths
No `self.trials`. Drawing logdirs from checkpoint file. This may result in some information that is out of s

C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-06_19-17-01\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-08_17-45-11\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-09_22-48-01\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-12_11-54-20\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-12_16-45-18\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-12_21-04-44\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-13_01-25-47\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-13_09-42-44\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-13_15-41-26\experiment_results.csv
C:\Users\addalin\Dropbox\Lidar\lidar_learning\results\main_2021-05-13_23-14-02\exp

In [137]:
paths = [row['results_csv'] for idx,row in runs_df.iterrows() if (row['include'] and type (row['results_csv'])==str)]
results_dfs=[pd.read_csv(path) for path in paths]

In [139]:
total_results = pd.concat(results_dfs,ignore_index=True)
total_results['powers'] = total_results.powers.apply(lambda x: eval(x) if type(x)==str else None)
total_results['fc_size'] = total_results.fc_size.apply(lambda x: eval(str(x))[0])
total_results['pow_y'] = total_results.powers.apply(lambda x: np.array(x[1])[0] if type(x)==tuple else None)
pow_x = total_results.powers.apply(lambda x: np.array(x[0]) if type(x)==tuple else None )
for chan in range(3):
    total_results[f'pow_x{chan+1}'] = [powx[chan] if  powx and (len(powx)>=chan+1) else None for powx in pow_x]
total_results['pow_x'] = pow_x.apply(lambda x: str(x) if x else None)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [140]:
row = total_results.iloc[0].powers
row

([0.5, 0.5], [0.5])

In [142]:
extract_powers(row, 3)

IndexError: too many indices for array: array is 0-dimensional, but 1 were indexed

In [144]:
powx_test = np.array([1,2,np.nan])
powx_test, [1,*powx_test]

(array([ 1.,  2., nan]), [1, 1.0, 2.0, nan])

In [141]:
def extract_powers(row, in_channels):
    powers = eval(row) if type(row)==str else None
    pow_y = np.array(powers[1])[0] if type(powers)==tuple else None
    pow_x = np.array(powers[0]) if type(powers)==tuple else None
    pow_xi = np.array(in_channels)
    if pow_x:
        for chan in range(in_channels):
            pow_xi[chan] = powx[chan] if  (len(powx)>=chan+1) else None
    else:
        for chan in range(in_channels):
            pow_xi[chan] = None
    return pd.DataFrame(data=[pow_y,*pow_xi])





In [None]:
analyse_results = total_results[total_results.training_iteration>=2]


In [None]:
lidar_res = total_results[total_results.source=='lidar' ]
lidar_res = lidar_res[~lidar_res.use_bg]
#lidar_res

## 1. Choosing Learning rate
1. X: range_corr(lidar), attbsc(molecular). Y:LC
> use_normalization = False
> use_power = True
> source = lidar

In [None]:
pow_lidar = lidar_res[(~lidar_res.dnorm & lidar_res.use_power ) ]
pow_lidar = pow_lidar[pow_lidar.powers==([0.5,0.5],[0.5])]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
pd.pivot_table(pow_lidar,
               values=['loss'],
               columns= ['lr'],
               index=['fc_size','note'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title='Choosing learning Rate')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

## 2. FC vs. hidden sizes
1. X: range_corr(lidar), attbsc(molecular). Y:LC
> use_normalization = False
> use_power = True
> source = lidar
> lr = 0.001 or lr = 0.005

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))

pd.pivot_table(pow_lidar[pow_lidar.lr!=0.0001],
               values=['loss'],
               columns= ['hsizes'],
               index=['fc_size','note'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title='FC size vs. inner layers` hidden sizes')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

## 3. Normalization vs. Power transform
1. X: range_corr(lidar), attbsc(molecular). Y:LC
> use_power = False
> source = lidar

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
pd.pivot_table(lidar_res,
               values=['MARELoss'],
               columns= ['dnorm','use_power','note'],
               index=['fc_size'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title='Normalization vs. Power transform')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

## 3. Type of Power transform
1. X: range_corr(lidar), attbsc(molecular). Y:LC
> source = lidar
> use_bg = False
> dnorm = False
> lr = 0.001

In [None]:
pow_lidar = total_results[total_results.source=='lidar']
pow_lidar= pow_lidar[pow_lidar.use_power & ~pow_lidar.dnorm & ~total_results.use_bg]
pow_lidar = pow_lidar[pow_lidar.lr==0.001]

#pow_lidar['pow_y'] = pow_lidar.powers.apply(lambda x: np.array(x[1])[0])
#pow_lidar['pow_x'] = pow_lidar.powers.apply(lambda x: np.array(x[0]))
#pow_lidar['pow_x1'] = [powx[0] for powx in pow_lidar['pow_x']]
#pow_lidar['pow_x2'] = [powx[1] for powx in pow_lidar['pow_x']]
#pow_lidar['pow_x'] = pow_lidar.pow_x.apply(lambda x: (str(x[0]),str(x[1])))
#pow_lidar['pow_y'] = pow_lidar.pow_y.apply(lambda x: str(x))


In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
pd.pivot_table(pow_lidar,
               values=['MARELoss'],
               columns= ['pow_y' ],
               index=['fc_size'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title='Choosing Power Transform')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
pd.pivot_table(pow_lidar[pow_lidar['pow_y']==0.5][(pow_lidar['pow_x1']==0.5)],
               values=['MARELoss'],
               columns= ['pow_x2','note'],
               index=['hsizes'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title='Choosing Power Transform. $\gamma_{x1}=0.5 , \gamma_y=0.5$')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
pd.pivot_table(pow_lidar[(pow_lidar['pow_y']==0.5)][(pow_lidar['pow_x1']==0.5)],
               values=['MARELoss'],
               columns= ['pow_x2','note'],
               index=['fc_size'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title=r'Choosing Power Transform. $\gamma_{x1}=0.5 , \gamma_y=0.5$')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

## 3. Type of source
1. X: range_corr(lidar), attbsc(molecular). Y:LC
> use_power = True
> use_bg = False
> source = all
> dnorm = True
> lr > 0.0001

In [None]:
sources_res = total_results[(total_results.use_power &
                             ~total_results.dnorm &
                             ~total_results.use_bg)]
sources_res = sources_res[total_results.lr==0.001]
sources_res = sources_res[total_results.fc_size>4]

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
pd.pivot_table(sources_res,
               values=['MARELoss'],
               columns= ['source'],
               index=['fc_size'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title='Testing different sources')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()


fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 5))
pd.pivot_table(sources_res,
               values=['MARELoss'],
               columns= ['source'],
               index=['hsizes'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title='Testing different sources')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

## 3. Adding
1. X: range_corr(lidar), attbsc(molecular). Y:LC
> use_power = True
> fc_size>4
> source = lidar
> dnorm = False
> lr==0.001
> pow_y = 0.5

In [None]:
bg_res = total_results[total_results.source=='lidar']
bg_res = bg_res[bg_res.use_power &
                ~bg_res.dnorm ]
bg_res = bg_res[bg_res.lr==0.001]
bg_res = bg_res[bg_res.fc_size>4]


#bg_res['pow_y'] = bg_res.powers.apply(lambda x: np.array(x[1])[0])
#bg_res['pow_x'] = bg_res.powers.apply(lambda x: np.array(x[0]))
#bg_res = bg_res[bg_res['pow_y']==0.5]

#bg_res['pow_x1'] = [powx[0] for powx in bg_res['pow_x']]
#bg_res['pow_x2'] = [powx[1] for powx in bg_res['pow_x']]
#bg_res['pow_x3'] =[powx[2] if len(powx)==3 else None for powx in bg_res['pow_x']]



#bg_res['pow_x'] = bg_res.pow_x.apply(lambda x: str(x))
#bg_res['pow_y'] = bg_res.pow_y.apply(lambda x: str(x))

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
pd.pivot_table(bg_res,
               values=['MARELoss'],
               #columns= ['pow_x2'],#'pow_x'],
               index=['use_bg','note'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title=r'Testing additional channel of $<p_{bg}> $')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
pd.pivot_table(bg_res,
               values=['MARELoss'],
               columns= ['pow_x2','note'],
               index=['use_bg'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title=r'Testing additional channel of $<p_{bg}> $')
plt.tight_layout()

ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.show()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
pd.pivot_table(bg_res[bg_res.pow_x2==-0.25],
               values=['MARELoss'],
               columns= ['pow_x3','note'],#'fc_size'],
               index=['hsizes'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title=r'Testing power transform on additional channel of $<p_{bg}>$,$\gamma_{x1}=0.5$, $\gamma_{x2}=-0.25$')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
pd.pivot_table(bg_res[bg_res.use_bg][bg_res.pow_x3==1.0][bg_res.pow_x2==-0.25],
               values=['MARELoss'],
               columns= ['fc_size','note'],
               index=['hsizes'],
               aggfunc=np.min).\
    plot(kind='bar',ax =ax, title=r'Testing layers vs FC for $\gamma_{x1}=0.5$, $\gamma_{x2}=-0.25$, $\gamma_{x3}=1$')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
pd.pivot_table(bg_res[bg_res.pow_x2==-0.25],
               values=['MARELoss'],
               columns= ['fc_size','use_bg','note'],
               index=['hsizes'],
               aggfunc=np.min).\
    plot(kind='bar',ax =ax, title=r'Testing layers vs FC for $\gamma_{x1}=0.5$, $\gamma_{x2}=-0.25$, $\gamma_{x3}=1$')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.tight_layout()
plt.show()


In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))
pd.pivot_table(bg_res[bg_res.pow_x3==1.0],
               values=['MARELoss'],
               columns= ['hsizes','fc_size'],
               index=['pow_x2','note'],
               aggfunc=np.mean).\
    plot(kind='bar',ax =ax, title=r'Testing power transform on second channel $\gamma_{x1}=0.5$, $\gamma_{x3}=1$')
ax.get_yaxis().set_minor_locator(mpl.ticker.AutoMinorLocator())
ax.grid(b=True, which='minor', color='w', linewidth=0.8)
ax.grid(b=True, which='major', color='w', linewidth=1.2)
ax.xaxis.grid(False)
plt.legend( loc="lower center")
plt.tight_layout()
plt.show()

# Create a list of chec_points/experiments to restore (run from begining)

In [None]:






# TODO: show figure of different power transform on bg only
# TODO: show r^2 * pbg
# TODO: onclude O(r)
# TODO : a single FC
