In [2]:
# Load
%load_ext autoreload


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:

%autoreload
import os, sys
import numpy as np
import torch
import torch.nn as nn

import matplotlib.pyplot as plt

ckconv_source = os.path.join(os.getcwd(), '../')

if ckconv_source not in sys.path:
    sys.path.append(ckconv_source)

from ckconv.nn import ScaleFlexConv, ScaleCKConv, FlexConv
import disco.ses_conv_learnable as SESN

import numpy as np
import torch
from torch.nn.utils import weight_norm
from omegaconf import OmegaConf

import ckconv.nn as cknn

import disco.ses_conv_learnable as SESN
import utils.loaders as loaders


from matplotlib import pyplot as plt
from PIL import Image


# Load all runs

import pandas as pd 
import wandb
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.display import display
api = wandb.Api()


  from .autonotebook import tqdm as notebook_tqdm


In [4]:



def prepare_api_runs(runs, values = ['test/acc'], rounding=True):
    combined = []
    for run in runs: 
        temp_info_dict = run.summary._json_dict
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files 
        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        temp_info_dict.update(run.config)
        if isinstance(temp_info_dict['init_scales'], int) or isinstance(temp_info_dict['init_scales'], float):
            temp_info_dict['init_scales'] = [temp_info_dict['init_scales']]
        if rounding:
            if temp_info_dict['sample_scales'][0] != 'DISCRETE':
                temp_info_dict['sample_scales'] = str([round(sample_scale,3) for sample_scale in temp_info_dict['sample_scales'][1:]])
            else:
                temp_info_dict['sample_scales'] = str([round(sample_scale,3) for sample_scale in temp_info_dict['sample_scales'][1:]])
            temp_info_dict['init_scales'] = str([round(init_scale,3) for init_scale in temp_info_dict['init_scales']])

        else:
            temp_info_dict['sample_scales'] = str(temp_info_dict['sample_scales'])
            temp_info_dict['init_scales'] = str(temp_info_dict['init_scales'])
        if 'test/acc' in temp_info_dict.keys():
            temp_info_dict['Test Error'] = (1 - temp_info_dict['test/acc'])*100
            temp_info_dict['val/acc.max'] = temp_info_dict['val/acc']['max']
            combined.append(temp_info_dict)

    return combined

def plot_table(runs_in, name, indexes = ['sample_scales','init_scales'], values = ['Test Error'], rounding = True, Save=False):
    # Prepare and load into dataframe
    all_info = prepare_api_runs(runs_in, values)
    df = pd.DataFrame.from_dict(all_info)
    # display(df)

    df = df.pivot_table(index=indexes, values=values, aggfunc=(np.mean, np.std))
    # Save Df
    if rounding:
        df = df.astype(float).round(3)
    df[r'Learned $\sigma_{basis}$'] = df["Final Basis Min Scale"]["mean"].astype('str') + " ("  + df["Final Basis Min Scale"]["std"].astype('str') +")" 	
    df['Learned ISR'] = df["Final ISR"]["mean"].astype('str') + " ("  + df["Final ISR"]["std"].astype('str') +")" 

    display(df)
    if Save:
        df.to_csv(f'results/{name}')


# How does Initialization of scales when scale learning affect search for best internal scales?


## Hypothesis

We expect that initialization that for most ISRs does not have large influence on the learning process of the network. However, for ISR values that are multiple octaves too big or too small we think that the network either has too little time to find the best internal scales. Additionally, if the ISR is too big we think:
- If the ISR is initialized way too big: the largest data scale is still much much smaller than the second smallest internal scale (if we assume that the smallest internal scale is placed on the smallest data scale), this makes the scenario highlighted in the our hypothesis about learning almost impossible
- If the ISR is initialized way too small: there is an indistinguishable difference between the filters that stem from the internal scales and this leads to very hard to optimize ISR. 

We think that a good initialization is ~ 2, as is done by SESN and DISCO basically (only then the initialization is also the final value).

## Network/Data
Not sure whether we want to perform this experiment in our toy setting or if we want to perform it on original MNIST-scale. Intuition says that keeping toy setting is okay, and final performance is highlighted in a different experiment as well. 

The logarithmic value range that will be tested in this experiment: [1.023, 10] over 5 values $(10^{0.01}, 10^1)$. The first and last values represent the cases where we think the ISR is too large or too small to be able to rectify and learn the ISR. 


In [5]:
%autoreload
def visualize(runs, filter_scales = True):
    # Prepare and load into dataframe
    all_info = prepare_api_runs(runs, ['Test Error','Final ISR', 'Final Basis Min Scale'])
    df = pd.DataFrame.from_dict(all_info)
    # display(df)
    df[r'Init \sigma_{basis}'] = df['basis_min_scale']
    df['Init ISR'] = df['ISR_start']
    df['Data Range'] = df['sample_scales']
    if filter_scales:
        df = df.pivot_table(index=[r'Init \sigma_{basis}','Init ISR'], values=['Test Error','Final ISR', 'Final Basis Min Scale'], aggfunc=(np.mean, np.std))
    else:
        df = df.pivot_table(index=[r'Init \sigma_{basis}','Init ISR', 'Data Range'], values=['Test Error','Final ISR', 'Final Basis Min Scale'], aggfunc=(np.mean, np.std))
    # Save Df
    df = df.astype(float).round(3)
    df[r'Learned $\sigma_{basis}$'] = df["Final Basis Min Scale"]["mean"].astype('str') + r" $\pm$ "   + df["Final Basis Min Scale"]["std"].astype('str') 	
    df['Learned ISR'] = df["Final ISR"]["mean"].astype('str') + r" $\pm$ "  + df["Final ISR"]["std"].astype('str')
    df['Test Error 1'] = df["Test Error"]["mean"].astype('str') + r" $\pm$ "  + df["Test Error"]["std"].astype('str')
    df.drop(columns=['Final Basis Min Scale', 'Final ISR', 'Test Error'], inplace=True)
    df['Test Error'] = df['Test Error 1']
    df.drop(columns=['Test Error 1'], inplace=True)
    display(df)
    print(df.to_latex(escape=False))


api = wandb.Api()
exp_name = 'scale_learning_init_exp_4'
# Load all runs
for dist_range in [2.83, 4.76, 8]:
    runs = api.runs(f"mbasting/scale_learning", {
        "$and": [{"tags": exp_name}, {'config.sample_scales' : ["LOGUNIFORM", 1, dist_range]}]
    })
    visualize(runs)
    # print(df.to_latex())
    # if Save:
    #     df.to_csv(f'results/{name}')



  df.drop(columns=['Test Error 1'], inplace=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,Learned $\sigma_{basis}$,Learned ISR,Test Error
Init \sigma_{basis},Init ISR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.5,1.268 $\pm$ 0.061,2.609 $\pm$ 0.269,2.487 $\pm$ 0.108
1,3.0,1.236 $\pm$ 0.139,3.3 $\pm$ 0.082,2.357 $\pm$ 0.024
1,6.0,1.313 $\pm$ 0.102,4.35 $\pm$ 0.396,2.309 $\pm$ 0.021
2,1.5,1.81 $\pm$ 0.057,2.405 $\pm$ 0.167,2.26 $\pm$ 0.025
2,3.0,1.973 $\pm$ 0.079,3.635 $\pm$ 0.647,2.368 $\pm$ 0.055
2,6.0,1.994 $\pm$ 0.012,5.336 $\pm$ 0.283,2.359 $\pm$ 0.097
4,1.5,2.778 $\pm$ 0.092,2.521 $\pm$ 0.199,2.421 $\pm$ 0.05
4,3.0,2.703 $\pm$ 0.081,3.817 $\pm$ 0.245,2.483 $\pm$ 0.089
4,6.0,2.832 $\pm$ 0.001,5.211 $\pm$ 0.416,2.42 $\pm$ 0.124


  print(df.to_latex())


\begin{tabular}{lllll}
\toprule
  &     & Learned \$\textbackslash sigma\_\{basis\}\$ &        Learned ISR &         Test Error \\
  &     \\
Init \textbackslash sigma\_\{basis\} & Init ISR &                          &                    &                    \\
\midrule
1 & 1.5 &        1.268 \$\textbackslash pm\$ 0.061 &  2.609 \$\textbackslash pm\$ 0.269 &  2.487 \$\textbackslash pm\$ 0.108 \\
  & 3.0 &        1.236 \$\textbackslash pm\$ 0.139 &    3.3 \$\textbackslash pm\$ 0.082 &  2.357 \$\textbackslash pm\$ 0.024 \\
  & 6.0 &        1.313 \$\textbackslash pm\$ 0.102 &   4.35 \$\textbackslash pm\$ 0.396 &  2.309 \$\textbackslash pm\$ 0.021 \\
2 & 1.5 &         1.81 \$\textbackslash pm\$ 0.057 &  2.405 \$\textbackslash pm\$ 0.167 &   2.26 \$\textbackslash pm\$ 0.025 \\
  & 3.0 &        1.973 \$\textbackslash pm\$ 0.079 &  3.635 \$\textbackslash pm\$ 0.647 &  2.368 \$\textbackslash pm\$ 0.055 \\
  & 6.0 &        1.994 \$\textbackslash pm\$ 0.012 &  5.336 \$\textbackslash pm\$ 0.283 &

  df.drop(columns=['Test Error 1'], inplace=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,Learned $\sigma_{basis}$,Learned ISR,Test Error
Init \sigma_{basis},Init ISR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.5,1.294 $\pm$ 0.11,3.462 $\pm$ 0.41,3.033 $\pm$ 0.13
1,3.0,1.253 $\pm$ 0.07,3.829 $\pm$ 0.126,2.762 $\pm$ 0.087
1,6.0,1.331 $\pm$ 0.091,4.612 $\pm$ 0.188,2.727 $\pm$ 0.101
2,1.5,1.931 $\pm$ 0.068,2.932 $\pm$ 0.169,2.767 $\pm$ 0.128
2,3.0,1.975 $\pm$ 0.06,3.309 $\pm$ 0.092,2.527 $\pm$ 0.12
2,6.0,2.041 $\pm$ 0.092,4.882 $\pm$ 0.007,2.501 $\pm$ 0.157
4,1.5,2.515 $\pm$ 0.038,3.093 $\pm$ 0.157,2.648 $\pm$ 0.073
4,3.0,2.587 $\pm$ 0.04,3.638 $\pm$ 0.169,2.575 $\pm$ 0.07
4,6.0,2.752 $\pm$ nan,5.58 $\pm$ nan,2.77 $\pm$ nan


  print(df.to_latex())


\begin{tabular}{lllll}
\toprule
  &     & Learned \$\textbackslash sigma\_\{basis\}\$ &        Learned ISR &         Test Error \\
  &     \\
Init \textbackslash sigma\_\{basis\} & Init ISR &                          &                    &                    \\
\midrule
1 & 1.5 &         1.294 \$\textbackslash pm\$ 0.11 &   3.462 \$\textbackslash pm\$ 0.41 &   3.033 \$\textbackslash pm\$ 0.13 \\
  & 3.0 &         1.253 \$\textbackslash pm\$ 0.07 &  3.829 \$\textbackslash pm\$ 0.126 &  2.762 \$\textbackslash pm\$ 0.087 \\
  & 6.0 &        1.331 \$\textbackslash pm\$ 0.091 &  4.612 \$\textbackslash pm\$ 0.188 &  2.727 \$\textbackslash pm\$ 0.101 \\
2 & 1.5 &        1.931 \$\textbackslash pm\$ 0.068 &  2.932 \$\textbackslash pm\$ 0.169 &  2.767 \$\textbackslash pm\$ 0.128 \\
  & 3.0 &         1.975 \$\textbackslash pm\$ 0.06 &  3.309 \$\textbackslash pm\$ 0.092 &   2.527 \$\textbackslash pm\$ 0.12 \\
  & 6.0 &        2.041 \$\textbackslash pm\$ 0.092 &  4.882 \$\textbackslash pm\$ 0.007 &

  df.drop(columns=['Test Error 1'], inplace=True)


Unnamed: 0_level_0,Unnamed: 1_level_0,Learned $\sigma_{basis}$,Learned ISR,Test Error
Init \sigma_{basis},Init ISR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.5,1.45 $\pm$ 0.13,3.767 $\pm$ 0.171,3.087 $\pm$ 0.173
1,3.0,1.275 $\pm$ 0.076,4.158 $\pm$ 0.123,3.131 $\pm$ 0.179
1,6.0,1.331 $\pm$ 0.097,5.423 $\pm$ 0.529,3.087 $\pm$ 0.178
2,1.5,1.755 $\pm$ 0.156,3.444 $\pm$ 0.025,3.01 $\pm$ 0.198
2,3.0,1.982 $\pm$ 0.068,4.095 $\pm$ 0.078,2.921 $\pm$ 0.082
2,6.0,2.079 $\pm$ 0.061,5.053 $\pm$ 0.375,2.745 $\pm$ 0.012
4,1.5,2.453 $\pm$ 0.151,3.466 $\pm$ 0.216,2.935 $\pm$ 0.036
4,3.0,2.607 $\pm$ 0.078,4.401 $\pm$ 0.343,2.803 $\pm$ 0.085


\begin{tabular}{lllll}
\toprule
  &     & Learned \$\textbackslash sigma\_\{basis\}\$ &        Learned ISR &         Test Error \\
  &     \\
Init \textbackslash sigma\_\{basis\} & Init ISR &                          &                    &                    \\
\midrule
1 & 1.5 &          1.45 \$\textbackslash pm\$ 0.13 &  3.767 \$\textbackslash pm\$ 0.171 &  3.087 \$\textbackslash pm\$ 0.173 \\
  & 3.0 &        1.275 \$\textbackslash pm\$ 0.076 &  4.158 \$\textbackslash pm\$ 0.123 &  3.131 \$\textbackslash pm\$ 0.179 \\
  & 6.0 &        1.331 \$\textbackslash pm\$ 0.097 &  5.423 \$\textbackslash pm\$ 0.529 &  3.087 \$\textbackslash pm\$ 0.178 \\
2 & 1.5 &        1.755 \$\textbackslash pm\$ 0.156 &  3.444 \$\textbackslash pm\$ 0.025 &   3.01 \$\textbackslash pm\$ 0.198 \\
  & 3.0 &        1.982 \$\textbackslash pm\$ 0.068 &  4.095 \$\textbackslash pm\$ 0.078 &  2.921 \$\textbackslash pm\$ 0.082 \\
  & 6.0 &        2.079 \$\textbackslash pm\$ 0.061 &  5.053 \$\textbackslash pm\$ 0.375 &

  print(df.to_latex())
