<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports-and-Constants" data-toc-modified-id="Imports-and-Constants-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports and Constants</a></span></li><li><span><a href="#DHS-OOC" data-toc-modified-id="DHS-OOC-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>DHS OOC</a></span></li><li><span><a href="#DHS-Incountry" data-toc-modified-id="DHS-Incountry-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>DHS Incountry</a></span></li><li><span><a href="#LSMS-Delta-Incountry" data-toc-modified-id="LSMS-Delta-Incountry-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>LSMS Delta Incountry</a></span></li><li><span><a href="#LSMS-Delta-Incountry-(OLD)" data-toc-modified-id="LSMS-Delta-Incountry-(OLD)-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>LSMS Delta Incountry (OLD)</a></span></li><li><span><a href="#LSMS-Delta-Incountry-(w/-orig_labels)" data-toc-modified-id="LSMS-Delta-Incountry-(w/-orig_labels)-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>LSMS Delta Incountry (w/ orig_labels)</a></span></li><li><span><a href="#LSMS-Delta-Incountry-Forward-(w/-orig_labels)" data-toc-modified-id="LSMS-Delta-Incountry-Forward-(w/-orig_labels)-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>LSMS Delta Incountry Forward (w/ orig_labels)</a></span></li><li><span><a href="#LSMS-Index-of-Delta-Incountry" data-toc-modified-id="LSMS-Index-of-Delta-Incountry-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>LSMS Index of Delta Incountry</a></span></li><li><span><a href="#LSMS-Delta-Class" data-toc-modified-id="LSMS-Delta-Class-9"><span class="toc-item-num">9&nbsp;&nbsp;</span>LSMS Delta Class</a></span></li></ul></div>

# Imports and Constants

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import ast
from glob import glob
import os
from pprint import pprint
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

sys.path.append('../')
from utils.general import colordisplay

LOGS_DIR = '../logs/'

In [None]:
def read_results(results_glob_str):
    '''Loads every matching results.csv and tags its rows with the run's params.

    For each results.csv, the params.txt file in the same directory is parsed to
    recover the hyperparameters of the run that produced it.

    Args
    - results_glob_str: str, glob pattern to match results.csv files;
        matched paths containing the substring 'keep' are ignored

    Returns
    - all_results_df: pd.DataFrame, rows of all matched results.csv files
        concatenated, with added columns 'bands', 'dataset', 'lr', 'reg'
    - modelid_to_modeldir: dict, maps (bands, dataset, lr, reg) tuple
        => name of the directory that contains the run's results.csv

    Raises
    - ValueError: if no results.csv file matches the pattern
    '''
    results_paths = sorted(path for path in glob(results_glob_str) if 'keep' not in path)
    if not results_paths:
        raise ValueError(f'no results.csv files matched pattern: {results_glob_str!r}')

    all_dfs = []
    modelid_to_modeldir = {}

    for results_path in results_paths:
        # Only rewrite the file name, never a directory component of the path.
        run_dir, results_name = os.path.split(results_path)
        params_path = os.path.join(run_dir, results_name.replace('results.csv', 'params.txt'))
        with open(params_path, 'r') as f:
            # everything from the "('Checkpoint prefix" line onwards is not part
            # of the params literal, so it is cut off before parsing
            s = f.read().split("('Checkpoint prefix")[0]
            params = ast.literal_eval(s)

        bands = ''
        if params['ls_bands'] is not None:
            bands += params['ls_bands']
        if params['nl_band'] is not None:
            bands += 'nl'
        dataset, lr, reg = params['dataset'], params['lr'], params['fc_reg']

        # os.path handles both '/' and the OS-native separator that glob may return
        modeldir = os.path.basename(run_dir)
        modelid = (bands, dataset, lr, reg)
        # NOTE: runs that share the same modelid overwrite each other here
        modelid_to_modeldir[modelid] = modeldir

        df = pd.read_csv(results_path, float_precision='high', index_col=False)
        df['bands'], df['dataset'], df['lr'], df['reg'] = modelid
        all_dfs.append(df)

    all_results_df = pd.concat(all_dfs, sort=False).reset_index(drop=True)
    return all_results_df, modelid_to_modeldir

def get_best_epoch_mse(df):
    '''Selects the epoch with the lowest validation MSE and flattens its metrics.

    Args
    - df: pd.DataFrame, has columns 'epoch', 'split', 'mse' (plus any other
        metric columns), including rows where split == 'val'

    Returns
    - pd.DataFrame, indexed by the best epoch, with one column per
        (metric, split) pair named '<split> <metric>', e.g. 'val mse'
    '''
    idx = pd.IndexSlice
    by_epoch_split = df.set_index(['epoch', 'split'])

    # idxmin on the val rows yields an (epoch, split) label; keep the epoch part
    val_mse = by_epoch_split.loc[idx[:, 'val'], 'mse']
    best_epoch = val_mse.idxmin()[0]

    # all splits of the winning epoch, pivoted so each split becomes a column
    best_rows = by_epoch_split.loc[idx[best_epoch, :], :]
    wide = best_rows.unstack('split')
    wide.columns = [f'{split} {metric}' for metric, split in wide.columns]
    return wide

def get_best_epoch_acc(df):
    '''Selects the epoch with the highest validation accuracy and flattens its metrics.

    The input frame is left unmodified, so the function can be called again on
    the same frame.

    Args
    - df: pd.DataFrame, has columns 'epoch', 'split', 'acc' (plus any other
        metric columns), including rows where split == 'val'

    Returns
    - pd.DataFrame, indexed by the best epoch, with one column per
        (metric, split) pair named '<split> <metric>', e.g. 'val acc'
    '''
    # set_index returns a new frame; the caller's frame keeps its columns
    df = df.set_index(['epoch', 'split'])
    # idxmax yields an (epoch, split) label; keep only the epoch
    best_epoch = df.loc[(slice(None), 'val'), 'acc'].idxmax()[0]
    df = df.loc[(best_epoch, slice(None)), :]
    df = df.unstack('split')
    df.columns = [f'{split} {metric}' for (metric, split) in df.columns.values]
    return df

# DHS OOC

In [None]:
# Gather every results.csv under the DHS_OOC log directory into one frame, plus
# the (bands, dataset, lr, reg) -> run-directory-name mapping.
results_glob_str = os.path.join(LOGS_DIR, 'DHS_OOC/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the lowest
# validation MSE. The column subset is selected with a list (double brackets):
# tuple-style `groupby(...)['a', 'b']` selection was deprecated and later
# removed from pandas.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'r2', 'R2', 'mse']]
        .apply(get_best_epoch_mse)
        .reset_index('epoch'))

with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val R2')

In [None]:
# For each (bands, dataset) pair, find the row with the lowest validation MSE;
# idxmin returns that row's full index label.
best_mse_idx = perf.groupby(['bands', 'dataset'])['val mse'].idxmin()
colordisplay(perf.loc[best_mse_idx, :], 'val mse')

# Same selection, but by the highest validation r2.
best_r2_idx = perf.groupby(['bands', 'dataset'])['val r2'].idxmax()
colordisplay(perf.loc[best_r2_idx, :], 'val r2')

In [None]:
# Side-by-side table of the hyperparameters chosen by lowest val MSE and by
# highest val r2.
cols = ['lr', 'reg', 'epoch', 'val mse', 'val r2']
best_mse_params = perf.loc[best_mse_idx, :].reset_index(['lr', 'reg'])[cols]
best_r2_params = perf.loc[best_r2_idx, :].reset_index(['lr', 'reg'])[cols]

# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
with pd.option_context('display.precision', 3):
    display(pd.concat([best_mse_params, best_r2_params], keys=['mse', 'r2'], axis=1))

In [None]:
# Rows chosen by lowest val MSE; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
mse_plot_df = perf.loc[best_mse_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = mse_plot_df.unstack().plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = mse_plot_df.unstack(-2).plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')
plt.show()

In [None]:
# Rows chosen by highest val r2; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
r2_plot_df = perf.loc[best_r2_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = r2_plot_df.unstack().plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = r2_plot_df.unstack(-2).plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')
plt.show()

In [None]:
# Print the log directory name of each run selected by lowest val MSE.
for modelid in best_mse_idx:
    print(modelid_to_modeldir[modelid])

# DHS Incountry

In [None]:
# Gather every results.csv under the DHSIncountry log directory into one frame,
# plus the (bands, dataset, lr, reg) -> run-directory-name mapping.
results_glob_str = os.path.join(LOGS_DIR, 'DHSIncountry/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the lowest
# validation MSE. The column subset is selected with a list (double brackets):
# tuple-style `groupby(...)['a', 'b']` selection was deprecated and later
# removed from pandas.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'r2', 'R2', 'mse']]
        .apply(get_best_epoch_mse)
        .reset_index('epoch'))

with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val R2')

In [None]:
# For each (bands, dataset) pair, find the row with the lowest validation MSE;
# idxmin returns that row's full index label.
best_mse_idx = perf.groupby(['bands', 'dataset'])['val mse'].idxmin()
colordisplay(perf.loc[best_mse_idx, :], 'val mse')

# Same selection, but by the highest validation r2.
best_r2_idx = perf.groupby(['bands', 'dataset'])['val r2'].idxmax()
colordisplay(perf.loc[best_r2_idx, :], 'val r2')

In [None]:
# Side-by-side table of the hyperparameters chosen by lowest val MSE and by
# highest val r2.
cols = ['lr', 'reg', 'epoch', 'val mse', 'val r2']
best_mse_params = perf.loc[best_mse_idx, :].reset_index(['lr', 'reg'])[cols]
best_r2_params = perf.loc[best_r2_idx, :].reset_index(['lr', 'reg'])[cols]

# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
with pd.option_context('display.precision', 3):
    display(pd.concat([best_mse_params, best_r2_params], keys=['mse', 'r2'], axis=1))

In [None]:
# Rows chosen by lowest val MSE; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
mse_plot_df = perf.loc[best_mse_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = mse_plot_df.unstack().plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = mse_plot_df.unstack(-2).plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')
plt.show()

In [None]:
# Rows chosen by highest val r2; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
r2_plot_df = perf.loc[best_r2_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = r2_plot_df.unstack().plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = r2_plot_df.unstack(-2).plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')
plt.show()

In [None]:
# Print the log directory name of each run selected by lowest val MSE.
for modelid in best_mse_idx:
    print(modelid_to_modeldir[modelid])

# LSMS Delta Incountry
- random initialization + bi-directional augmentation
- weighted loss by household count

In [None]:
# Gather every results.csv under the LSMSDeltaIncountry log directory into one
# frame, plus the (bands, dataset, lr, reg) -> run-directory-name mapping.
results_glob_str = os.path.join(LOGS_DIR, 'LSMSDeltaIncountry/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the lowest
# validation MSE. The column subset is selected with a list (double brackets):
# tuple-style `groupby(...)['a', 'b']` selection was deprecated and later
# removed from pandas.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'r2', 'R2', 'mse']]
        .apply(get_best_epoch_mse)
        .reset_index('epoch'))

with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val R2')

In [None]:
# For each (bands, dataset) pair, find the row with the lowest validation MSE;
# idxmin returns that row's full index label.
best_mse_idx = perf.groupby(['bands', 'dataset'])['val mse'].idxmin()
colordisplay(perf.loc[best_mse_idx, :], 'val mse')

# Same selection, but by the highest validation r2.
best_r2_idx = perf.groupby(['bands', 'dataset'])['val r2'].idxmax()
colordisplay(perf.loc[best_r2_idx, :], 'val r2')

In [None]:
# Side-by-side table of the hyperparameters chosen by lowest val MSE and by
# highest val r2.
cols = ['lr', 'reg', 'epoch', 'val mse', 'val r2']
best_mse_params = perf.loc[best_mse_idx, :].reset_index(['lr', 'reg'])[cols]
best_r2_params = perf.loc[best_r2_idx, :].reset_index(['lr', 'reg'])[cols]

# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
with pd.option_context('display.precision', 3):
    display(pd.concat([best_mse_params, best_r2_params], keys=['mse', 'r2'], axis=1))

In [None]:
# Rows chosen by lowest val MSE; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
mse_plot_df = perf.loc[best_mse_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = mse_plot_df.unstack().plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = mse_plot_df.unstack(-2).plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')
plt.show()

In [None]:
# Rows chosen by highest val r2; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
r2_plot_df = perf.loc[best_r2_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = r2_plot_df.unstack().plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = r2_plot_df.unstack(-2).plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')
plt.show()

In [None]:
# Print the log directory name of each run selected by lowest val MSE.
for modelid in best_mse_idx:
    print(modelid_to_modeldir[modelid])

# LSMS Delta Incountry (OLD)
Random initialization + label=0 augmentation

In [None]:
# Gather every results.csv under the LSMSDeltaIncountry log directory into one
# frame, plus the (bands, dataset, lr, reg) -> run-directory-name mapping.
# NOTE(review): this is the same glob as the "LSMS Delta Incountry" section, so
# both sections read the same files -- confirm whether the OLD runs live in a
# different log directory.
results_glob_str = os.path.join(LOGS_DIR, 'LSMSDeltaIncountry/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the lowest
# validation MSE. The column subset is selected with a list (double brackets):
# tuple-style `groupby(...)['a', 'b']` selection was deprecated and later
# removed from pandas.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'r2', 'R2', 'mse']]
        .apply(get_best_epoch_mse)
        .reset_index('epoch'))

with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val R2')

In [None]:
# For each (bands, dataset) pair, find the row with the lowest validation MSE;
# idxmin returns that row's full index label.
best_mse_idx = perf.groupby(['bands', 'dataset'])['val mse'].idxmin()
colordisplay(perf.loc[best_mse_idx, :], 'val mse')

# Same selection, but by the highest validation r2.
best_r2_idx = perf.groupby(['bands', 'dataset'])['val r2'].idxmax()
colordisplay(perf.loc[best_r2_idx, :], 'val r2')

In [None]:
# Side-by-side table of the hyperparameters chosen by lowest val MSE and by
# highest val r2.
cols = ['lr', 'reg', 'epoch', 'val mse', 'val r2']
best_mse_params = perf.loc[best_mse_idx, :].reset_index(['lr', 'reg'])[cols]
best_r2_params = perf.loc[best_r2_idx, :].reset_index(['lr', 'reg'])[cols]

# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
with pd.option_context('display.precision', 3):
    display(pd.concat([best_mse_params, best_r2_params], keys=['mse', 'r2'], axis=1))

In [None]:
# Rows chosen by lowest val MSE; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
mse_plot_df = perf.loc[best_mse_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = mse_plot_df.unstack().plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = mse_plot_df.unstack(-2).plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')
plt.show()

In [None]:
# Rows chosen by highest val r2; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
r2_plot_df = perf.loc[best_r2_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = r2_plot_df.unstack().plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = r2_plot_df.unstack(-2).plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')
plt.show()

In [None]:
# Print the log directory name of each run selected by lowest val MSE.
for modelid in best_mse_idx:
    print(modelid_to_modeldir[modelid])

# LSMS Delta Incountry (w/ orig_labels)
Random initialization + label=0 augmentation

In [None]:
# Gather every results.csv under the LSMSDeltaIncountry_origlabels log directory
# into one frame, plus the (bands, dataset, lr, reg) -> run-directory-name mapping.
results_glob_str = os.path.join(LOGS_DIR, 'LSMSDeltaIncountry_origlabels/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the lowest
# validation MSE. The column subset is selected with a list (double brackets):
# tuple-style `groupby(...)['a', 'b']` selection was deprecated and later
# removed from pandas.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'r2', 'R2', 'mse']]
        .apply(get_best_epoch_mse)
        .reset_index('epoch'))

with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val R2')

In [None]:
# For each (bands, dataset) pair, find the row with the lowest validation MSE;
# idxmin returns that row's full index label.
best_mse_idx = perf.groupby(['bands', 'dataset'])['val mse'].idxmin()
colordisplay(perf.loc[best_mse_idx, :], 'val mse')

# Same selection, but by the highest validation r2.
best_r2_idx = perf.groupby(['bands', 'dataset'])['val r2'].idxmax()
colordisplay(perf.loc[best_r2_idx, :], 'val r2')

In [None]:
# Side-by-side table of the hyperparameters chosen by lowest val MSE and by
# highest val r2.
cols = ['lr', 'reg', 'epoch', 'val mse', 'val r2']
best_mse_params = perf.loc[best_mse_idx, :].reset_index(['lr', 'reg'])[cols]
best_r2_params = perf.loc[best_r2_idx, :].reset_index(['lr', 'reg'])[cols]

# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
with pd.option_context('display.precision', 3):
    display(pd.concat([best_mse_params, best_r2_params], keys=['mse', 'r2'], axis=1))

In [None]:
# Rows chosen by lowest val MSE; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
mse_plot_df = perf.loc[best_mse_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = mse_plot_df.unstack().plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = mse_plot_df.unstack(-2).plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')
plt.show()

In [None]:
# Rows chosen by highest val r2; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
r2_plot_df = perf.loc[best_r2_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = r2_plot_df.unstack().plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = r2_plot_df.unstack(-2).plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')
plt.show()

In [None]:
# Print the log directory name of each run selected by lowest val MSE.
for modelid in best_mse_idx:
    print(modelid_to_modeldir[modelid])

# LSMS Delta Incountry Forward (w/ orig_labels)
Random initialization + label=0 augmentation

In [None]:
# Gather every results.csv under the LSMSDeltaIncountry_forward_origlabels log
# directory into one frame, plus the (bands, dataset, lr, reg) ->
# run-directory-name mapping.
results_glob_str = os.path.join(LOGS_DIR, 'LSMSDeltaIncountry_forward_origlabels/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the lowest
# validation MSE. The column subset is selected with a list (double brackets):
# tuple-style `groupby(...)['a', 'b']` selection was deprecated and later
# removed from pandas.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'r2', 'R2', 'mse']]
        .apply(get_best_epoch_mse)
        .reset_index('epoch'))

with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val R2')

In [None]:
# For each (bands, dataset) pair, find the row with the lowest validation MSE;
# idxmin returns that row's full index label.
best_mse_idx = perf.groupby(['bands', 'dataset'])['val mse'].idxmin()
colordisplay(perf.loc[best_mse_idx, :], 'val mse')

# Same selection, but by the highest validation r2.
best_r2_idx = perf.groupby(['bands', 'dataset'])['val r2'].idxmax()
colordisplay(perf.loc[best_r2_idx, :], 'val r2')

In [None]:
# Side-by-side table of the hyperparameters chosen by lowest val MSE and by
# highest val r2.
cols = ['lr', 'reg', 'epoch', 'val mse', 'val r2']
best_mse_params = perf.loc[best_mse_idx, :].reset_index(['lr', 'reg'])[cols]
best_r2_params = perf.loc[best_r2_idx, :].reset_index(['lr', 'reg'])[cols]

# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
with pd.option_context('display.precision', 3):
    display(pd.concat([best_mse_params, best_r2_params], keys=['mse', 'r2'], axis=1))

In [None]:
# Rows chosen by lowest val MSE; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
mse_plot_df = perf.loc[best_mse_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = mse_plot_df.unstack().plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = mse_plot_df.unstack(-2).plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')
plt.show()

In [None]:
# Rows chosen by highest val r2; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
r2_plot_df = perf.loc[best_r2_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = r2_plot_df.unstack().plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = r2_plot_df.unstack(-2).plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')
plt.show()

In [None]:
# Print the log directory name of each run selected by lowest val MSE.
for modelid in best_mse_idx:
    print(modelid_to_modeldir[modelid])

# LSMS Index of Delta Incountry
Random initialization + label=0, bi-dir, and levels augmentation

In [None]:
# Gather every results.csv under the LSMSIndexOfDeltaIncountry log directory into
# one frame, plus the (bands, dataset, lr, reg) -> run-directory-name mapping.
results_glob_str = os.path.join(LOGS_DIR, 'LSMSIndexOfDeltaIncountry/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the lowest
# validation MSE; the epoch index level is moved back into a regular column.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'r2', 'R2', 'mse']]
        .apply(get_best_epoch_mse)
        .reset_index('epoch'))

# Allow up to 500 rows so the full table is rendered.
with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val R2')

In [None]:
# For each (bands, dataset) pair, find the row with the lowest validation MSE;
# idxmin returns that row's full index label.
best_mse_idx = perf.groupby(['bands', 'dataset'])['val mse'].idxmin()
colordisplay(perf.loc[best_mse_idx, :], 'val mse')

# Same selection, but by the highest validation r2.
best_r2_idx = perf.groupby(['bands', 'dataset'])['val r2'].idxmax()
colordisplay(perf.loc[best_r2_idx, :], 'val r2')

In [None]:
# Side-by-side table of the hyperparameters chosen by lowest val MSE and by
# highest val r2.
cols = ['lr', 'reg', 'epoch', 'val mse', 'val r2']
best_mse_params = perf.loc[best_mse_idx, :].reset_index(['lr', 'reg'])[cols]
best_r2_params = perf.loc[best_r2_idx, :].reset_index(['lr', 'reg'])[cols]

# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
with pd.option_context('display.precision', 3):
    display(pd.concat([best_mse_params, best_r2_params], keys=['mse', 'r2'], axis=1))

In [None]:
# Rows chosen by lowest val MSE; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
mse_plot_df = perf.loc[best_mse_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = mse_plot_df.unstack().plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = mse_plot_df.unstack(-2).plot(kind='bar', y='val R2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val R^2')
plt.show()

In [None]:
# Rows chosen by highest val r2; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
r2_plot_df = perf.loc[best_r2_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = r2_plot_df.unstack().plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')

# unstack(-2) pivots the bands level into the columns instead.
ax = r2_plot_df.unstack(-2).plot(kind='bar', y='val r2', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Folds')
ax.set_ylabel('val r^2')
plt.show()

In [None]:
# Print the log directory name of each run selected by lowest val MSE.
for modelid in best_mse_idx:
    print(modelid_to_modeldir[modelid])

# LSMS Delta Class
Random initialization + label=0 augmentation

In [None]:
# Gather every results.csv under the LSMSDeltaClassIncountry log directory into
# one frame, plus the (bands, dataset, lr, reg) -> run-directory-name mapping.
results_glob_str = os.path.join(LOGS_DIR, 'LSMSDeltaClassIncountry/*/results.csv')
all_results_df, modelid_to_modeldir = read_results(results_glob_str)
# Preview only: cap the rendered table at 4 rows.
with pd.option_context('display.max_rows', 4):
    display(all_results_df)

In [None]:
# For each (bands, dataset, lr, reg) run, keep only the epoch with the highest
# validation accuracy. The column subset is selected with a list (double
# brackets): tuple-style `groupby(...)['a', 'b']` selection was deprecated and
# later removed from pandas.
perf = (all_results_df
        .groupby(['bands', 'dataset', 'lr', 'reg'])[['epoch', 'split', 'loss_xent', 'acc']]
        .apply(get_best_epoch_acc)
        .reset_index('epoch'))

with pd.option_context('display.max_rows', 500):
    colordisplay(perf, columns='val acc')

In [None]:
# For each (bands, dataset) pair, find the row with the highest validation
# accuracy; idxmax returns that row's full index label.
best_acc_idx = perf.groupby(['bands', 'dataset'])['val acc'].idxmax()
colordisplay(perf.loc[best_acc_idx, :], 'val acc')

In [None]:
# Rows chosen by highest val accuracy; lr and reg move out of the index, leaving
# (bands, dataset) as the index.
acc_plot_df = perf.loc[best_acc_idx, :].reset_index(level=['lr', 'reg'])
# unstack() pivots the innermost index level (dataset) into the columns.
ax = acc_plot_df.unstack().plot(kind='bar', y='val acc', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val acc')

# unstack(-2) pivots the bands level into the columns instead.
ax = acc_plot_df.unstack(-2).plot(kind='bar', y='val acc', grid=True)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('val acc')
plt.show()

In [None]:
# Print the log directory name of each run selected by highest val accuracy.
for modelid in best_acc_idx:
    print(modelid_to_modeldir[modelid])