In [8]:
# imports

import math
import pickle
import os

import matplotlib.pyplot as plt
# import seaborn as sns

import numpy as np
import pandas as pd

import wandb

import functools
import itertools

plt.rcParams['pdf.fonttype'] = 42
plt.rcParams["font.family"] = "Times"
plt.rcParams["font.weight"] = "light"

%matplotlib inline

In [12]:
def download_and_merge(path, filters, keys, verbose=True):
    # get experiments
    api = wandb.Api()
    runs = api.runs(path=path, filters=filters) # filters so loading doesn't take too much time
    print("number of runs: ", len(runs))

    ## loading block suggested by wandb, somewhat modified
    summary_list, config_list, name_list, history_list = [], [], [], []
    for idx, run in enumerate(runs): 
        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files 
        summary_list.append(run.summary._json_dict)
        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config = {k: v for k,v in run.config.items()
            if not k.startswith('_')}
        config["id4ana"] = idx
        # .name is the human-readable name of the run.
        config["name"] = run.name
        config_list.append(config)

        history = run.history(keys=keys) #filter so that loading doesn't take too long
        history["id4ana"] = idx
        history_list.append(history)

    ## create config columns
    config_df = pd.DataFrame(config_list) # pandas is amazing, lol

    # look at it
    if verbose:
        print("config table: \n")
        print(config_df.head())
        print(config_df.info())

    #create history_df
    history_df = pd.concat(history_list)
    if verbose:
        print("history table: \n")
        print(history_df)

    #merge
    df = pd.merge(config_df, history_df, on="id4ana")
    if verbose:
        print("final df: \n")
        print(df)
        
    return df

def compute_speedups(df, reference_epoch):
    """
    Inputs:
        df: dataframe, which contains exactly 2 experiment conditions (which potentially multiple seeds per condition, and redloss can have additional seeds over uniform, but not the other way around):
            all uniform seeds
            all seeds of redloss, to be compared against uniform. (does not accept different redloss runs, e.g. with different IrLoMos)
        reference_epoch, int: will compare redloss against the maximum accuracy that uniform has reached by that epoch.
    Outputs:
        speedups: list of speedups, one for each seed. If redloss never reaches the acc of uniform at the target epoch, the speedup is 0
    """
    df = df.sort_values("epoch")
    seeds = df[df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection"]["seed"].unique()
    unif_seeds = df[df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection"]["seed"].unique()
    if len(seeds) == 0:
        print("no redloss runs, can't compute speedup")
        return [np.nan]
    if len(unif_seeds) == 0:
        print("no uniform runs, can't compute speedup")
        return [np.nan]
    speedups = []
    for seed in seeds:
        seed_df = df[df["seed"] == seed]
        if len(seed_df["selection_method/_target_"].unique()) == 2: # both selection methods have this seed
            unif_df = seed_df[(seed_df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection")]
            redloss_df = seed_df[(seed_df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection")]
            speedup = _compute_speedup(unif_df, redloss_df, reference_epoch)
        elif (len(seed_df["selection_method/_target_"].unique()) == 1): # only redloss method has this seed, take uniform from another seed
            unif_seed_df = df[df["seed"] == unif_seeds[0]]
            unif_df = unif_seed_df[(unif_seed_df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection")]
            redloss_df = seed_df[(seed_df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection")]
            speedup = _compute_speedup(unif_df, redloss_df, reference_epoch)
        else:
            print("this probably shouldn't happen")
            speedup = np.nan        
        speedups.append(speedup)
    return speedups

def _compute_speedup(unif_df, redloss_df, reference_epoch):
    unif_df["cummax_val_acc_epoch"] = unif_df.cummax()["val_acc_epoch"]
    unif_acc_at_reference = unif_df.loc[unif_df["epoch"] == reference_epoch, "cummax_val_acc_epoch"].iloc[0]
    if redloss_df["val_acc_epoch"].max() >= unif_acc_at_reference:
        rows_at_which_redloss_greater_unif_acc_at_reference = (redloss_df["val_acc_epoch"] >= unif_acc_at_reference)
        epoch_at_which_redloss_greater_unif_acc_at_reference = redloss_df[rows_at_which_redloss_greater_unif_acc_at_reference].iloc[0]["epoch"]
        speedup = reference_epoch/ epoch_at_which_redloss_greater_unif_acc_at_reference
    else:
        speedup = 0
    return speedup



# Loading, merging, and saving all dfs

In [None]:
dfs = {}
save_dir_dfs = "dfs"

## CIFAR-10

In [5]:
#####----- CIFAR10 -----#####
### hyperparameter transfer experiments
path = "goldiprox/jb_cifar10"
filters = {"tags": "exp_vary_target_hypers"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# replace any nans
df.loc[df["optimizer/weight_decay"].isna(), "optimizer/weight_decay"] = 0.01
weight_decays = df["optimizer/weight_decay"].unique()

# remove runs
df = df[df["seed"]==12]

# save
exp_name = "CIFAR10_hypers"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### architecture transfer experiments
path = "goldiprox/jb_cifar10"
filters = {"tags": "exp_vary_target_arch"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CIFAR10_archs"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### holdout set ablation experiments
path = "goldiprox/jb_cifar10"
filters = {"tags": "fraction_valset_exp"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CIFAR10_holdout_set"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### double IrLoMo experiments
path = "goldiprox/jb_cifar10"
filters = {"tags": "double_irlomo"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CIFAR10_double_IrLoMo"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### small CNN
path = "goldiprox/jb_cifar10"
filters = {"tags": "IrLoMo_ablation_4_ICLR_revision"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CIFAR10_small_CNN"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### default
path = "goldiprox/jb_cifar10"
filters = {"tags": "default_values_4_ICLR_revision"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CIFAR10_default"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)




number of runs:  86
number of runs:  48
number of runs:  18
number of runs:  6
number of runs:  6
number of runs:  6


## CINIC-10

In [6]:
#####----- CINIC10 -----#####
### hyperparameter transfer experiments
path = "goldiprox/cinic10_hyperparam_transfer"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# replace any nans
df.loc[df["optimizer/weight_decay"].isna(), "optimizer/weight_decay"] = 0.01
weight_decays = df["optimizer/weight_decay"].unique()

# remove runs
df = df[df["seed"]==1]

# save
exp_name = "CINIC10_hypers"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### architecture transfer experiments
path = "goldiprox/cinic_transfer"
filters = {"tags": "exp_vary_target_arch"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CINIC10_archs"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### holdout set ablation experiments
path = "goldiprox/cinic10_holdout_ablation"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# filter out irlomo training runs
df = df[~df["irreducible_loss_generator/f"].isna()]

# save
exp_name = "CINIC10_holdout_set"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### double IrLoMo experiments
path = "goldiprox/cinic10_holdout_ablation"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CINIC10_double_IrLoMo"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### small CNN
path = "goldiprox/cinic10_holdout_ablation"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CINIC10_small_CNN"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### default
path = "goldiprox/cinic10_holdout_ablation"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CINIC10_default"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)




number of runs:  72
number of runs:  46
number of runs:  44
number of runs:  44
number of runs:  44
number of runs:  44


## CIFAR-100

In [9]:
#####---- CIFAR100 -----#####
### hyperparameter transfer experiments
path = "goldiprox/cifar100_hyper_ablation"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# replace any nans
df.loc[df["optimizer/weight_decay"].isna(), "optimizer/weight_decay"] = 0.01

# remove runs
df = df[df["seed"]==12]

# save
exp_name = "CIFAR100_hypers"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### architecture transfer experiments
path = "goldiprox/cifar100_transfer_ablations"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# filter out IrLoMo training runs
df = df[~df["model/large_model/_target_"].isna()]

# filter on the correct eval set, but not available for two architectures
models_w_valset_runs_only = ["src.models.modules.cifar_model_zoo.inception.inception_v3", "src.models.modules.cifar_model_zoo.googlenet.googlenet"]
df = df[(df["eval_set"] == "test") | df["model/large_model/_target_"].isin(models_w_valset_runs_only)]

# save
exp_name = "CIFAR100_archs"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### holdout set ablation experiments
path = "goldiprox/cifar100_holdout_ablations"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# filter out potential IrLoMo training runs
df = df[~df["irreducible_loss_generator/checkpoint_path"].isna()]

# save
exp_name = "CIFAR100_holdout_set"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### double IrLoMo experiments
path = "goldiprox/cifar100_double_ablation"
filters = {"tags": "double_irlomo_figure_for_ICLR_revision"}
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# save
exp_name = "CIFAR100_double_IrLoMo"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### small CNN
path = "goldiprox/cifar100_holdout_ablations"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# filter out potential IrLoMo training runs
df = df[~df["irreducible_loss_generator/checkpoint_path"].isna()]

# save
exp_name = "CIFAR100_small_CNN"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)


### default
path = "goldiprox/cifar100_holdout_ablations"
filters = None
keys = ["trainer/global_step", "val_acc_epoch", "epoch"]

df = download_and_merge(path, filters, keys, verbose=False)

# filter out potential IrLoMo training runs
df = df[~df["irreducible_loss_generator/checkpoint_path"].isna()]

# save
exp_name = "CIFAR100_default"
dfs[exp_name] = df
with open(os.path.join(save_dir_dfs, exp_name + "_df.pkl"), "wb") as f:
    pickle.dump(df, f)




number of runs:  77
number of runs:  51
number of runs:  22
number of runs:  9
number of runs:  22
number of runs:  22


# Computing speedups

In [9]:
all_speedups = {}
save_dir_dfs = "dfs"
save_dir_speedups = "speedups"

def load_df(exp_name, path=save_dir_dfs):
    try:
        df = dfs[exp_name] #yes, yes, I probably shouldn't do it like this. Just don't call anything dfs, OK :-)
    except:
        with open(os.path.join(path, exp_name + "_df.pkl"), "rb") as f:
            df = pickle.load(f)
    return df

## CIFAR-10

### settings

In [10]:
# epoch in uniform training in reference to which the speedups are computed
cifar10_reference_epoch = 99

# determines which irreducible loss model to use for computing the speedups.
# Options are ("redloss_w_resnet18", "redloss_w_sCNN"). Does not influence
# "default" or "small_CNN" experiments, of course (these are alwas Resnet18 and
# small CNN, respectively)
cifar10_which_irlomo = "redloss_w_sCNN"

### computation

In [13]:
#####------------------------------#####
exp_name = "CIFAR10_hypers"
reference_epoch = cifar10_reference_epoch
which_irlomo = cifar10_which_irlomo

df = load_df(exp_name)

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/users/janner/goldiprox-hydra/outputs/2021-10-05/08-42-42-resnet-by-loss/checkpoints/irred_losses_and_checks.pt"), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/users/janner/goldiprox-hydra/outputs/2021-10-05/09-00-40-small-cnn-sel-4-loss/checkpoints/irred_losses_and_checks.pt") 
}

filtered = df[conditions["uniform"] | conditions[which_irlomo]]
grouped = filtered.groupby(["optimizer/lr", "datamodule/batch_size", "optimizer/weight_decay"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(group, reference_epoch)
    speedups[name] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR10_archs"
reference_epoch = cifar10_reference_epoch
which_irlomo = cifar10_which_irlomo

df = load_df(exp_name)

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/users/janner/goldiprox-hydra/outputs/2021-10-05/08-42-42-resnet-by-loss/checkpoints/irred_losses_and_checks.pt"), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/users/janner/goldiprox-hydra/outputs/2021-10-05/09-00-40-small-cnn-sel-4-loss/checkpoints/irred_losses_and_checks.pt") 
}

filtered = df[conditions["uniform"] | conditions["redloss_w_sCNN"]]
grouped = filtered.groupby(["model/large_model/_target_"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(group, reference_epoch)
    speedups[name] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR10_holdout_set"
reference_epoch = cifar10_reference_epoch
which_irlomo = cifar10_which_irlomo

df = load_df(exp_name)

irlomo_lookup_reverse = {
    "Resnet18, 0.75": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/14-40-48/checkpoints/irred_losses_and_checks.pt",
    "Resnet18, 0.5": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/14-41-23/checkpoints/irred_losses_and_checks.pt",
    "Resnet18, 0.33": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/14-40-35/checkpoints/irred_losses_and_checks.pt",
    "Resnet18, 0.25": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/14-41-05/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 0.75": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/15-00-17/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 0.5": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/15-00-00/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 0.33": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/15-00-50/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 0.25": "/users/janner/goldiprox-hydra/logs/runs/2021-11-16/15-00-33/checkpoints/irred_losses_and_checks.pt",
}

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions[which_irlomo]]

grouped = redloss_df.groupby(["irreducible_loss_generator/f"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(pd.concat((group, unif_df)), reference_epoch)
    speedups[irlomo_lookup[name]] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR10_double_IrLoMo"
reference_epoch = cifar10_reference_epoch
which_irlomo = cifar10_which_irlomo

df = load_df(exp_name)

irlomo_lookup_reverse = {
    "Resnet18, double IrLoMo": "/users/janner/goldiprox-hydra/logs/runs/2021-11-17/17-46-03/checkpoints/irred_losses_and_checks_double_irrlomo.pt",
    "small CNN, double IrLoMo": "/users/janner/goldiprox-hydra/logs/runs/2021-11-17/17-52-13/checkpoints/irred_losses_and_checks_double_irrlomo.pt",
}

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions[which_irlomo]]

grouped = redloss_df.groupby(["irreducible_loss_generator/f"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(pd.concat((group, unif_df)), reference_epoch)
    speedups[irlomo_lookup[name]] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR10_small_CNN"
reference_epoch = cifar10_reference_epoch

df = load_df(exp_name)

speedups = {}
speedup = compute_speedups(df, reference_epoch)
speedups["small_cnn"] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups

#####------------------------------#####
exp_name = "CIFAR10_default"
reference_epoch = cifar10_reference_epoch

df = load_df(exp_name)

speedups = {}
speedup = compute_speedups(df, reference_epoch)
speedups["default"] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unif_df["cummax_val_acc_epoch"] = unif_df.cummax()["val_acc_epoch"]


{(0.0001, 160, 0.001): [2.127659574468085], (0.0001, 160, 0.01): [2.0833333333333335], (0.0001, 160, 0.1): [2.127659574468085], (0.0001, 320, 0.001): [2.0833333333333335], (0.0001, 320, 0.01): [2.0], (0.0001, 320, 0.1): [2.0833333333333335], (0.0001, 960, 0.001): [1.5384615384615385], (0.0001, 960, 0.01): [1.5625], (0.0001, 960, 0.1): [1.639344262295082], (0.001, 160, 0.001): [2.857142857142857], (0.001, 160, 0.01): [2.857142857142857], (0.001, 160, 0.1): [3.4482758620689653], (0.001, 320, 0.001): [2.5641025641025643], (0.001, 320, 0.01): [2.857142857142857], (0.001, 320, 0.1): [3.225806451612903], (0.001, 960, 0.001): [1.7857142857142858], (0.001, 960, 0.01): [1.639344262295082], (0.001, 960, 0.1): [2.0], (0.01, 160, 0.001): [1.6129032258064515], (0.01, 160, 0.01): [2.0], (0.01, 160, 0.1): [2.3255813953488373], (0.01, 320, 0.001): [1.492537313432836], (0.01, 320, 0.01): [1.8181818181818181], (0.01, 320, 0.1): [2.0833333333333335], (0.01, 960, 0.001): [0.970873786407767], (0.01, 960, 0

## CINIC-10

### settings

In [14]:
# epoch in uniform training in reference to which the speedups are computed
cinic10_reference_epoch = 99

# determines which irreducible loss model to use for computing the speedups.
# Options are ("redloss_w_resnet18", "redloss_w_sCNN"). Does not influence
# "default" or "small_CNN" experiments, of course (these are alwas Resnet18 and
# small CNN, respectively)
cinic10_which_irlomo = "redloss_w_sCNN"

### computation

In [20]:
#####------------------------------#####
exp_name = "CINIC10_hypers"
reference_epoch = cinic10_reference_epoch
which_irlomo = cinic10_which_irlomo

df = load_df(exp_name)

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-10-01/13-18-09/checkpoints/irred_losses_and_checks_degraded_1.pt"), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-16/14-46-16/checkpoints/irred_losses_and_checks_degraded_1.pt") 
}

filtered = df[conditions["uniform"] | conditions["redloss_w_sCNN"]]
grouped = filtered.groupby(["optimizer/lr", "datamodule/batch_size", "optimizer/weight_decay"])
speedups = {}
for name, group in grouped:
    # print(group)
    speedup = compute_speedups(group, reference_epoch)
    speedups[name] = speedup

print(speedups)
with open("CIFAR10_hypers.pkl", "wb") as f:
    pickle.dump(speedups, f)


all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CINIC10_archs"
reference_epoch = cinic10_reference_epoch
which_irlomo = cinic10_which_irlomo

df = load_df(exp_name)

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-10-01/13-18-09/checkpoints/irred_losses_and_checks_degraded_1.pt"), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-16/14-46-16/checkpoints/irred_losses_and_checks_degraded_1.pt") 
}

filtered = df[conditions["uniform"] | conditions[which_irlomo]]
grouped = filtered.groupby(["model/large_model/_target_"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(group, reference_epoch)
    speedups[name] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CINIC10_holdout_set"
reference_epoch = cinic10_reference_epoch
which_irlomo = cinic10_which_irlomo

df = load_df(exp_name)

irlomo_lookup_reverse = {
    "Resnet18, 1": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-10-01/13-18-09/checkpoints/irred_losses_and_checks_degraded_1.pt",
    "Resnet18, 0.75": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-18/16-28-33/checkpoints/irred_losses_and_checks.pt",
    "Resnet18, 0.5": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-18/16-28-25/checkpoints/irred_losses_and_checks.pt",
    "Resnet18, 0.33": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-18/16-28-13/checkpoints/irred_losses_and_checks.pt",
    "Resnet18, 0.25": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-18/16-27-58/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 1": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-16/14-46-16/checkpoints/irred_losses_and_checks_degraded_1.pt",
    "small CNN, 0.75": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-18/16-41-55/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 0.5": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-18/16-29-10/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 0.33": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-19/14-33-56/checkpoints/irred_losses_and_checks.pt",
    "small CNN, 0.25": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-18/16-29-06/checkpoints/irred_losses_and_checks.pt",
}

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions[which_irlomo]]

grouped = redloss_df.groupby(["irreducible_loss_generator/f"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(pd.concat((group, unif_df)), reference_epoch)
    speedups[irlomo_lookup[name]] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CINIC10_double_IrLoMo"
reference_epoch = cinic10_reference_epoch
which_irlomo = cinic10_which_irlomo

df = load_df(exp_name)

irlomo_lookup_reverse = {
    "Resnet18, double IrLoMo": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-20/17-33-49/checkpoints/irred_losses_and_checks_double_irrlomo.pt",
    "small CNN, double IrLoMo": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-20/17-33-32/checkpoints/irred_losses_and_checks_double_irrlomo.pt",
}
irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions[which_irlomo]]

grouped = redloss_df.groupby(["irreducible_loss_generator/f"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(pd.concat((group, unif_df)), reference_epoch)
    speedups[irlomo_lookup[name]] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CINIC10_small_CNN"
reference_epoch = cinic10_reference_epoch

df = load_df(exp_name)

irlomo_lookup_reverse = {
    "small CNN, 1": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-11-16/14-46-16/checkpoints/irred_losses_and_checks_degraded_1.pt",
}

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions["redloss_w_sCNN"]]

speedups = {}
speedup = compute_speedups(pd.concat((redloss_df, unif_df)), reference_epoch)
speedups["small_cnn"] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CINIC10_default"
reference_epoch = cinic10_reference_epoch

df = load_df(exp_name)

irlomo_lookup_reverse = {
    "Resnet18, 1": "/data/stats-sgmcmc/magd5198/goldiprox-hydra/logs/runs/2021-10-01/13-18-09/checkpoints/irred_losses_and_checks_degraded_1.pt",
}

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions["redloss_w_resnet18"]]

speedups = {}
speedup = compute_speedups(pd.concat((redloss_df, unif_df)), reference_epoch)
speedups["default"] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups

no redloss runs, can't compute speedup
no redloss runs, can't compute speedup
no redloss runs, can't compute speedup
no redloss runs, can't compute speedup
no uniform runs, can't compute speedup
no redloss runs, can't compute speedup
no redloss runs, can't compute speedup
no redloss runs, can't compute speedup
no redloss runs, can't compute speedup
{(0.0001, 160, 0.001): [nan], (0.0001, 160, 0.1): [nan], (0.0001, 640, 0.001): [1.8181818181818181], (0.0001, 640, 0.01): [nan], (0.0001, 640, 0.1): [1.8867924528301887], (0.0001, 960, 0.001): [1.6129032258064515], (0.0001, 960, 0.01): [1.639344262295082], (0.0001, 960, 0.1): [1.7241379310344827], (0.001, 160, 0.001): [2.380952380952381], (0.001, 160, 0.01): [2.380952380952381], (0.001, 160, 0.1): [4.545454545454546], (0.001, 640, 0.001): [nan], (0.001, 640, 0.01): [nan], (0.001, 640, 0.1): [2.272727272727273], (0.001, 960, 0.001): [1.5873015873015872], (0.001, 960, 0.01): [1.5873015873015872], (0.001, 960, 0.1): [2.2222222222222223], (0.01,

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unif_df["cummax_val_acc_epoch"] = unif_df.cummax()["val_acc_epoch"]


{'small CNN, 1': [1.9230769230769231, 1.7543859649122806, 1.6129032258064515], 'small CNN, 0.25': [1.2195121951219512, 0.7299270072992701], 'small CNN, 0.5': [1.5151515151515151, 1.2658227848101267], 'small CNN, 0.75': [1.639344262295082, 1.5625], 'small CNN, 0.33': [1.1764705882352942, 1.2987012987012987]}
{'small CNN, double IrLoMo': [1.1627906976744187, 1.0204081632653061]}
{'small_cnn': [1.9230769230769231, 1.7543859649122806, 1.6129032258064515]}
{'default': [1.7857142857142858, 1.8181818181818181]}


## CIFAR-100

### settings

In [24]:
# epoch in uniform training in reference to which the speedups are computed
cifar100_reference_epoch = 99

# determines which irreducible loss model to use for computing the speedups.
# Options are ("redloss_w_resnet18", "redloss_w_sCNN"). Does not influence
# "default" or "small_CNN" experiments, of course (these are alwas Resnet18 and
# small CNN, respectively)
cifar100_which_irlomo = "redloss_w_sCNN"

### computation

In [25]:
#####------------------------------#####
exp_name = "CIFAR100_hypers"
reference_epoch = cifar100_reference_epoch
which_irlomo = cifar100_which_irlomo

df = load_df(exp_name)

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/home/yarin.oxford/mo/goldiprox-hydra/logs/multiruns/2021-11-20_09-02-23/0/checkpoints/irred_losses_and_checks.pt"), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"] == "/home/yarin.oxford/mo/goldiprox-hydra/logs/multiruns/2021-11-20_09-02-56/0/checkpoints/irred_losses_and_checks.pt") 
}

filtered = df[conditions["uniform"] | conditions[which_irlomo]]
grouped = filtered.groupby(["optimizer/lr", "datamodule/batch_size", "optimizer/weight_decay"])
speedups = {}
for name, group in grouped:
    # print(group)
    speedup = compute_speedups(group, reference_epoch)
    speedups[name] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR100_archs"
reference_epoch = cifar100_reference_epoch
which_irlomo = cifar100_which_irlomo

df = load_df(exp_name)

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") # no resnet IrLoMo runs available here
}

filtered = df[conditions["uniform"] | conditions[which_irlomo]]
grouped = filtered.groupby(["model/large_model/_target_"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(group, reference_epoch)
    speedups[name] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR100_holdout_set"
reference_epoch = cifar100_reference_epoch
which_irlomo = cifar100_which_irlomo

df = load_df(exp_name)

# automatic generation of irlomo_loookup_tabel from tags
irlomo_lookup_reverse_temp = {
    "Resnet18, 0.75": ("0.75", "resnet"),
    "Resnet18, 0.5": ("0.5", "resnet"),
    "Resnet18, 0.33": ("0.33", "resnet"),
    "Resnet18, 0.25": ("0.25", "resnet"),
    "small CNN, 0.75": ("0.75", "small_cnn"),
    "small CNN, 0.5": ("0.5", "small_cnn"),
    "small CNN, 0.33": ("0.33", "small_cnn"),
    "small CNN, 0.25": ("0.25", "small_cnn"),
}

irlomo_lookup_reverse = {}

for k,v in irlomo_lookup_reverse_temp.items():
    df_subset = df[df["logger/wandb/tags"].apply(lambda x: x == list(v))]
    assert len(df_subset["irreducible_loss_generator/checkpoint_path"].unique()) == 1
    irlomo_lookup_reverse[k] = df_subset["irreducible_loss_generator/checkpoint_path"].iloc[0]

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/checkpoint_path"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/checkpoint_path"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions[which_irlomo]]

grouped = redloss_df.groupby(["irreducible_loss_generator/checkpoint_path"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(pd.concat((group, unif_df)), reference_epoch)
    speedups[irlomo_lookup[name]] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR100_double_IrLoMo"
reference_epoch = cifar100_reference_epoch
which_irlomo = cifar100_which_irlomo

df = load_df(exp_name)

# automatic generation of irlomo_loookup_tabel from tags
irlomo_lookup_reverse_temp = {
    "Resnet18, double IrLoMo": "resnet",
    "small CNN, double IrLoMo": "small_cnn",
}

irlomo_lookup_reverse = {}

for k,v in irlomo_lookup_reverse_temp.items():
    df_subset = df[df["logger/wandb/tags"].apply(lambda x: v in x)]
    assert len(df_subset["irreducible_loss_generator/f"].unique()) == 1
    irlomo_lookup_reverse[k] = df_subset["irreducible_loss_generator/f"].iloc[0]

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/f"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions[which_irlomo]]

grouped = redloss_df.groupby(["irreducible_loss_generator/f"])
speedups = {}
for name, group in grouped:
    speedup = compute_speedups(pd.concat((group, unif_df)), reference_epoch)
    speedups[irlomo_lookup[name]] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR100_small_CNN"
reference_epoch = cifar100_reference_epoch

df = load_df(exp_name)

# automatic generation of irlomo_loookup_tabel from tags
irlomo_lookup_reverse_temp = {
    "small CNN, 1": ("one", "smallcnn"),
}

irlomo_lookup_reverse = {}

for k,v in irlomo_lookup_reverse_temp.items():
    df_subset = df[df["logger/wandb/tags"].apply(lambda x: x == list(v))]
    assert len(df_subset["irreducible_loss_generator/checkpoint_path"].unique()) == 1
    irlomo_lookup_reverse[k] = df_subset["irreducible_loss_generator/checkpoint_path"].iloc[0]

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_sCNN": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/checkpoint_path"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "small CNN" in k])),
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions["redloss_w_sCNN"]]

speedups = {}
speedup = compute_speedups(pd.concat((redloss_df, unif_df)), reference_epoch)
speedups["small_cnn"] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups


#####------------------------------#####
exp_name = "CIFAR100_default"
reference_epoch = cifar100_reference_epoch

df = load_df(exp_name)

# automatic generation of irlomo_loookup_tabel from tags
irlomo_lookup_reverse_temp = {
    "Resnet18, 1": ("one", "resnet"),
}

irlomo_lookup_reverse = {}

for k,v in irlomo_lookup_reverse_temp.items():
    df_subset = df[df["logger/wandb/tags"].apply(lambda x: x == list(v))]
    assert len(df_subset["irreducible_loss_generator/checkpoint_path"].unique()) == 1
    irlomo_lookup_reverse[k] = df_subset["irreducible_loss_generator/checkpoint_path"].iloc[0]

irlomo_lookup = {v: k for (k,v) in irlomo_lookup_reverse.items()}

conditions = {
    "uniform": df["selection_method/_target_"] == "src.curricula.selection_methods.uniform_selection",
    "redloss_w_resnet18": (df["selection_method/_target_"] == "src.curricula.selection_methods.reducible_loss_selection") & (df["irreducible_loss_generator/checkpoint_path"].isin([v for (k,v) in irlomo_lookup_reverse.items() if "Resnet18" in k])), 
}

unif_df = df[conditions["uniform"]]
redloss_df = df[conditions["redloss_w_resnet18"]]

speedups = {}
speedup = compute_speedups(pd.concat((redloss_df, unif_df)), reference_epoch)
speedups["default"] = speedup

print(speedups)
with open(os.path.join(save_dir_speedups, exp_name + ".pkl"), "wb") as f:
    pickle.dump(speedups, f)

all_speedups[exp_name] = speedups

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  unif_df["cummax_val_acc_epoch"] = unif_df.cummax()["val_acc_epoch"]


no redloss runs, can't compute speedup
{(0.0001, 160, 0.001): [4.95], (0.0001, 160, 0.01): [4.5], (0.0001, 160, 0.1): [4.5], (0.0001, 320, 0.001): [4.125], (0.0001, 320, 0.01): [4.304347826086956], (0.0001, 320, 0.1): [3.6666666666666665], (0.0001, 960, 0.001): [0], (0.0001, 960, 0.01): [1.706896551724138], (0.0001, 960, 0.1): [1.736842105263158], (0.001, 160, 0.001): [3.5357142857142856], (0.001, 160, 0.01): [3.6666666666666665], (0.001, 160, 0.1): [3.8076923076923075], (0.001, 320, 0.001): [3.413793103448276], (0.001, 320, 0.1): [3.6666666666666665], (0.001, 960, 0.001): [nan], (0.001, 960, 0.01): [3.3], (0.001, 960, 0.1): [2.6052631578947367], (0.01, 160, 0.001): [2.5384615384615383], (0.01, 160, 0.01): [2.675675675675676], (0.01, 160, 0.1): [6.1875], (0.01, 320, 0.001): [2.6052631578947367], (0.01, 320, 0.01): [2.357142857142857], (0.01, 320, 0.1): [3.5357142857142856], (0.01, 960, 0.001): [1.98], (0.01, 960, 0.01): [1.4558823529411764], (0.01, 960, 0.1): [2.357142857142857]}
{'src

# show all data

In [27]:
for k,v in all_speedups.items():
    print(k + "\n")
    print(v)
    print("\n\n")



CIFAR10_hypers

{(0.0001, 160, 0.001): [2.127659574468085], (0.0001, 160, 0.01): [2.0833333333333335], (0.0001, 160, 0.1): [2.127659574468085], (0.0001, 320, 0.001): [2.0833333333333335], (0.0001, 320, 0.01): [2.0], (0.0001, 320, 0.1): [2.0833333333333335], (0.0001, 960, 0.001): [1.5384615384615385], (0.0001, 960, 0.01): [1.5625], (0.0001, 960, 0.1): [1.639344262295082], (0.001, 160, 0.001): [2.857142857142857], (0.001, 160, 0.01): [2.857142857142857], (0.001, 160, 0.1): [3.4482758620689653], (0.001, 320, 0.001): [2.5641025641025643], (0.001, 320, 0.01): [2.857142857142857], (0.001, 320, 0.1): [3.225806451612903], (0.001, 960, 0.001): [1.7857142857142858], (0.001, 960, 0.01): [1.639344262295082], (0.001, 960, 0.1): [2.0], (0.01, 160, 0.001): [1.6129032258064515], (0.01, 160, 0.01): [2.0], (0.01, 160, 0.1): [2.3255813953488373], (0.01, 320, 0.001): [1.492537313432836], (0.01, 320, 0.01): [1.8181818181818181], (0.01, 320, 0.1): [2.0833333333333335], (0.01, 960, 0.001): [0.970873786407767

# Plotting

In [None]:
def load_speedups(exp_name, path=""):
    try:
        speedups = all_speedups[exp_name] #yes, yes, I probably shouldn't do it like this. Just don't call anything dfs, OK :-)
    except:
        with open(path + exp_name + ".pkl", "rb") as f:
            speedups = pickle.load(f)
    return speedups