In [1]:
import numpy as np
import pandas as pd
from multiprocess import Pool
from pytorch_lightning import seed_everything
from spacebench import SpaceEnv
from tqdm import tqdm

from cfs_erf_spatial import erf_spatial

# Fix the global random seed; the trailing `;` hides the call's return value
# from the notebook output.
SEED = 42
seed_everything(SEED);

  from .autonotebook import tqdm as notebook_tqdm
Global seed set to 42


### Load Environment

In [2]:
# Name of the SpaceBench environment to load; `env` is used by the cells
# below to generate datasets (`env.make_all()`) and to count them
# (`env.confounding_score_dict`).
envname = "spacec_elect_cs_ed_above_college_election_dem_pct"
env = SpaceEnv(envname)

### Run spatial and spatial+ for each dataset in parallel

In [3]:
# Run `erf_spatial` on every dataset produced by `env.make_all()` (one per
# masked variable) using a pool of worker processes.
N_PROCESSES = 4  # number of worker processes
dataset_list = env.make_all()
with Pool(N_PROCESSES) as p:
    # `imap` yields results lazily and in the same order as `dataset_list`
    # (`imap_unordered` is the variant that does not preserve order).
    # Each result should be a length-4 vector of scores and ERF errors.
    # NOTE(review): the original comment listed the vector as
    # (confounding, smoothness, erf_error_spatial, erf_error_spatialplus),
    # but the table cells read row 0 as smoothness and row 1 as confounding;
    # confirm the order against `erf_spatial`.
    results = list(
        tqdm(
            p.imap(erf_spatial, dataset_list),
            total=len(env.confounding_score_dict),  # total number of datasets in env
        )
    )

# Stack from a real list: passing a bare iterator to `np.column_stack` is
# deprecated and rejected by recent NumPy versions. Each result vector becomes
# one column, so each row holds one quantity across all datasets.
pool_outputs = np.column_stack(results)

  pool_outputs = np.column_stack(
  9%|▉         | 4/45 [01:43<17:38, 25.81s/it]
9it [01:43, 11.49s/it]


KeyboardInterrupt: 

### Create table

In [5]:
# Split the result matrix into its first four rows (one entry per dataset).
smoothness_scores, confounding_scores, erf_error_spatial, erf_error_spatialplus = (
    pool_outputs[row, :] for row in range(4)
)

# Bin both scores into "low"/"high" categories used as grouping keys.
smoothness_labels = ["low" if score < 0.5 else "high" for score in smoothness_scores]
confounding_labels = ["low" if score < 0.1 else "high" for score in confounding_scores]

erf_errors = pd.DataFrame(
    {
        "smoothness": smoothness_labels,
        "confounding": confounding_labels,
        "spatial": erf_error_spatial,
        "spatialplus": erf_error_spatialplus,
    }
)
# Mean and standard deviation of the `spatial` and `spatialplus` columns
# for each (smoothness, confounding) category pair.
erf_errors.groupby(["smoothness", "confounding"]).agg(["mean", "std"])

Unnamed: 0_level_0,Unnamed: 1_level_0,spatial,spatial,spatialplus,spatialplus
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
smoothness,confounding,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
high,high,310.610047,106.040477,309.964298,104.444811
high,low,215.8203,151.698644,216.777677,151.967323
low,high,216.324146,42.888639,217.610887,42.348767
low,low,190.616805,52.404172,192.268866,53.233696


### Alternatively, read the results from `out.csv`, where they are being saved

In [13]:
# Load results from `out.csv` instead of using the in-memory pool output.
# `skiprows=1` skips the first line (presumably a header row). `ndmin=2`
# keeps the array two-dimensional even when the file holds a single data row;
# without it the row indexing below raises IndexError in that case.
# After the transpose, each row holds one quantity across all datasets.
pool_outputs = np.loadtxt('out.csv', delimiter=',', skiprows=1, ndmin=2).transpose()

smoothness_scores = pool_outputs[0, :]
confounding_scores = pool_outputs[1, :]
erf_error_spatial = pool_outputs[2, :]
erf_error_spatialplus = pool_outputs[3, :]

# Bin both scores into "low"/"high" categories used as grouping keys.
erf_errors = dict(
    smoothness=["low" if x < 0.5 else "high" for x in smoothness_scores],
    confounding=["low" if x < 0.1 else "high" for x in confounding_scores],
    spatial=erf_error_spatial,
    spatialplus=erf_error_spatialplus,
)
erf_errors = pd.DataFrame(erf_errors)
# Mean and standard deviation of the `spatial` and `spatialplus` columns
# for each (smoothness, confounding) category pair.
erf_errors.groupby(["smoothness", "confounding"]).agg(["mean", "std"])

Unnamed: 0_level_0,Unnamed: 1_level_0,spatial,spatial,spatialplus,spatialplus
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
smoothness,confounding,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
high,low,425.494339,423.796668,425.965964,424.27759
low,high,206.488579,2.105307,207.958599,2.153853
