In [267]:
import numpy as np
import pandas as pd
import os
import shutil
from subprocess import Popen, PIPE
from IPython.display import clear_output
import time

In [273]:
PYPATH = '/home/shibal/anaconda3/envs/jasa/bin/python'
FILEPATH = '/home/shibal/Additive-Models-with-Structured-Interactions/SparseAMsWithInteractions/src/AMsWithInteractionsL0/AMsWithInteractionsL0-Synthetic.py'
PATH = "/pool001/shibal/results-synthetic"

dataset = 'large-synthetic'
version = 23
# n = 100
r = 1.5
dist = 'normal'
train_size = 1000

In [274]:
def make_bash_file(seed, version, dist, r):
    bash_folder_path = f"{PATH}/bashes/{dataset}/{dist}/v{version}_r{r}"
    bash_file_path = os.path.join(bash_folder_path, "seed{}.sh".format(seed))
    log_path = f"{PATH}/logs/{dataset}/{dist}/v{version}_r{r}/seed{seed}"
    os.makedirs(bash_folder_path,exist_ok=True)
    os.makedirs(log_path,exist_ok=True)
    with open(bash_file_path,"w") as f:
        f.write("#!/bin/bash\n")
        f.write("#SBATCH --cpus-per-task=2\n")
        f.write("#SBATCH --time=1-00:00\n")
        f.write("#SBATCH --mem=16G\n")
#         f.write("#SBATCH -p sched_mit_sloan_batch\n")
        f.write("#SBATCH -p sched_mit_sloan_interactive\n")
        f.write("#SBATCH --mail-type=FAIL\n")
        f.write("#SBATCH --mail-user=shibal@mit.edu\n")
        f.write(f"#SBATCH -o {log_path}/seed{seed}_%j.out\n")
        f.write(f"#SBATCH -e {log_path}/_seed{seed}_%j.err\n\n")
        f.write("module load sloan/python/modules/python-3.6/gurobipy/9.0.1\n\n")
        f.write(f"{PYPATH} -u {FILEPATH}  --dataset {dataset} --dist {dist} --seed {seed} --train_size {train_size} --version {version} --r {r} |& tee -a {log_path}/output_{train_size}.txt\n\n") 
    return bash_file_path


In [275]:
bash_files = []
seeds = np.arange(25)
for seed in seeds:
    bash_files.append(make_bash_file(seed, version, dist, r))

In [276]:
torun = seeds
# torun = range(1,6)
submitted = []
print(len(torun))

25


In [277]:
exit_code = 1
for i, seed in enumerate(seeds):
    if i % 100 == 0:
        clear_output(wait=True)
    print(i)
    sh = make_bash_file(seed, version, dist, r)
    while True:
        process = Popen(["sbatch",sh], stdout=PIPE)
        (output, err) = process.communicate()
        exit_code = process.wait()
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),output,err)
        if exit_code == 0:
            print(sh,"submitted!")
            tmp_id = str(output)[-11:-3]
            print("job id:", tmp_id)
            submitted.append(tmp_id)
            break
        time.sleep(10000)

0
2023-08-24 01:52:46 b'Submitted batch job 51308599\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/v23_r1.0/seed0.sh submitted!
job id: 51308599
1
2023-08-24 01:52:46 b'Submitted batch job 51308600\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/v23_r1.0/seed1.sh submitted!
job id: 51308600
2
2023-08-24 01:52:47 b'Submitted batch job 51308601\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/v23_r1.0/seed2.sh submitted!
job id: 51308601
3
2023-08-24 01:52:47 b'Submitted batch job 51308602\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/v23_r1.0/seed3.sh submitted!
job id: 51308602
4
2023-08-24 01:52:47 b'Submitted batch job 51308603\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/v23_r1.0/seed4.sh submitted!
job id: 51308603
5
2023-08-24 01:52:48 b'Submitted batch job 51308604\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/v23_r1.0/seed5.sh su

In [142]:
# command = """/home/shibal/anaconda3/envs/jasa/bin/python -u /home/shibal/Additive-Models-with-Structured-Interactions/SparseAMsWithInteractions/src/AMsWithInteractionsL0/AMsWithInteractionsL0-Synthetic.py  --dataset large-synthetic --dist normal --seed 0 --train_size 200 --version 19 --r 1.0 |& tee -a /pool001/shibal/results-synthetic/logs/large-synthetic/normal/v19_r1.0/seed0/output_200.txt"""

In [143]:
# !{command}

In [144]:
from subprocess import Popen, PIPE

In [266]:
for job in range(51305446, 51305467):
    process = Popen(['scancel',str(job)], stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()
    if exit_code ==0:
        print(job, "deleted!")

51305446 deleted!
51305447 deleted!
51305448 deleted!
51305449 deleted!
51305450 deleted!
51305451 deleted!
51305452 deleted!
51305453 deleted!
51305454 deleted!
51305455 deleted!
51305456 deleted!
51305457 deleted!
51305458 deleted!
51305459 deleted!
51305460 deleted!
51305461 deleted!
51305462 deleted!
51305463 deleted!
51305464 deleted!
51305465 deleted!
51305466 deleted!


In [33]:
for seed in range(100):
    command = f"""rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed{seed}/AMsWithInteractionsL0/v13/r1.0"""
    print(command)
    !{command}

rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed0/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed1/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed2/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed3/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed4/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed5/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed6/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed7/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed8/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic

rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed78/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed79/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed80/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed81/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed82/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed83/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed84/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed85/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed86/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/hetero

In [2]:
import numpy as np

In [13]:
MISE = []
mains = []
interactions = []
fprs_main = []
fnrs_main = []
f1s_main = []
fprs_interaction = []
fnrs_interaction = []
f1s_interaction = []


for seed in np.arange(25):
    filename = '/pool001/shibal/results-synthetic/large-synthetic/normal/N_train_1000/seed{}/AMsWithInteractionsL0/v23/r1.0'.format(seed)
    
    try:
        with open(filename+'/Results.txt') as file:
            lines = file.readlines()
            mise = float([line for line in lines if "True" in line][0].split(" ")[-1].split("\n")[0])
            MISE.append(mise)
            fpr_main = float([line for line in lines if "FPR (main)" in line][0].split(" ")[-1].split("\n")[0])
            fprs_main.append(fpr_main)
            fnr_main = float([line for line in lines if "FNR (main)" in line][0].split(" ")[-1].split("\n")[0])
            fnrs_main.append(fnr_main)
            f1_main = float([line for line in lines if "F1 (main)" in line][0].split(" ")[-1].split("\n")[0])
            f1s_main.append(f1_main)
            fpr_interaction = float([line for line in lines if "FPR (interactions)" in line][0].split(" ")[-1].split("\n")[0])
            fprs_interaction.append(fpr_interaction)
            fnr_interaction = float([line for line in lines if "FNR (interactions)" in line][0].split(" ")[-1].split("\n")[0])
            fnrs_interaction.append(fnr_interaction)
            f1_interaction = float([line for line in lines if "F1 (interactions)" in line][0].split(" ")[-1].split("\n")[0])
            f1s_interaction.append(f1_interaction)
            print("Seed: ", seed, " mise:", mise)
            
        with open(filename+'/support_set.npy', 'rb') as f:
            main_set = np.load(f)
            interaction_set = np.load(f)
            mains.append(main_set)
            interactions.append(interaction_set)
    except:
        pass

Seed:  0  mise: 0.0878975628717445
Seed:  1  mise: 0.1050802460857798
Seed:  2  mise: 0.21075620683449997
Seed:  3  mise: 0.07761845494237664
Seed:  4  mise: 0.07738820296142207
Seed:  5  mise: 0.2852766622986521
Seed:  6  mise: 0.07899483641679113
Seed:  7  mise: 0.11407031138869814
Seed:  8  mise: 0.3901042307071032
Seed:  9  mise: 0.1077776688508339
Seed:  10  mise: 0.09456175027097409
Seed:  11  mise: 0.10784297341317606
Seed:  12  mise: 0.08714790155316625
Seed:  13  mise: 0.10270093557784425
Seed:  14  mise: 0.140145252200519
Seed:  15  mise: 0.09853737487421364
Seed:  16  mise: 0.13059832950679628
Seed:  17  mise: 0.07056519537304665
Seed:  18  mise: 0.32196611788128315
Seed:  19  mise: 0.13769217464830924
Seed:  20  mise: 0.08469471045408668
Seed:  21  mise: 0.08464434602132379
Seed:  22  mise: 0.0711437849863742
Seed:  23  mise: 0.09019461253634403
Seed:  24  mise: 0.12167200018151834


In [14]:
len(MISE)

25

In [15]:
print("MISE:", np.mean(MISE), "std-err:", np.std(MISE)/np.sqrt(len(MISE)))
print("FPR (main):", np.mean(fprs_main), "std-err:", np.std(fprs_main)/np.sqrt(len(fprs_main)))
print("FNR (main):", np.mean(fnrs_main), "std-err:", np.std(fnrs_main)/np.sqrt(len(fnrs_main)))
print("F1 (main):", np.mean(f1s_main), "std-err:", np.std(f1s_main)/np.sqrt(len(f1s_main)))
print("FPR (interactions):", np.mean(fprs_interaction), "std-err:", np.std(fprs_interaction)/np.sqrt(len(fprs_interaction)))
print("FNR (interactions):", np.mean(fnrs_interaction), "std-err:", np.std(fnrs_interaction)/np.sqrt(len(fnrs_interaction)))
print("F1 (interactions):", np.mean(f1s_interaction), "std-err:", np.std(f1s_interaction)/np.sqrt(len(f1s_interaction)))


MISE: 0.13116287371347507 std-err: 0.016228327128134733
FPR (main): 0.0 std-err: 0.0
FNR (main): 0.804 std-err: 0.015471263684650977
F1 (main): 0.320945720945721 std-err: 0.02113942767247869
FPR (interactions): 1.2505511242935086e-05 std-err: 1.980801822788299e-06
FNR (interactions): 0.096 std-err: 0.035991110013446376
F1 (interactions): 0.8167015207015207 std-err: 0.03108046590489163


In [16]:
mains

[array([225, 275]),
 array([225, 275]),
 array([425]),
 array([225, 425]),
 array([225, 275]),
 array([225, 275]),
 array([225]),
 array([225, 275]),
 array([225]),
 array([225]),
 array([225]),
 array([225, 275]),
 array([225, 275]),
 array([225, 275]),
 array([225]),
 array([275]),
 array([225, 275, 325, 475]),
 array([225, 275]),
 array([375, 425, 475]),
 array([225, 325, 375]),
 array([225, 275, 425]),
 array([225, 275, 425]),
 array([225, 275]),
 array([225, 275]),
 array([225, 275])]

In [286]:
np.unique(np.concatenate(mains).ravel(), return_counts=True)

(array([225, 275, 325, 375, 425, 475]), array([21, 15,  2,  2,  5,  2]))

In [287]:
interactions = [inter for inter in interactions if len(inter)>0]
np.unique(np.concatenate(interactions, axis=0), return_counts=True, axis=0)

(array([[ 25,  75],
        [ 25, 125],
        [ 25, 475],
        [ 75, 125],
        [ 75, 175],
        [ 75, 275],
        [ 75, 425],
        [125, 175],
        [125, 325],
        [125, 425],
        [125, 475],
        [175, 275],
        [175, 375],
        [175, 475],
        [225, 325],
        [225, 425],
        [275, 375],
        [275, 425],
        [325, 375],
        [325, 425],
        [325, 475],
        [375, 425],
        [375, 475],
        [425, 475]]),
 array([24, 22,  1,  1,  1,  1,  1, 24,  1,  1,  2,  1,  6,  2,  2,  1,  3,
         1, 18,  4,  3,  2,  5, 20]))