In [118]:
import numpy as np
import pandas as pd
import os
import shutil
from subprocess import Popen, PIPE
from IPython.display import clear_output
import time

In [125]:
# --- Cluster locations ------------------------------------------------------
# Interpreter used to launch the experiment script inside each Slurm job.
PYPATH = "/home/shibal/anaconda3/envs/jasa/bin/python"
# Experiment entry point executed by every job.
FILEPATH = "/home/shibal/Additive-Models-with-Structured-Interactions/SparseAMsWithInteractions/src/AMsWithInteractionsStrongHierarchy/AMsWithInteractionsStrongHierarchy-Synthetic.py"
# Root directory under which the bash scripts and logs are written.
PATH = "/pool001/shibal/results-synthetic"

# --- Experiment settings (forwarded as CLI flags and embedded in paths) -----
dataset = "large-synthetic"
dist = "normal"
train_size = 1000
# n = 100
version = 23
r = 1.0

In [126]:
def make_bash_file(seed, version, dist, r):
    """Write the Slurm batch script for one seed and return its path.

    The script and its log directory live under ``PATH`` in a sub-tree keyed
    by dataset, distribution, training size, version and ``r``. Besides the
    arguments, the function reads the module-level ``PATH``, ``PYPATH``,
    ``FILEPATH``, ``dataset`` and ``train_size``.

    Args:
        seed: Seed forwarded as ``--seed``; also names the script and log folder.
        version: Value forwarded as ``--version`` and embedded in the paths.
        dist: Value forwarded as ``--dist`` and embedded in the paths.
        r: Value forwarded as ``--r`` and embedded in the paths.

    Returns:
        Path of the ``seed<seed>.sh`` file that was written.
    """
    run_tag = f"{dataset}/{dist}/train_size_{train_size}/v{version}_r{r}"
    bash_folder_path = f"{PATH}/bashes/{run_tag}"
    log_path = f"{PATH}/logs/{run_tag}/seed{seed}"
    for folder in (bash_folder_path, log_path):
        os.makedirs(folder, exist_ok=True)

    # stdout/stderr of the run are additionally appended to output.txt via tee.
    run_command = (
        f"{PYPATH} -u {FILEPATH} --dataset {dataset} --dist {dist} --seed {seed} "
        f"--train_size {train_size} --version {version} --r {r} |& tee -a {log_path}/output.txt"
    )
    script_lines = [
        "#!/bin/bash",
        "#SBATCH --cpus-per-task=2",
        "#SBATCH --time=1-00:00",
        "#SBATCH --mem=32G",
        # "#SBATCH -p sched_mit_sloan_batch",
        "#SBATCH -p sched_mit_sloan_interactive",
        "#SBATCH --mail-type=FAIL",
        "#SBATCH --mail-user=shibal@mit.edu",
        f"#SBATCH -o {log_path}/seed{seed}_%j.out",
        f"#SBATCH -e {log_path}/_seed{seed}_%j.err",
        "",
        "module load sloan/python/modules/python-3.6/gurobipy/9.0.1",
        "",
        run_command,
    ]

    bash_file_path = os.path.join(bash_folder_path, f"seed{seed}.sh")
    with open(bash_file_path, "w") as f:
        # Joined with "\n" and no trailing newline.
        f.write("\n".join(script_lines))
    return bash_file_path


In [127]:
# Generate one Slurm script per seed (0..24) and remember where each was written.
seeds = np.arange(25)
bash_files = []
for seed in seeds:
    script_path = make_bash_file(seed, version, dist, r)
    bash_files.append(script_path)

In [128]:
# Job ids returned by sbatch are accumulated here by the submission cell.
submitted = []
# Seeds selected for this run; swap in the commented line for a subset.
torun = seeds
# torun = range(1,6)
print(len(torun))

25


In [129]:
# command = """/home/shibal/anaconda3/envs/jasa/bin/python -u /home/shibal/Additive-Models-with-Structured-Interactions/SparseAMsWithInteractions/src/AMsWithInteractionsStrongHierarchy/AMsWithInteractionsStrongHierarchy-Synthetic.py --dataset large-synthetic --dist normal --seed 0 --train_size 1000 --version 21 --r 1.0 |& tee -a /pool001/shibal/results-synthetic/logs/large-synthetic/normal/v21_r1.0/seed0/output.txt"""
# !{command}

In [130]:
# Submit one Slurm job per seed, retrying each submission until sbatch succeeds.
# Job ids of accepted submissions are appended to `submitted`.
exit_code = 1
for i, seed in enumerate(seeds):
    # Clear the accumulated cell output every 100 submissions.
    if i % 100 == 0:
        clear_output(wait=True)
    print(i)
    sh = make_bash_file(seed, version, dist, r)
    while True:
        # Pipe stderr as well: without stderr=PIPE, communicate() always
        # returns None for `err`, so sbatch error messages were never logged.
        process = Popen(["sbatch", sh], stdout=PIPE, stderr=PIPE)
        (output, err) = process.communicate()
        # communicate() has already waited for the process to exit.
        exit_code = process.returncode
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),output,err)
        if exit_code == 0:
            print(sh,"submitted!")
            # sbatch reports "Submitted batch job <id>". Take the first numeric
            # token instead of slicing fixed character positions, which was
            # only correct for job ids of exactly 8 digits.
            tokens = output.decode(errors="replace").split()
            tmp_id = next((tok for tok in tokens if tok.isdigit()), "")
            print("job id:", tmp_id)
            submitted.append(tmp_id)
            break
        # NOTE(review): this waits 10000 seconds (~2.8 hours) before retrying;
        # confirm that is intended rather than 10 seconds.
        time.sleep(10000)

0
2023-08-25 06:41:45 b'Submitted batch job 51347003\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/train_size_1000/v23_r1.0/seed0.sh submitted!
job id: 51347003
1
2023-08-25 06:41:45 b'Submitted batch job 51347004\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/train_size_1000/v23_r1.0/seed1.sh submitted!
job id: 51347004
2
2023-08-25 06:41:46 b'Submitted batch job 51347005\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/train_size_1000/v23_r1.0/seed2.sh submitted!
job id: 51347005
3
2023-08-25 06:41:46 b'Submitted batch job 51347006\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/train_size_1000/v23_r1.0/seed3.sh submitted!
job id: 51347006
4
2023-08-25 06:41:46 b'Submitted batch job 51347007\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic/normal/train_size_1000/v23_r1.0/seed4.sh submitted!
job id: 51347007
5
2023-08-25 06:41:47 b'Submitted batch job 51347008\n' None
/poo

In [2]:
from subprocess import Popen, PIPE

In [117]:
# Cancel a contiguous range of Slurm job ids and report each successful scancel.
first_job, last_job = 51329424, 51329463
for job in range(first_job, last_job + 1):
    process = Popen(['scancel', str(job)], stdout=PIPE)
    output, err = process.communicate()
    exit_code = process.wait()
    # Non-zero exit codes are skipped silently.
    if exit_code != 0:
        continue
    print(job, "deleted!")

51329424 deleted!
51329425 deleted!
51329426 deleted!
51329427 deleted!
51329428 deleted!
51329429 deleted!
51329430 deleted!
51329431 deleted!
51329432 deleted!
51329433 deleted!
51329434 deleted!
51329435 deleted!
51329436 deleted!
51329437 deleted!
51329438 deleted!
51329439 deleted!
51329440 deleted!
51329441 deleted!
51329442 deleted!
51329443 deleted!
51329444 deleted!
51329445 deleted!
51329446 deleted!
51329447 deleted!
51329448 deleted!
51329449 deleted!
51329450 deleted!
51329451 deleted!
51329452 deleted!
51329453 deleted!
51329454 deleted!
51329455 deleted!
51329456 deleted!
51329457 deleted!
51329458 deleted!
51329459 deleted!
51329460 deleted!
51329461 deleted!
51329462 deleted!
51329463 deleted!


In [214]:
# Delete the v12 / r1.0 AMsWithInteractionsStrongHierarchy result directory
# of every seed 0..99 under N_train_100.
# NOTE(review): this is destructive and cannot be undone; re-running the cell
# after the directories are gone makes `rm` report missing paths.
for seed in range(100):
    command = f"""rm -r /pool001/shibal/results-synthetic/N_train_100/seed{seed}/AMsWithInteractionsStrongHierarchy/v12/r1.0"""
    # Echo the command so the cell output records what was removed.
    print(command)
    # IPython shell escape: runs the interpolated command in a system shell.
    !{command}

rm -r /pool001/shibal/results-synthetic/N_train_100/seed0/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed1/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed2/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed3/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed4/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed5/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed6/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed7/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed8/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed9/AMsWithInteractionsStron

rm -r /pool001/shibal/results-synthetic/N_train_100/seed80/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed81/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed82/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed83/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed84/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed85/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed86/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed87/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed88/AMsWithInteractionsStrongHierarchy/v12/r1.0
rm -r /pool001/shibal/results-synthetic/N_train_100/seed89/AMsWithInterac

In [131]:
import numpy as np

In [203]:
vals = []
MISE = []
mains = []
interactions = []
fprs_main = []
fnrs_main = []
f1s_main = []
fprs_interaction = []
fnrs_interaction = []
f1s_interaction = []


for seed in np.arange(100):
    filename = '/pool001/shibal/results-synthetic/large-synthetic/normal/N_train_1000/seed{}/AMsWithInteractionsStrongHierarchy/v23/r1.0'.format(seed)
    
    try:
        with open(filename+'/Results-HS.txt') as file:
            lines = file.readlines()
            val = float([line for line in lines if "val" in line][0].split("val: ")[1].split(",")[0])
            vals.append(val)
            mise = float([line for line in lines if "True" in line][0].split(" ")[-1].split("\n")[0])
            MISE.append(mise)
            fpr_main = float([line for line in lines if "FPR (main)" in line][0].split(" ")[-1].split("\n")[0])
            fprs_main.append(fpr_main)
            fnr_main = float([line for line in lines if "FNR (main)" in line][0].split(" ")[-1].split("\n")[0])
            fnrs_main.append(fnr_main)
            f1_main = float([line for line in lines if "F1 (main)" in line][0].split(" ")[-1].split("\n")[0])
            f1s_main.append(f1_main)
            fpr_interaction = float([line for line in lines if "FPR (interactions)" in line][0].split(" ")[-1].split("\n")[0])
            fprs_interaction.append(fpr_interaction)
            fnr_interaction = float([line for line in lines if "FNR (interactions)" in line][0].split(" ")[-1].split("\n")[0])
            fnrs_interaction.append(fnr_interaction)
            f1_interaction = float([line for line in lines if "F1 (interactions)" in line][0].split(" ")[-1].split("\n")[0])
            f1s_interaction.append(f1_interaction)
            print("Seed: ", seed, " mise:", mise)
            
        with open(filename+'/support_set.npy', 'rb') as f:
            main_set = np.load(f)
            interaction_set = np.load(f)
            mains.append(main_set)
            interactions.append(interaction_set)
    except:
        print("Seed: ", seed, "Not found")

Seed:  0  mise: 0.05746948823054627
Seed:  1  mise: 0.10864654573857895
Seed:  2  mise: 0.14212003165823628
Seed:  3  mise: 0.13030285348364568
Seed:  4  mise: 0.10690098756700583
Seed:  5  mise: 0.28050942452727323
Seed:  6  mise: 0.13346516333125424
Seed:  7  mise: 0.12316804410129348
Seed:  8  mise: 0.14965100230796105
Seed:  9  mise: 0.14823474995258423
Seed:  10  mise: 0.08493331585172205
Seed:  11 Not found
Seed:  12 Not found
Seed:  13 Not found
Seed:  14 Not found
Seed:  15 Not found
Seed:  16 Not found
Seed:  17 Not found
Seed:  18 Not found
Seed:  19 Not found
Seed:  20 Not found
Seed:  21 Not found
Seed:  22 Not found
Seed:  23 Not found
Seed:  24 Not found
Seed:  25 Not found
Seed:  26 Not found
Seed:  27 Not found
Seed:  28 Not found
Seed:  29 Not found
Seed:  30 Not found
Seed:  31 Not found
Seed:  32 Not found
Seed:  33 Not found
Seed:  34 Not found
Seed:  35 Not found
Seed:  36 Not found
Seed:  37 Not found
Seed:  38 Not found
Seed:  39 Not found
Seed:  40 Not found
See

In [204]:
# Number of seeds for which a MISE value was collected.
len(MISE)

11

In [205]:
# Median MISE over the collected seeds.
np.median(MISE)

0.13030285348364568

In [206]:
# Report mean and standard error of every metric over the collected seeds.
# The standard error is np.std(values) / sqrt(n); np.std uses ddof=0 by default.
summary_rows = [
    ("val:", vals),
    ("MISE:", MISE),
    ("FPR (main):", fprs_main),
    ("FNR (main):", fnrs_main),
    ("F1 (main):", f1s_main),
    ("FPR (interactions):", fprs_interaction),
    ("FNR (interactions):", fnrs_interaction),
    ("F1 (interactions):", f1s_interaction),
]
for label, values in summary_rows:
    mean_value = np.mean(values)
    std_err = np.std(values)/np.sqrt(len(values))
    print(label, mean_value, "std-err:", std_err)


val: 0.19497518181818183 std-err: 0.016628892357032927
MISE: 0.13321832788637283 std-err: 0.01619731288262166
FPR (main): 0.012430426716141009 std-err: 0.0032076043713754527
FNR (main): 0.009090909090909089 std-err: 0.008667841720414472
F1 (main): 0.7938066708979178 std-err: 0.04696821894433722
FPR (interactions): 5.90295111117573e-05 std-err: 7.662219910995276e-06
FNR (interactions): 0.018181818181818177 std-err: 0.017335683440828944
F1 (interactions): 0.5885935568949923 std-err: 0.032828942147774695


In [207]:
# Show the raw per-seed "val" values parsed from Results-HS.txt.
vals

[0.12009,
 0.16861,
 0.203428,
 0.190615,
 0.170027,
 0.348311,
 0.196529,
 0.182323,
 0.209608,
 0.208821,
 0.146365]

In [208]:
# Show the raw per-seed "F1 (interactions)" values parsed from Results-HS.txt.
f1s_interaction

[0.625,
 0.625,
 0.45454545454545453,
 0.5555555555555556,
 0.5,
 0.7272727272727272,
 0.47619047619047616,
 0.5263157894736842,
 0.8333333333333333,
 0.5263157894736842,
 0.625]

In [173]:
# Show the main-effect arrays loaded from each seed's support_set.npy.
# NOTE(review): this cell's execution count (173) precedes the collection
# cell's (203), so the saved output may come from an earlier run; confirm.
mains

[array([ 25,  75, 125, 175, 225, 275, 285, 325, 375, 425, 475]),
 array([ 25,  75, 125, 174, 175, 225, 275, 325, 375, 425, 475]),
 array([ 25,  75, 125, 126, 162, 175, 225, 275, 282, 313, 325, 336, 354,
        375, 423, 425, 432, 475, 489])]

In [44]:
# Distinct values across all `mains` arrays and how often each one occurs
# (presumably selected feature indices and their selection counts; verify).
np.unique(np.concatenate(mains).ravel(), return_counts=True)

(array([ 16,  21,  22,  24,  25,  26,  29,  41,  61,  75,  78,  80,  83,
         86,  89, 105, 108, 109, 115, 122, 124, 125, 126, 127, 148, 151,
        174, 175, 183, 194, 217, 218, 220, 225, 231, 250, 262, 267, 268,
        275, 277, 284, 290, 297, 321, 325, 328, 337, 338, 344, 348, 357,
        375, 391, 393, 399, 405, 412, 419, 420, 425, 432, 434, 468, 470,
        474, 475, 489]),
 array([ 1,  1,  1, 13, 16, 12,  1,  2,  1, 16,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1, 16,  1,  2,  1,  1,  1, 16,  1,  1,  1,  1,  1, 16,
         1,  1,  1,  1,  1, 16,  1,  2,  1,  1,  2, 16,  1,  1,  1,  1,  1,
         1, 16,  1,  1,  1,  1,  1,  1,  1, 16,  1,  1,  1,  1,  1, 16,  1]))

In [115]:
# Keep only the non-empty interaction arrays, stack them row-wise, then count
# how often each distinct row (pair) occurs.
interactions = [pairs for pairs in interactions if len(pairs) > 0]
stacked_pairs = np.concatenate(interactions, axis=0)
np.unique(stacked_pairs, return_counts=True, axis=0)

(array([[  7, 175],
        [ 22, 375],
        [ 25,  75],
        [ 25, 125],
        [ 25, 375],
        [ 25, 425],
        [ 25, 475],
        [ 25, 486],
        [ 26,  75],
        [ 27, 125],
        [ 75, 175],
        [ 75, 209],
        [ 75, 475],
        [ 92, 175],
        [125, 126],
        [125, 175],
        [125, 275],
        [125, 282],
        [125, 294],
        [125, 325],
        [125, 336],
        [125, 354],
        [125, 375],
        [125, 379],
        [125, 423],
        [125, 425],
        [125, 432],
        [125, 435],
        [125, 463],
        [125, 475],
        [126, 175],
        [162, 375],
        [164, 175],
        [165, 175],
        [175, 198],
        [175, 227],
        [175, 275],
        [175, 285],
        [175, 348],
        [175, 375],
        [175, 463],
        [175, 464],
        [175, 470],
        [175, 475],
        [225, 325],
        [248, 475],
        [275, 425],
        [275, 475],
        [289, 425],
        [325, 375],
