In [4]:
import numpy as np
import pandas as pd
import os
import shutil
from subprocess import Popen, PIPE
from IPython.display import clear_output
import time

In [625]:
PYPATH = '/home/shibal/anaconda3/envs/jasa/bin/python'
FILEPATH = '/home/shibal/Additive-Models-with-Structured-Interactions/SparseAMsWithInteractions/src/AMsWithInteractionsL0/AMsWithInteractionsL0-Synthetic.py'
PATH = "/pool001/shibal/results-synthetic"

dataset = 'large-synthetic-correlated'
version = 35
Ki = 10
Kij = 6
r = 1.0
dist = 'normal'
correlation = 0.5
train_size = 1000
num_seeds = 25
init_seed = 0

In [626]:
def make_bash_file(seed, version, dist, r):
    bash_folder_path = f"{PATH}/bashes/{dataset}/{dist}/v{version}_r{r}/train_size_{train_size}"
    bash_file_path = os.path.join(bash_folder_path, f"seed{seed}.sh")
    log_path = f"{PATH}/logs/{dataset}/{dist}/v{version}_r{r}/train_size_{train_size}/seed{seed}"
    os.makedirs(bash_folder_path,exist_ok=True)
    os.makedirs(log_path,exist_ok=True)
    with open(bash_file_path,"w") as f:
        f.write("#!/bin/bash\n")
        f.write("#SBATCH --cpus-per-task=2\n")
        f.write("#SBATCH --time=1-00:00\n")
        f.write("#SBATCH --mem=24G\n")
        f.write("#SBATCH -p sched_mit_sloan_batch\n")
#         f.write("#SBATCH -p sched_mit_sloan_interactive\n")
        f.write("#SBATCH --mail-type=FAIL\n")
        f.write("#SBATCH --mail-user=shibal@mit.edu\n")
        f.write(f"#SBATCH -o {log_path}/seed{seed}_%j.out\n")
        f.write(f"#SBATCH -e {log_path}/_seed{seed}_%j.err\n\n")
        f.write("module load sloan/python/modules/python-3.6/gurobipy/9.0.1\n\n")
        f.write(f"{PYPATH} -u {FILEPATH}  --dataset {dataset} --dist {dist} --correlation {correlation} --seed {seed} --train_size {train_size} --version {version} --r {r} --Ki {Ki} --Kij {Kij} |& tee -a {log_path}/output_{train_size}.txt\n\n") 
    return bash_file_path


In [627]:
bash_files = []
seeds = np.arange(init_seed, init_seed+num_seeds)
for seed in seeds:
    bash_files.append(make_bash_file(seed, version, dist, r))

In [628]:
torun = seeds
# torun = range(1,6)
submitted = []
print(len(torun))

25


In [629]:
exit_code = 1
for i, seed in enumerate(seeds):
    if i % 100 == 0:
        clear_output(wait=True)
    print(i)
    sh = make_bash_file(seed, version, dist, r)
    while True:
        process = Popen(["sbatch",sh], stdout=PIPE)
        (output, err) = process.communicate()
        exit_code = process.wait()
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())),output,err)
        if exit_code == 0:
            print(sh,"submitted!")
            tmp_id = str(output)[-11:-3]
            print("job id:", tmp_id)
            submitted.append(tmp_id)
            break
        time.sleep(10000)

0
2023-09-03 20:07:20 b'Submitted batch job 51573154\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic-correlated/normal/v35_r1.0/train_size_1000/seed0.sh submitted!
job id: 51573154
1
2023-09-03 20:07:21 b'Submitted batch job 51573155\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic-correlated/normal/v35_r1.0/train_size_1000/seed1.sh submitted!
job id: 51573155
2
2023-09-03 20:07:21 b'Submitted batch job 51573156\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic-correlated/normal/v35_r1.0/train_size_1000/seed2.sh submitted!
job id: 51573156
3
2023-09-03 20:07:21 b'Submitted batch job 51573157\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic-correlated/normal/v35_r1.0/train_size_1000/seed3.sh submitted!
job id: 51573157
4
2023-09-03 20:07:21 b'Submitted batch job 51573158\n' None
/pool001/shibal/results-synthetic/bashes/large-synthetic-correlated/normal/v35_r1.0/train_size_1000/seed4.sh submitted!
job id: 51573158
5
2023-09-

In [451]:
# command = """/home/shibal/anaconda3/envs/jasa/bin/python -u /home/shibal/Additive-Models-with-Structured-Interactions/SparseAMsWithInteractions/src/AMsWithInteractionsL0/AMsWithInteractionsL0-Synthetic.py  --dataset large-synthetic --dist normal --seed 0 --train_size 200 --version 19 --r 1.0 |& tee -a /pool001/shibal/results-synthetic/logs/large-synthetic/normal/v19_r1.0/seed0/output_200.txt"""

In [143]:
# !{command}

In [7]:
from subprocess import Popen, PIPE

In [423]:
for job in range(51544471, 51544634):
    process = Popen(['scancel',str(job)], stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()
    if exit_code ==0:
        print(job, "deleted!")

51544471 deleted!
51544472 deleted!
51544473 deleted!
51544474 deleted!
51544475 deleted!
51544476 deleted!
51544477 deleted!
51544478 deleted!
51544479 deleted!
51544480 deleted!
51544482 deleted!
51544483 deleted!
51544484 deleted!
51544485 deleted!
51544486 deleted!
51544487 deleted!
51544488 deleted!
51544489 deleted!
51544490 deleted!
51544491 deleted!
51544492 deleted!
51544493 deleted!
51544494 deleted!
51544495 deleted!
51544496 deleted!
51544497 deleted!
51544498 deleted!
51544499 deleted!
51544500 deleted!
51544501 deleted!
51544502 deleted!
51544503 deleted!
51544504 deleted!
51544505 deleted!
51544506 deleted!
51544507 deleted!
51544508 deleted!
51544509 deleted!
51544510 deleted!
51544511 deleted!
51544512 deleted!
51544513 deleted!
51544514 deleted!
51544515 deleted!
51544516 deleted!
51544517 deleted!
51544518 deleted!
51544519 deleted!
51544520 deleted!
51544521 deleted!
51544522 deleted!
51544523 deleted!
51544524 deleted!
51544525 deleted!
51544526 deleted!
51544527 d

In [33]:
for seed in range(100):
    command = f"""rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed{seed}/AMsWithInteractionsL0/v13/r1.0"""
    print(command)
    !{command}

rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed0/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed1/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed2/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed3/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed4/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed5/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed6/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed7/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed8/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic

rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed78/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed79/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed80/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed81/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed82/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed83/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed84/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed85/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/heteroskedastic/N_train_400/seed86/AMsWithInteractionsL0/v13/r1.0
rm -r /pool001/shibal/results-synthetic/hetero

In [1]:
import numpy as np
from sklearn.metrics import f1_score

In [26]:
rs = [1.0, 1.5, 2.0]
vals = {1.0: {}, 1.5: {}, 2.0: {}}
MISE = {1.0: {}, 1.5: {}, 2.0: {}}
mains = {1.0: {}, 1.5: {}, 2.0: {}}
interactions = {1.0: {}, 1.5: {}, 2.0: {}}
features = {1.0: {}, 1.5: {}, 2.0: {}}
fprs_main = {1.0: {}, 1.5: {}, 2.0: {}}
fnrs_main = {1.0: {}, 1.5: {}, 2.0: {}}
f1s_main = {1.0: {}, 1.5: {}, 2.0: {}}
fprs_interaction = {1.0: {}, 1.5: {}, 2.0: {}}
fnrs_interaction = {1.0: {}, 1.5: {}, 2.0: {}}
f1s_interaction = {1.0: {}, 1.5: {}, 2.0: {}}
f1s_feature = {1.0: {}, 1.5: {}, 2.0: {}}

data = 'large-synthetic-correlated'
train_size = 1000
version = 39
for seed in np.arange(25):
    filename = f"/pool001/shibal/results-synthetic/{data}/normal/N_train_{train_size}/seed{seed}/AMsWithInteractionsL0/v{version}"
    
    for r in rs:
        print(f"====================r: {r}")
        try:
            with open(filename+f"/r{r}/Results.txt") as file:
                lines = file.readlines()
                val = float([line for line in lines if "val:" in line and "Optimal" in line][0].split("val: ")[-1].split(",")[0])
                mise = float([line for line in lines if "True" in line][0].split(" ")[-1].split("\n")[0])
                fpr_main = float([line for line in lines if "FPR (main)" in line][0].split(" ")[-1].split("\n")[0])
                fnr_main = float([line for line in lines if "FNR (main)" in line][0].split(" ")[-1].split("\n")[0])
                f1_main = float([line for line in lines if "F1 (main)" in line][0].split(" ")[-1].split("\n")[0])
                fpr_interaction = float([line for line in lines if "FPR (interactions)" in line][0].split(" ")[-1].split("\n")[0])
                fnr_interaction = float([line for line in lines if "FNR (interactions)" in line][0].split(" ")[-1].split("\n")[0])
                f1_interaction = float([line for line in lines if "F1 (interactions)" in line][0].split(" ")[-1].split("\n")[0])
                vals[r][seed] = val
                MISE[r][seed] = mise
                fprs_main[r][seed] = fpr_main
                fnrs_main[r][seed] = fnr_main
                f1s_main[r][seed] = f1_main
                fprs_interaction[r][seed] = fpr_interaction
                fnrs_interaction[r][seed] = fnr_interaction
                f1s_interaction[r][seed] = f1_interaction
                print("Seed: ", seed, " mise:", mise)

            with open(filename+f"/r{r}/support_set.npy", 'rb') as f:
                main_set = np.load(f)
                interaction_set = np.load(f)
                mains[r][seed] = main_set
                interactions[r][seed] = interaction_set
                feature_set = np.unique(list(main_set)+list(np.unique(interaction_set)))
                features[r][seed] = feature_set
                if data in ['large-synthetic', 'large-synthetic-correlated']:
                    p = 500
                    k = 10
                    feature_support_truth = np.zeros(p)
                    true_support = np.arange((int)(p/(2*k)),p,(int)(p/k))
                    feature_support_truth[true_support] = 1
                    feature_support_recovered = np.zeros(p)
                    feature_support_recovered[feature_set] = 1
                    f1_feature = f1_score(feature_support_truth, feature_support_recovered)
                    f1s_feature[r][seed] = f1_feature
                elif data in ['large-synthetic-correlated-aoas']:
                    p = 500
                    k = 50
                    feature_support_truth = np.zeros(p)
                    true_support = np.arange((int)(p/(2*k)),p,(int)(p/k))
                    feature_support_truth[true_support] = 1
                    feature_support_recovered = np.zeros(p)
                    feature_support_recovered[feature_set] = 1
                    f1_feature = f1_score(feature_support_truth, feature_support_recovered)
                    f1s_feature[r][seed] = f1_feature
        except:
            pass

Seed:  0  mise: 0.37119114332908576
Seed:  0  mise: 0.4409276126374069
Seed:  0  mise: 0.3694288144977337
Seed:  1  mise: 0.7820670613312485
Seed:  1  mise: 0.8013291251637336
Seed:  1  mise: 0.7789428771290408
Seed:  2  mise: 0.7941065384427783
Seed:  2  mise: 0.4466118932541024
Seed:  2  mise: 0.26306649821015904
Seed:  3  mise: 0.5605968694118657
Seed:  3  mise: 0.6565863814005929
Seed:  3  mise: 0.5488418475076547
Seed:  4  mise: 0.3594912928726258
Seed:  4  mise: 0.24889507317273218
Seed:  4  mise: 0.20712079242873283
Seed:  5  mise: 0.842552905043
Seed:  5  mise: 0.40312653577766017
Seed:  5  mise: 0.2756216710834236
Seed:  6  mise: 0.6946549787593176
Seed:  6  mise: 0.6553723972240884
Seed:  6  mise: 0.5468244067146207
Seed:  7  mise: 0.4109980164214342
Seed:  7  mise: 0.37576436701544647
Seed:  7  mise: 0.3992085119380645
Seed:  8  mise: 0.6491136669297927
Seed:  8  mise: 0.49984156302757476
Seed:  8  mise: 0.4039764533931349
Seed:  9  mise: 0.6610238684064609
Seed:  9  mise: 0

In [27]:
vals = pd.DataFrame(vals)
display(vals)
MISE = pd.DataFrame(MISE)
fprs_main = pd.DataFrame(fprs_main)
fnrs_main = pd.DataFrame(fnrs_main)
f1s_main = pd.DataFrame(f1s_main)
fprs_interaction = pd.DataFrame(fprs_interaction)
fnrs_interaction = pd.DataFrame(fnrs_interaction)
f1s_interaction = pd.DataFrame(f1s_interaction)
f1s_feature = pd.DataFrame(f1s_feature)
best_r = {}

for index in vals.index:
    r = vals.loc[index][vals.loc[index]==vals.loc[index].min()].index
    best_r[index] = r[0]
    vals.loc[index, "best"] = np.array(vals.loc[index, r])[0]
    MISE.loc[index, "best"] = np.array(MISE.loc[index, r])[0]
    fprs_main.loc[index, "best"] = np.array(fprs_main.loc[index, r])[0]
    fnrs_main.loc[index, "best"] = np.array(fnrs_main.loc[index, r])[0]
    f1s_main.loc[index, "best"] = np.array(f1s_main.loc[index, r])[0]
    fprs_interaction.loc[index, "best"] = np.array(fprs_interaction.loc[index, r])[0]
    fnrs_interaction.loc[index, "best"] = np.array(fnrs_interaction.loc[index, r])[0]
    f1s_interaction.loc[index, "best"] = np.array(f1s_interaction.loc[index, r])[0]
    f1s_feature.loc[index, "best"] = np.array(f1s_feature.loc[index, r])[0]

vals = vals[["best"]].values
MISE = MISE[["best"]].values
fprs_main = fprs_main[["best"]].values
fnrs_main = fnrs_main[["best"]].values
f1s_main = f1s_main[["best"]].values
fprs_interaction = fprs_interaction[["best"]].values
fnrs_interaction = fnrs_interaction[["best"]].values
f1s_interaction = f1s_interaction[["best"]].values
f1s_feature = f1s_feature[["best"]].values

Unnamed: 0,1.0,1.5,2.0
0,0.39608,0.47618,0.404474
1,0.787514,0.858477,0.833501
2,1.531344,1.341842,0.515818
3,0.766076,0.760332,0.964577
4,0.300434,0.286277,0.229153
5,2.090473,1.247226,0.889762
6,0.779953,0.830982,0.822695
7,0.537836,0.498825,0.562198
8,0.795015,0.676941,0.863929
9,0.811066,1.116965,0.86761


In [28]:
len(MISE)

25

In [29]:
print("val:", np.mean(vals), "std-err:", np.std(vals)/np.sqrt(len(vals)))
print("MISE:", np.mean(MISE), "std-err:", np.std(MISE)/np.sqrt(len(MISE)))
print("FPR (main):", np.mean(fprs_main), "std-err:", np.std(fprs_main)/np.sqrt(len(fprs_main)))
print("FNR (main):", np.mean(fnrs_main), "std-err:", np.std(fnrs_main)/np.sqrt(len(fnrs_main)))
print("F1 (main):", np.mean(f1s_main), "std-err:", np.std(f1s_main)/np.sqrt(len(f1s_main)))
print("FPR (interactions):", np.mean(fprs_interaction), "std-err:", np.std(fprs_interaction)/np.sqrt(len(fprs_interaction)))
print("FNR (interactions):", np.mean(fnrs_interaction), "std-err:", np.std(fnrs_interaction)/np.sqrt(len(fnrs_interaction)))
print("F1 (interactions):", np.mean(f1s_interaction), "std-err:", np.std(f1s_interaction)/np.sqrt(len(f1s_interaction)))
print("F1 (feature):", np.mean(f1s_feature), "std-err:", np.std(f1s_feature)/np.sqrt(len(f1s_feature)))


val: 0.5873078 std-err: 0.04093988523852991
MISE: 0.53253835843089 std-err: 0.04966745008554144
FPR (main): 0.0004081632653061229 std-err: 0.0002827838053173675
FNR (main): 0.396 std-err: 0.05011347124277064
F1 (main): 0.7135812986463143 std-err: 0.04147882768172064
FPR (interactions): 1.6033092302514262e-05 std-err: 2.399613529864085e-06
FNR (interactions): 0.42 std-err: 0.03992492955535426
F1 (interactions): 0.6280173943703355 std-err: 0.040350051142710834
F1 (feature): 0.9312727272727274 std-err: 0.011309819346166528


In [30]:
print("Number of features: ", np.mean([len(features[r][(int)(seed)]) for seed, r in best_r.items()]))
print("Number of main effects: ", np.mean([len(mains[r][(int)(seed)]) for seed, r in best_r.items()]))
print("Number of interaction effects: ", np.mean([len(interactions[r][(int)(seed)]) for seed, r in best_r.items()]))

Number of features:  11.56
Number of main effects:  6.24
Number of interaction effects:  6.64


In [31]:
MISE

array([[0.37119114],
       [0.78206706],
       [0.2630665 ],
       [0.65658638],
       [0.20712079],
       [0.27562167],
       [0.69465498],
       [0.37576437],
       [0.49984156],
       [0.66102387],
       [0.26737894],
       [0.30310749],
       [0.8933887 ],
       [0.64039598],
       [0.39884227],
       [0.2976407 ],
       [0.91409465],
       [0.78885588],
       [0.22179792],
       [0.55045575],
       [0.55666087],
       [0.47379661],
       [1.2198216 ],
       [0.51808308],
       [0.4822002 ]])

In [32]:
np.median(MISE)

0.49984156302757476

In [33]:
best_r

{0: 1.0,
 1: 1.0,
 2: 2.0,
 3: 1.5,
 4: 2.0,
 5: 2.0,
 6: 1.0,
 7: 1.5,
 8: 1.5,
 9: 1.0,
 10: 1.0,
 11: 1.5,
 12: 1.5,
 13: 1.5,
 14: 1.5,
 15: 1.0,
 16: 1.5,
 17: 1.5,
 18: 1.5,
 19: 2.0,
 20: 1.0,
 21: 1.5,
 22: 1.0,
 23: 2.0,
 24: 1.5}

In [371]:
np.unique(np.concatenate(mains).ravel(), return_counts=True)

TypeError: The first input argument needs to be a sequence

In [29]:
interactions = [inter for inter in interactions if len(inter)>0]
np.unique(np.concatenate(interactions, axis=0), return_counts=True, axis=0)

(array([[ 25,  75],
        [ 25, 125],
        [ 75, 125],
        [ 75, 175],
        [125, 175],
        [125, 375],
        [125, 425],
        [175, 375],
        [175, 425],
        [325, 375],
        [375, 425],
        [425, 475]]),
 array([15, 15,  1,  1, 15,  1,  1,  1,  1, 12,  1, 10]))

In [436]:
from sklearn.metrics import f1_score

In [439]:
f1_score([1,1,1,1,0,0,0,0,0,0], [1,1,1,0,0,1,1,0,0,0])

0.6666666666666665