In [10]:
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve, average_precision_score
import matplotlib.pyplot as plt
import numpy as np
import json

In [11]:
# Hyperparameter grid explored by the tuning runs.
learning_rate = [1e-2, 1e-3, 1e-4]
weight_decay = [1e-3, 1e-4, 1e-5]
# NOTE(review): 1028 and 156 look like typos for 1024 and 256, but they are
# part of the folder names built below, so they are kept as-is -- confirm
# against the training script before changing them.
hidden_dims = [
    [512, 256, 128],
    [256, 128, 64],
    [512, 256, 128, 64],
    [1028, 512, 156, 128],
]
dropout_rate = [0.3, 0.5, 0.7]

# Map every (lr, wd, str(hidden_dims), dropout) combination to its run folder.
# The folder name is the formatted combination with spaces turned into
# underscores and all dots and commas removed.
folders = {
    (rate, decay, str(dims), drop): (
        f'tune_hyperparams/lr{rate}_wd{decay}_hd{dims}_dr{drop}'
        .translate(str.maketrans(' ', '_', '.,'))
    )
    for rate in learning_rate
    for decay in weight_decay
    for dims in hidden_dims
    for drop in dropout_rate
}
print(folders)

{(0.01, 0.001, '[512, 256, 128]', 0.3): 'tune_hyperparams/lr001_wd0001_hd[512_256_128]_dr03', (0.01, 0.001, '[512, 256, 128]', 0.5): 'tune_hyperparams/lr001_wd0001_hd[512_256_128]_dr05', (0.01, 0.001, '[512, 256, 128]', 0.7): 'tune_hyperparams/lr001_wd0001_hd[512_256_128]_dr07', (0.01, 0.001, '[256, 128, 64]', 0.3): 'tune_hyperparams/lr001_wd0001_hd[256_128_64]_dr03', (0.01, 0.001, '[256, 128, 64]', 0.5): 'tune_hyperparams/lr001_wd0001_hd[256_128_64]_dr05', (0.01, 0.001, '[256, 128, 64]', 0.7): 'tune_hyperparams/lr001_wd0001_hd[256_128_64]_dr07', (0.01, 0.001, '[512, 256, 128, 64]', 0.3): 'tune_hyperparams/lr001_wd0001_hd[512_256_128_64]_dr03', (0.01, 0.001, '[512, 256, 128, 64]', 0.5): 'tune_hyperparams/lr001_wd0001_hd[512_256_128_64]_dr05', (0.01, 0.001, '[512, 256, 128, 64]', 0.7): 'tune_hyperparams/lr001_wd0001_hd[512_256_128_64]_dr07', (0.01, 0.001, '[1028, 512, 156, 128]', 0.3): 'tune_hyperparams/lr001_wd0001_hd[1028_512_156_128]_dr03', (0.01, 0.001, '[1028, 512, 156, 128]', 0.5)

In [12]:
def extract_metrics(csv):
    """Score one predictions file.

    Parameters
    ----------
    csv
        Anything ``pd.read_csv`` accepts. The table must contain the columns
        ``true_label``, ``probability`` and ``predicted``.

    Returns
    -------
    tuple
        ``(roc_auc, auprc, acc)`` where ``roc_auc`` is
        ``roc_auc_score(true_label, probability)``, ``auprc`` is
        ``average_precision_score(true_label, probability)`` and ``acc`` is
        the fraction of rows whose ``predicted`` equals ``true_label``.
    """
    predictions = pd.read_csv(csv)
    labels = predictions['true_label']
    scores = predictions['probability']

    return (
        roc_auc_score(labels, scores),
        average_precision_score(labels, scores),
        # Accuracy: number of exact label matches divided by the row count.
        np.sum(labels == predictions['predicted']) / len(labels),
    )

In [14]:
# Score every tuning run: the key is the hyperparameter combination from
# `folders`, the value is whatever extract_metrics returns for that run's
# Azadirachta indica predictions file.
metrics = {
    params: extract_metrics(
        f'{folders[params]}/species_Azadirachta_indica/Azadirachta_indica_predictions.csv'
    )
    for params in folders
}
print(metrics)

{(0.01, 0.001, '[512, 256, 128]', 0.3): (0.6882338691290468, 0.44574001470902647, 0.6146551724137931), (0.01, 0.001, '[512, 256, 128]', 0.5): (0.7096621352029184, 0.47748046614064316, 0.6362068965517241), (0.01, 0.001, '[512, 256, 128]', 0.7): (0.7131604822161423, 0.47731177333355534, 0.6715517241379311), (0.01, 0.001, '[256, 128, 64]', 0.3): (0.7113863714090287, 0.47803894319236884, 0.6586206896551724), (0.01, 0.001, '[256, 128, 64]', 0.5): (0.6834245326037391, 0.42490044799242566, 0.628448275862069), (0.01, 0.001, '[256, 128, 64]', 0.7): (0.7021845075239399, 0.4558298477336914, 0.6810344827586207), (0.01, 0.001, '[512, 256, 128, 64]', 0.3): (0.6972148597811219, 0.48297248063883913, 0.6258620689655172), (0.01, 0.001, '[512, 256, 128, 64]', 0.5): (0.6877173107615139, 0.42054305815706633, 0.6310344827586207), (0.01, 0.001, '[512, 256, 128, 64]', 0.7): (0.702327006383949, 0.45578761639164506, 0.6793103448275862), (0.01, 0.001, '[1028, 512, 156, 128]', 0.3): (0.705732729138167, 0.48334682

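### Metrics Over a Single Axis

Each value of one hyperparameter is summarised by its mean ROC-AUC, AUPRC and accuracy, averaged over every combination of the other three hyperparameters.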

In [21]:
def describe(metrics):
    """Print each entry of the mapping ``metrics`` as ``key: value``, one per line."""
    for name in metrics:
        print(f'{name}: {metrics[name]}')

In [22]:
# Mean of each of the three metrics per learning rate, taken over every
# weight-decay / hidden-dims / dropout combination.
lr_metrics = {
    rate: [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for rate in learning_rate
    # One-element loop: binds the list of runs that share this learning rate.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for decay in weight_decay
        for dims in hidden_dims
        for drop in dropout_rate
    ]]
}

describe(lr_metrics)

0.01: [0.7055122517353194, 0.467883305801404, 0.663146551724138]
0.001: [0.7054660385633581, 0.465384907332828, 0.6684386973180076]
0.0001: [0.6646817268170189, 0.41331845996515526, 0.6639607279693487]


In [23]:
# Mean of each of the three metrics per weight decay, taken over every
# learning-rate / hidden-dims / dropout combination.
wd_metrics = {
    decay: [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for decay in weight_decay
    # One-element loop: binds the list of runs that share this weight decay.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for rate in learning_rate
        for dims in hidden_dims
        for drop in dropout_rate
    ]]
}

describe(wd_metrics)

0.001: [0.6920310184393526, 0.4485113460836193, 0.6624999999999999]
0.0001: [0.691569975252698, 0.44945721022712426, 0.6694444444444445]
1e-05: [0.6920590234236461, 0.44861811678864366, 0.6636015325670497]


In [24]:
# Mean of each of the three metrics per hidden-layer layout (keyed by its
# string form), taken over every learning-rate / weight-decay / dropout
# combination.
hd_metrics = {
    str(dims): [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for dims in hidden_dims
    # One-element loop: binds the list of runs that share this layout.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for rate in learning_rate
        for decay in weight_decay
        for drop in dropout_rate
    ]]
}

describe(hd_metrics)

[512, 256, 128]: [0.6963367105563155, 0.45158713679783635, 0.6669220945083013]
[256, 128, 64]: [0.6920086375420106, 0.4445892885308491, 0.6634738186462323]
[512, 256, 128, 64]: [0.6801940966416714, 0.4406919954552115, 0.6660280970625798]
[1028, 512, 156, 128]: [0.6990072447475976, 0.4585804766819528, 0.6643039591315453]


In [25]:
# Mean of each of the three metrics per dropout rate, taken over every
# learning-rate / weight-decay / hidden-dims combination.
dr_metrics = {
    drop: [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for drop in dropout_rate
    # One-element loop: binds the list of runs that share this dropout rate.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for rate in learning_rate
        for decay in weight_decay
        for dims in hidden_dims
    ]]
}

describe(dr_metrics)

0.3: [0.7050219665951006, 0.46625498828871337, 0.6649425287356322]
0.5: [0.6973626528696104, 0.45103567003857375, 0.6636254789272029]
0.7: [0.6732753976509855, 0.42929601477210017, 0.6669779693486588]


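### Metrics Over Two Axes

Each pair of hyperparameter values is summarised by its mean ROC-AUC, AUPRC and accuracy, averaged over every combination of the remaining two hyperparameters.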

In [27]:
# Mean of each of the three metrics per (learning rate, weight decay) pair,
# taken over every hidden-dims / dropout combination. Weight decay is the
# outer loop, so entries are grouped by weight decay.
lr_wd_metrics = {
    (rate, decay): [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for decay in weight_decay
    for rate in learning_rate
    # One-element loop: binds the list of runs that share this pair.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for dims in hidden_dims
        for drop in dropout_rate
    ]]
}

describe(lr_wd_metrics)

(0.01, 0.001): [0.7025971604727163, 0.4634987391324949, 0.6543103448275862]
(0.001, 0.001): [0.7086061592757257, 0.4686895474062485, 0.6691810344827586]
(0.0001, 0.001): [0.6648897355696154, 0.4133457517121146, 0.6640086206896552]
(0.01, 0.0001): [0.7048091584017327, 0.46838720322831323, 0.6683189655172415]
(0.001, 0.0001): [0.7062332563839488, 0.46776709104740144, 0.6746408045977011]
(0.0001, 0.0001): [0.6636675109724123, 0.41221733640565805, 0.6653735632183908]
(0.01, 1e-05): [0.7091304363315093, 0.4717639750434041, 0.6668103448275863]
(0.001, 1e-05): [0.7015587000303998, 0.4596980835448338, 0.6614942528735632]
(0.0001, 1e-05): [0.6654879339090287, 0.41439229177769316, 0.6625]


In [28]:
# Mean of each of the three metrics per (learning rate, hidden-dims) pair,
# taken over every weight-decay / dropout combination. Hidden dims is the
# outer loop, so entries are grouped by layout.
lr_hd_metrics = {
    (rate, str(dims)): [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for dims in hidden_dims
    for rate in learning_rate
    # One-element loop: binds the list of runs that share this pair.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for decay in weight_decay
        for drop in dropout_rate
    ]]
}

describe(lr_hd_metrics)

(0.01, '[512, 256, 128]'): [0.7033266754699296, 0.46616191348021757, 0.6662835249042145]
(0.001, '[512, 256, 128]'): [0.7086745389370219, 0.47056617674641443, 0.6741379310344828]
(0.0001, '[512, 256, 128]'): [0.6770089172619953, 0.4180333201668771, 0.6603448275862069]
(0.01, '[256, 128, 64]'): [0.7041456480848152, 0.46219104772812813, 0.6649425287356322]
(0.001, '[256, 128, 64]'): [0.7028174399604803, 0.45770199443768994, 0.6686781609195402]
(0.0001, '[256, 128, 64]'): [0.6690628245807367, 0.41387482342672915, 0.6568007662835249]
(0.01, '[512, 256, 128, 64]'): [0.702954001367989, 0.46440275620088645, 0.6542145593869733]
(0.001, '[512, 256, 128, 64]'): [0.7003098558544865, 0.4584258555517603, 0.671360153256705]
(0.0001, '[512, 256, 128, 64]'): [0.6373184327025384, 0.3992473746129875, 0.6725095785440612]
(0.01, '[1028, 512, 156, 128]'): [0.7116226820185437, 0.47877750579638434, 0.6671455938697318]
(0.001, '[1028, 512, 156, 128]'): [0.710062319501444, 0.4748456025954469, 0.659578544061302

In [29]:
# Mean of each of the three metrics per (learning rate, dropout) pair, taken
# over every weight-decay / hidden-dims combination. Dropout is the outer
# loop, so entries are grouped by dropout rate.
lr_dr_metrics = {
    (rate, drop): [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for drop in dropout_rate
    for rate in learning_rate
    # One-element loop: binds the list of runs that share this pair.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for decay in weight_decay
        for dims in hidden_dims
    ]]
}

describe(lr_dr_metrics)

(0.01, 0.3): [0.7072828495497036, 0.4777921700017212, 0.6507183908045977]
(0.001, 0.3): [0.712373472887217, 0.48221533664711624, 0.6718390804597701]
(0.0001, 0.3): [0.6954095773483813, 0.4387574582173026, 0.6722701149425286]
(0.01, 0.5): [0.7054305128058974, 0.4646672177935384, 0.6664511494252874]
(0.001, 0.5): [0.7073373256763946, 0.4688120694208581, 0.6658764367816091]
(0.0001, 0.5): [0.6793201201265391, 0.4196277229013246, 0.6585488505747127]
(0.01, 0.7): [0.7038233928503571, 0.46119052960895274, 0.6722701149425289]
(0.001, 0.7): [0.696687317126463, 0.4451273159305095, 0.6676005747126436]
(0.0001, 0.7): [0.6193154829761361, 0.3815701987768385, 0.6610632183908046]


In [31]:
# Mean of each of the three metrics per (weight decay, hidden-dims) pair,
# taken over every learning-rate / dropout combination.
wd_hd_metrics = {
    (decay, str(dims)): [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for decay in weight_decay
    for dims in hidden_dims
    # One-element loop: binds the list of runs that share this pair.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for rate in learning_rate
        for drop in dropout_rate
    ]]
}

describe(wd_hd_metrics)

(0.001, '[512, 256, 128]'): [0.6983239759081927, 0.4528638508604366, 0.6564176245210729]
(0.001, '[256, 128, 64]'): [0.6904863406926078, 0.44194196211865067, 0.6584291187739464]
(0.001, '[512, 256, 128, 64]'): [0.6793154196433095, 0.43947904288584927, 0.6654214559386973]
(0.001, '[1028, 512, 156, 128]'): [0.6999983375132999, 0.45976052846954096, 0.6697318007662836]
(0.0001, '[512, 256, 128]'): [0.6962504195799766, 0.45138992345653495, 0.6772988505747127]
(0.0001, '[256, 128, 64]'): [0.691270084422658, 0.4441844566683861, 0.664463601532567]
(0.0001, '[512, 256, 128, 64]'): [0.6803022242488727, 0.4421413409121139, 0.6669540229885058]
(0.0001, '[1028, 512, 156, 128]'): [0.6984571727592845, 0.4601131198714621, 0.6690613026819923]
(1e-05, '[512, 256, 128]'): [0.6944357361807773, 0.4505076360765377, 0.6670498084291188]
(1e-05, '[256, 128, 64]'): [0.6942694875107666, 0.4476414468055105, 0.667528735632184]
(1e-05, '[512, 256, 128, 64]'): [0.6809646460328317, 0.4404556025676711, 0.6657088122605

In [32]:
# Mean of each of the three metrics per (weight decay, dropout) pair, taken
# over every learning-rate / hidden-dims combination.
wd_dr_metrics = {
    (decay, drop): [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for decay in weight_decay
    for drop in dropout_rate
    # One-element loop: binds the list of runs that share this pair.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for rate in learning_rate
        for dims in hidden_dims
    ]]
}

describe(wd_dr_metrics)

(0.001, 0.3): [0.7042293661650708, 0.46439811766585554, 0.6594109195402299]
(0.001, 0.5): [0.6967805351307191, 0.4493243984756797, 0.6608477011494253]
(0.001, 0.7): [0.6750831540222678, 0.4318115221093228, 0.667241379310345]
(0.0001, 0.3): [0.7034223179339566, 0.46528816510927057, 0.6670258620689654]
(0.0001, 0.5): [0.6979963769664842, 0.4539997712190349, 0.6739942528735633]
(0.0001, 0.7): [0.6732912308576532, 0.4290836943530673, 0.6673132183908046]
(1e-05, 0.3): [0.7074142156862746, 0.46907868209101383, 0.6683908045977011]
(1e-05, 0.5): [0.6973110465116279, 0.44978284042100647, 0.6560344827586208]
(1e-05, 0.7): [0.6714518080730354, 0.4269928278539106, 0.6663793103448276]


In [34]:
# Mean of each of the three metrics per (hidden-dims, dropout) pair, taken
# over every learning-rate / weight-decay combination.
hd_dr_metrics = {
    (str(dims), drop): [
        np.mean([scores[metric] for scores in runs])
        for metric in range(3)
    ]
    for dims in hidden_dims
    for drop in dropout_rate
    # One-element loop: binds the list of runs that share this pair.
    for runs in [[
        metrics[rate, decay, str(dims), drop]
        for rate in learning_rate
        for decay in weight_decay
    ]]
}

describe(hd_dr_metrics)

('[512, 256, 128]', 0.3): [0.7031343020089174, 0.459376711914925, 0.6773946360153258]
('[512, 256, 128]', 0.5): [0.6994184463190961, 0.4558335555591841, 0.6627394636015325]
('[512, 256, 128]', 0.7): [0.6864573833409333, 0.4395511429194002, 0.660632183908046]
('[256, 128, 64]', 0.3): [0.7051247340021278, 0.4672237360019788, 0.6682950191570882]
('[256, 128, 64]', 0.5): [0.6920381928611237, 0.44122256434183144, 0.6587164750957855]
('[256, 128, 64]', 0.7): [0.6788629857627805, 0.42532156524873715, 0.6634099616858237]
('[512, 256, 128, 64]', 0.3): [0.7027608362466434, 0.46584770258254893, 0.6599616858237548]
('[512, 256, 128, 64]', 0.5): [0.6940814681815879, 0.44564484505909024, 0.6631226053639847]
('[512, 256, 128, 64]', 0.7): [0.6437399854967827, 0.41058343872399505, 0.6749999999999999]
('[1028, 512, 156, 128]', 0.3): [0.7090679941227136, 0.47257180265540083, 0.6541187739463602]
('[1028, 512, 156, 128]', 0.5): [0.7039125041166338, 0.4614417151941891, 0.6699233716475095]
('[1028, 512, 156,