# SC - Random S (with Probabilities)

Author: Steven M. Hernandez

The goal of this notebook is to see if randomly generating schedules will produce good or bad accuracies for the Eavesdropper. Specifically, we want to see if the random schedules should have specific probabilities for each of the transmitters. For example, if the probability that transmitter TX-A transmits is high or low, does that affect the eavesdropper accuracy?

In [1]:
import os

import numpy as np
import optuna
import pandas as pd

# Optuna storage backend holding every study analysed in this notebook.
# The connection URL can be overridden with the OPTUNA_STORAGE_URL environment
# variable so that credentials do not have to live in the notebook; the original
# URL is kept as the fallback so existing setups keep working unchanged.
# NOTE(review): the fallback still embeds a password -- consider dropping it once
# the environment variable is configured everywhere.
storage = optuna.storages.get_storage(
    os.environ.get(
        'OPTUNA_STORAGE_URL',
        'postgresql://postgres:postgresPW@postgres:5432/optuna',
    )
)

In [2]:
# Generate the shell commands that launch the random-S Optuna studies and show
# them in a text area so they can be copied out of the notebook.
import json  # required by json.dumps below; must not rely on leftover kernel state

import ipywidgets as widgets

# Text area that collects the generated commands.
ta = widgets.Textarea(layout={'width': '100%'})

n = 0  # number of commands generated
google_drive_directory = "all_experiments/AntiEavesdrop/ttyUSB0"
n_trials = 10  # forwarded to every command as --n_trials
n_repeats = 10  # how many times each study's command is emitted
optuna_labels = []  # one entry per generated command (contains duplicates)

tx_mac_addresses = {
    'A': '7C:9E:BD:4C:9C:5C',
    'B': 'C8:2B:96:8E:92:5C',
    'C': '94:B9:7E:C3:82:98',
    'D': '24:0A:C4:ED:30:50',
    'E': '94:B9:7E:C3:95:AC',
}
# JSON-encode the MAC list and escape its double quotes, because the value is
# embedded inside a double-quoted argument (--macs="...") of the command below.
macs = json.dumps(list(tx_mac_addresses.values())).replace('"', '\\"')

mongo_db = 'anti_eavesdrop'

# Study-name suffix -> extra command-line flag passed for that study.
study_flags = {
    'random-S:probabilities':              '',
    'random-S:probabilities:min=5':       '--min_station_percentage=5',
    'random-S:probabilities:min=10':       '--min_station_percentage=10',
    'random-S:probabilities:min=15':       '--min_station_percentage=15',
    'random-S:probabilities:min=20':       '--min_station_percentage=20',
}

# These settings are identical for every command, so define them once.
S__n_epochs = [0]
C__n_epochs = [100]

commands = []
for _ in range(n_repeats):
    for k, v in study_flags.items():
        optuna_name = f"SC:{k}"
        optuna_labels.append(optuna_name)
        commands.append(
            f"tsp docker-compose exec -T jupyter ipython -c '%run ../projects/anti_eavesdrop/optuna/2022_10_06__optuna_script_randomS.py --google_drive_directory=\"{google_drive_directory}\" --optuna_name=\"{optuna_name}\" --n_trials={n_trials} --S__n_epochs=\"{S__n_epochs}\" --C__n_epochs=\"{C__n_epochs}\"  --macs=\"{macs}\" {v} '\n\n"
        )
        n += 1

# Write the widget value once instead of appending command by command.
ta.value += "".join(commands)

print("#N =", n)
display(ta)

#N = 10


Textarea(value='tsp docker-compose exec -T jupyter ipython -c \'%run ../projects/anti_eavesdrop/optuna/2022_10…

In [3]:
import optuna


# Load every study named in `optuna_labels` and summarise its trials.
nums = {}  # study name -> sorted objective values (x100) of the completed trials
results = {
    'accuracy': {},
    'n_trials_completed': {},
    'best_sacred_id': {},
}
i = 0  # number of studies that were loaded successfully
_sum = 0  # total number of completed trials over all studies

# Trial budget each study is compared against in the final progress line.
# NOTE(review): presumably 10 queued runs x n_trials=10 -- confirm.
expected_trials_per_study = 100

# Order-preserving de-duplication (the label list holds one entry per queued command).
unique_labels = list(dict.fromkeys(optuna_labels))

for optuna_name in unique_labels:
    # Reset first so a failed load cannot leave the previous study's data in place.
    _df = None
    study = None
    print(optuna_name)
    try:
        study = optuna.load_study(
            storage=storage,
            study_name=optuna_name,
        )

        # Fetch the trial table and the best trial once; each access queries the storage.
        all_trials = study.trials_dataframe()
        _df = all_trials[all_trials.state == 'COMPLETE']
        best_trial = study.best_trial

        print(
            len(_df),
            "-",
            f"F{sum(all_trials.state == 'FAIL')}",
            f"R{sum(all_trials.state == 'RUNNING')}",
            f'{(best_trial.value*100):.2f}%',
            best_trial.user_attrs['sacred_id'],
            optuna_name,
        )

        results['accuracy'][optuna_name] = best_trial.value*100
        # Count only COMPLETE trials so the value matches its key.
        results['n_trials_completed'][optuna_name] = len(_df)
        results['best_sacred_id'][optuna_name] = best_trial.user_attrs['sacred_id']
        _sum += len(_df)
        i += 1

        # Ascending objective values as percentages, without missing values.
        nums[optuna_name] = (_df['value'].dropna().sort_values() * 100).tolist()
        print(str([f'{x:.2f}%' for x in nums[optuna_name]]).replace("'", ''))

    except Exception as e:
        # Best effort: report the study that could not be summarised and move on.
        print(f"Could not summarise study {optuna_name!r}: {type(e).__name__}: {e}")
    print()
print()

expected_total = len(unique_labels) * expected_trials_per_study
# Guard against an empty label list, which would otherwise divide by zero.
completion = (_sum / expected_total) * 100 if expected_total else 0.0
print(_sum, "out of", expected_total, "=", completion, "%")

SC:random-S:probabilities:#1
100 - F5 R6 38.18% 3003 SC:random-S:probabilities:#1
[38.18%, 39.89%, 40.06%, 40.26%, 41.27%, 42.44%, 42.49%, 42.77%, 43.29%, 43.33%, 43.43%, 43.51%, 43.55%, 44.47%, 44.64%, 45.17%, 45.36%, 45.46%, 46.41%, 46.44%, 46.51%, 46.84%, 46.91%, 46.97%, 47.02%, 47.28%, 47.73%, 47.77%, 47.78%, 49.62%, 50.13%, 50.27%, 50.43%, 50.57%, 50.68%, 50.87%, 51.25%, 51.79%, 52.93%, 53.12%, 53.58%, 54.02%, 54.09%, 54.83%, 55.02%, 55.48%, 56.07%, 56.09%, 56.60%, 56.65%, 56.95%, 57.07%, 57.82%, 57.92%, 58.24%, 58.30%, 58.69%, 58.80%, 58.92%, 59.07%, 59.16%, 59.25%, 59.47%, 60.77%, 61.15%, 61.78%, 63.01%, 63.94%, 65.21%, 67.02%, 68.55%, 68.80%, 69.29%, 69.51%, 69.64%, 69.79%, 70.06%, 71.28%, 72.02%, 72.03%, 72.63%, 73.83%, 74.56%, 74.62%, 75.12%, 75.55%, 76.25%, 78.23%, 79.24%, 79.90%, 80.48%, 81.84%, 82.35%, 82.86%, 82.87%, 83.62%, 84.12%, 85.00%, 86.47%, 87.74%]


100 out of 100 = 100.0 %


In [4]:
# Show the sampled parameters of the best trial of `study`, i.e. the study
# left in that variable by the summary loop in the previous cell.
study.best_trial.params

{'random_station_percentages__0': 0,
 'random_station_percentages__1': 0,
 'random_station_percentages__2': 0,
 'random_station_percentages__3': 2,
 'random_station_percentages__4': 98}

In [5]:
# Preview the first trials only; the full table has one row per trial and is
# too long to be useful as cell output.
study.trials_dataframe().head(10)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_random_station_percentages__0,params_random_station_percentages__1,params_random_station_percentages__2,params_random_station_percentages__3,params_random_station_percentages__4,user_attrs_sacred_id,state
0,0,0.465088,2022-10-07 12:55:58.098017,2022-10-07 13:09:50.312414,0 days 00:13:52.214397,1,22,4,4,69,2946.0,COMPLETE
1,1,0.818359,2022-10-07 12:55:58.174579,2022-10-07 13:09:46.595725,0 days 00:13:48.421146,10,24,64,1,1,2950.0,COMPLETE
2,2,0.685547,2022-10-07 12:55:58.207204,2022-10-07 13:09:42.871129,0 days 00:13:44.663925,1,2,77,2,18,2948.0,COMPLETE
3,3,,2022-10-07 12:55:58.221474,2022-10-07 12:56:00.123380,0 days 00:00:01.901906,72,0,18,10,0,,FAIL
4,4,0.687988,2022-10-07 12:55:58.284340,2022-10-07 13:09:47.765169,0 days 00:13:49.480829,24,71,5,0,0,2949.0,COMPLETE
5,5,0.792358,2022-10-07 12:55:58.298673,2022-10-07 13:09:57.376632,0 days 00:13:59.077959,31,13,8,45,3,2951.0,COMPLETE
6,6,0.696411,2022-10-07 12:56:02.957445,2022-10-07 13:09:50.435265,0 days 00:13:47.477820,20,42,0,3,35,2952.0,COMPLETE
7,7,0.496216,2022-10-07 13:09:43.291179,2022-10-07 13:23:36.177287,0 days 00:13:52.886108,2,11,14,20,53,2953.0,COMPLETE
8,8,0.695068,2022-10-07 13:09:47.023869,2022-10-07 13:23:43.413812,0 days 00:13:56.389943,21,26,2,4,47,2954.0,COMPLETE
9,9,0.591553,2022-10-07 13:09:48.278254,2022-10-07 13:23:27.007821,0 days 00:13:38.729567,1,0,10,89,0,2955.0,COMPLETE


In [6]:
# One horizontal violin per transmitter, showing the values sampled for that
# transmitter's `random_station_percentages__<n>` parameter in `_df`.
fig = go.Figure()

# Styling shared by every violin trace.
violin_style = dict(
    box_visible=False,
    meanline_visible=True,
    points='all',
    pointpos=0.1,
    jitter=0.05,
)

for station_idx, station_letter in enumerate('ABCDE'):
    column = f'params_random_station_percentages__{station_idx}'
    fig.add_trace(go.Violin(x=_df[column], name=f"TX-{station_letter}", **violin_style))

fig.update_traces(orientation='h', side='positive', width=3)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.update_xaxes(title="Station Probability (%)", range=(0, 100))
fig.update_yaxes(title="Station(TX-y)")
fig.show()

In [7]:
# One horizontal violin per study, showing the values collected in `nums`.
fig = go.Figure()

for study_name, accuracies in nums.items():
    fig.add_trace(go.Violin(x=accuracies,
                            # Label the trace with its study name so the
                            # distributions can be told apart in the figure.
                            name=study_name,
                            box_visible=False,
                            meanline_visible=True, points='all', pointpos=0.1, jitter=0.05))

fig.update_traces(orientation='h', side='positive', width=3)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.update_xaxes(title="Accuracy of Eave (%)", range=(0, 100))
fig.update_yaxes(title="Distribution")
fig.show()

In [8]:
# Parallel-coordinate view of `study`, restricted to the five station parameters.
param_names = [f'random_station_percentages__{idx}' for idx in range(5)]

fig = optuna.visualization.plot_parallel_coordinate(study, params=param_names)

axes = fig.data[0]['dimensions']

# Rescale the first axis by 100 and pin it to 0-100.
# NOTE(review): presumably this is the objective-value axis -- confirm.
first_axis = axes[0]
first_axis['values'] = [100 * value for value in first_axis['values']]
first_axis['range'] = [0, 100]

# The following five axes are relabelled per transmitter and pinned to 0-100.
for position, station_letter in enumerate("ABCDE", start=1):
    station_axis = axes[position]
    station_axis['label'] = f'TX-{station_letter} (probability)'
    station_axis['range'] = [0, 100]

fig.show()

In [9]:
# Slice plot of `study` with readable axis titles (one panel per station parameter).
fig = optuna.visualization.plot_slice(study)

# Only the left-most panel gets the y-axis title.
left_panel = fig.get_subplot(row=1, col=1)
left_panel.yaxis['title']['text'] = 'Eave Accuracy (%)'

for column_number, station_letter in enumerate("ABCDE", start=1):
    panel = fig.get_subplot(row=1, col=column_number)
    panel.xaxis['title']['text'] = f'TX-{station_letter} (probabilities)'

fig.update_layout(width=1000, height=400)
fig.show()

## Confirm Results

Now that we have found a `best_trial`, we need to confirm that it really is the best, i.e., that its low Eave accuracy is not just the result of random chance.

In [10]:
import ipywidgets as widgets
from helpers.config import config
from ml.train_and_evaluate import TrainAndEvaluate

# Build the commands that re-run the best trials of `study` several times each,
# and record the experiment-group label under which the re-runs will be stored.

# Text area that collects the generated commands.
ta = widgets.Textarea(layout={'width': '100%'})

sacred_key = "anti_eavesdrop"
client = pymongo.MongoClient(config("omniboard", f"{sacred_key}.mongodbURI"))
db_name = config("omniboard", f"{sacred_key}.path").replace("/","")
sacred_db = client[db_name]

best_trial_sacred_id = study.best_trial.user_attrs['sacred_id']

n_top_trials = 5  # how many of the lowest-value trials are re-run
n_reruns_per_trial = 15  # commands generated per selected trial
skip_sacred_ids = []  # sacred ids to label but not queue again

# Only completed trials have both an objective value and a sacred id.
completed_trials = study.trials_dataframe()
completed_trials = completed_trials[completed_trials.state == 'COMPLETE']
top_trials = completed_trials.sort_values(['value']).iloc[0:n_top_trials].copy(deep=True)
top_trials['rerun_label'] = ''

count = 0
for i, row in top_trials.iterrows():
    sacred_id = int(row['user_attrs_sacred_id'])
    run = sacred_db['runs'].find_one({'_id': sacred_id})
    if run is None:
        raise LookupError(f"No sacred run with _id={sacred_id} found in database {db_name!r}")

    c = run['config']
    # Drop the stored seed and the optuna section before re-submitting.
    # NOTE(review): presumably so every re-run draws a fresh seed -- confirm.
    c.pop('seed', None)
    c['experiment_params'].pop('optuna', None)
    c['experiment_params']['experiment_group_label'] += f':sid-{sacred_id}:rerun'
    # Single .loc call: chained `frame.column.loc[i] = ...` assignment is not
    # guaranteed to write back into the frame.
    top_trials.loc[i, 'rerun_label'] = c['experiment_params']['experiment_group_label']

    if sacred_id in skip_sacred_ids:
        continue

    for _ in range(n_reruns_per_trial):
        ta.value += TrainAndEvaluate().tsp(
            c,
            python_script="../projects/anti_eavesdrop/sc_optimizer/train_and_evaluate_SC.py",
            omniboard_config=sacred_key
        )  # + "\ntsp -u\n\n"
        count += 1

print("#", count)
ta

# 75


Textarea(value='tsp docker-compose exec -T jupyter ipython -c "%run ../projects/anti_eavesdrop/max_min_optimiz…

In [11]:
# Display the selected trials together with the `rerun_label` column filled in
# by the previous cell.
top_trials

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_random_station_percentages__0,params_random_station_percentages__1,params_random_station_percentages__2,params_random_station_percentages__3,params_random_station_percentages__4,user_attrs_sacred_id,state,rerun_label
57,57,0.381836,2022-10-07 14:59:01.799573,2022-10-07 15:06:58.410451,0 days 00:07:56.610878,0,0,0,2,98,3003.0,COMPLETE,SC:random-S:probabilities:#1:sid-3003:rerun
63,63,0.398926,2022-10-07 15:13:43.057769,2022-10-07 15:20:30.540303,0 days 00:06:47.482534,6,26,21,40,7,3009.0,COMPLETE,SC:random-S:probabilities:#1:sid-3009:rerun
87,87,0.400635,2022-10-08 03:47:09.792052,2022-10-08 03:50:16.099747,0 days 00:03:06.307695,6,29,25,36,4,3240.0,COMPLETE,SC:random-S:probabilities:#1:sid-3240:rerun
78,78,0.402588,2022-10-07 15:47:35.020101,2022-10-07 15:50:37.938517,0 days 00:03:02.918416,4,34,23,39,0,3024.0,COMPLETE,SC:random-S:probabilities:#1:sid-3024:rerun
60,60,0.41272,2022-10-07 15:06:58.761772,2022-10-07 15:13:42.331155,0 days 00:06:43.569383,2,13,21,59,5,3006.0,COMPLETE,SC:random-S:probabilities:#1:sid-3006:rerun


In [12]:
from incense import ExperimentLoader

# For every selected trial, fetch its completed re-runs and compare their final
# 'C.validation accuracy' with the value Optuna recorded for the trial.
url = config("omniboard", "anti_eavesdrop.mongodbURI")
db_name = config("omniboard", "anti_eavesdrop.path").replace("/", "")

loader = ExperimentLoader(mongo_uri=url, db_name=db_name)

rerun_means = []  # mean re-run accuracy (%) per selected trial

for _, row in top_trials.iterrows():
    query = {
        'status': 'COMPLETED',
        'config.experiment_params.experiment_group_label': {'$in': [row['rerun_label']]},
    }
    experiments = loader.find(query)

    # Last logged value of the metric for each re-run, as a percentage.
    out = []
    for exp in experiments:
        metric_values = exp.metrics['C.validation accuracy'].tolist()
        out.append(100 * metric_values[-1])
    rerun_means.append(np.mean(out))

    probs = [str(row[f'params_random_station_percentages__{idx}']) for idx in range(5)]
    expected_pct = row['value'] * 100

    fig = go.Figure(layout={
        'title': f"{row['rerun_label']} (Expect: {expected_pct:.1f}%) [{','.join(probs)}] #{len(experiments)}"
    })
    fig.add_trace(go.Violin(
        x=out,
        box_visible=False,
        meanline_visible=True,
        points='all',
        pointpos=0.1,
        jitter=0.05,
    ))
    fig.update_traces(orientation='h', side='positive', width=3)

    # Dashed line at the value recorded for the trial.
    fig.add_vline(expected_pct, line_dash="dash")

    fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
    fig.update_xaxes(title="Accuracy of Eave (%)", range=(0, 100))
    fig.update_yaxes(title="Distribution")
    fig.show()

print(np.mean(rerun_means), np.median(rerun_means))

43.6982421875 40.935546875


## Compare all Experiments

In [13]:
# Study name in the Optuna storage -> short label used in the comparison figures.
# NOTE(review): this rebinds `optuna_labels`, which is a list earlier in the notebook.
optuna_labels = {
    'SC:random-S:probabilities':        'min=0',
    'SC:random-S:probabilities:min=5':  'min=5',
    'SC:random-S:probabilities:min=10': 'min=10',
    'SC:random-S:probabilities:min=15': 'min=15',
    'SC:random-S:probabilities:min=20': 'min=20',
}

import optuna


output = []  # one dict per loaded study: label, accuracies and the study object
nums = {}  # study name -> sorted objective values (x100) of the completed trials
results = {
    'accuracy': {},
    'n_trials_completed': {},
    'best_sacred_id': {},
}
i = 0  # number of studies that were loaded successfully
_sum = 0  # total number of completed trials over all studies
for optuna_name, new_name in optuna_labels.items():
    # Reset first so a failed load cannot leave the previous study's data in place.
    _df = None
    study = None
    print(optuna_name)
    try:
        study = optuna.load_study(
            storage=storage,
            study_name=optuna_name,
        )

        # Fetch the trial table and the best trial once; each access queries the storage.
        all_trials = study.trials_dataframe()
        _df = all_trials[all_trials.state == 'COMPLETE']
        best_trial = study.best_trial

        print(
            len(_df),
            "-",
            f"F{sum(all_trials.state == 'FAIL')}",
            f"R{sum(all_trials.state == 'RUNNING')}",
            f'{(best_trial.value*100):.2f}%',
            best_trial.user_attrs['sacred_id'],
            optuna_name,
        )

        results['accuracy'][optuna_name] = best_trial.value*100
        # Count only COMPLETE trials so the value matches its key.
        results['n_trials_completed'][optuna_name] = len(_df)
        results['best_sacred_id'][optuna_name] = best_trial.user_attrs['sacred_id']
        _sum += len(_df)
        i += 1

        # Ascending objective values as percentages, without missing values.
        nums[optuna_name] = (_df['value'].dropna().sort_values() * 100).tolist()

        output.append({
            'name': new_name,
            'eave_accuracies': nums[optuna_name],
            'study': study
        })

    except Exception as e:
        # Best effort: report the study that could not be summarised and move on.
        print(f"Could not summarise study {optuna_name!r}: {type(e).__name__}: {e}")
    print()

SC:random-S:probabilities:#1
100 - F5 R6 38.18% 3003 SC:random-S:probabilities:#1

SC:random-S:probabilities:min=5:#1
105 - F2 R0 39.61% 3134 SC:random-S:probabilities:min=5:#1

SC:random-S:probabilities:min=10:#2
100 - F0 R0 43.49% 3399 SC:random-S:probabilities:min=10:#2

SC:random-S:probabilities:min=15:#1
100 - F0 R0 41.50% 3532 SC:random-S:probabilities:min=15:#1

SC:random-S:probabilities:min=20:#1
100 - F0 R0 56.32% 3821 SC:random-S:probabilities:min=20:#1



In [14]:
# Mean and standard deviation of the collected accuracies per study, plotted
# against the number parsed from the study label ("min=<n>"), then exported as CSV.
x = [int(entry['name'].split('=')[1]) for entry in output]
_means = [np.mean(entry['eave_accuracies']) for entry in output]
_stdevs = [np.std(entry['eave_accuracies']) for entry in output]

# Error bars are given in data coordinates (one standard deviation per point).
error_bars = dict(type='data', array=_stdevs, visible=True)

fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=_means, error_y=error_bars))

fig.update_layout(title="FIGURE")
fig.update_xaxes(title="Min. Station Probability")
fig.update_yaxes(title="Eavesdropper Accuracy (%)")
fig.show('notebook_connected')

csv_path = '/tmp/experiments_server/effect_of_min_station_probability.csv'
df = pd.DataFrame({'min_station_probability': x, 'mean': _means, 'stdev': _stdevs})
df.to_csv(csv_path)
dl_btn(csv_path, 'effect_of_min_station_probability.csv')

HTML(value='<html>\n    <head>\n    <meta name="viewport" content="width=device-width, initial-scale=1">\n    …

In [15]:
# One labelled horizontal violin per entry of `output`.
fig = go.Figure()

# Styling shared by every violin trace.
violin_style = dict(
    box_visible=False,
    meanline_visible=True,
    points='all',
    pointpos=0.1,
    jitter=0.05,
)

for entry in output:
    fig.add_trace(go.Violin(x=entry['eave_accuracies'], name=entry['name'], **violin_style))

fig.update_traces(orientation='h', side='positive', width=3)
fig.update_layout(xaxis_showgrid=False, xaxis_zeroline=False)
fig.update_xaxes(title="Eavesdropper Accuracy", range=(0, 100))
fig.update_yaxes(title="Distributions")
fig.show()

In [16]:
# For every loaded study: show its slice plot with readable axis titles, then
# write its trial table to CSV and call `dl_btn` for the written file.
for entry in output:
    study = entry['study']
    label = entry['name']

    fig = optuna.visualization.plot_slice(study)

    # Only the left-most panel gets the y-axis title.
    left_panel = fig.get_subplot(row=1, col=1)
    left_panel.yaxis['title']['text'] = 'Eave Accuracy (%)'

    for column_number, station_letter in enumerate("ABCDE", start=1):
        x_axis = fig.get_subplot(row=1, col=column_number).xaxis
        x_axis['title']['text'] = f'TX-{station_letter} (probabilities)'
        x_axis['range'] = [0, 100]

    fig.update_layout(width=1000, height=400, title=label)
    fig.show()

    csv_name = f'study_results__{label}.csv'
    csv_path = f'/tmp/experiments_server/{csv_name}'
    study.trials_dataframe().to_csv(csv_path)
    dl_btn(csv_path, csv_name)

HTML(value='<html>\n    <head>\n    <meta name="viewport" content="width=device-width, initial-scale=1">\n    …

HTML(value='<html>\n    <head>\n    <meta name="viewport" content="width=device-width, initial-scale=1">\n    …

HTML(value='<html>\n    <head>\n    <meta name="viewport" content="width=device-width, initial-scale=1">\n    …

HTML(value='<html>\n    <head>\n    <meta name="viewport" content="width=device-width, initial-scale=1">\n    …

HTML(value='<html>\n    <head>\n    <meta name="viewport" content="width=device-width, initial-scale=1">\n    …

In [17]:
# Parallel-coordinate view of every loaded study, restricted to the five
# station parameters. The parameter list is the same for all studies.
p = [f'random_station_percentages__{idx}' for idx in range(5)]

for entry in output:
    study = entry['study']

    fig = optuna.visualization.plot_parallel_coordinate(study, params=p)

    axes = fig.data[0]['dimensions']

    # Rescale the first axis by 100 and pin it to 0-100.
    # NOTE(review): presumably this is the objective-value axis -- confirm.
    first_axis = axes[0]
    first_axis['values'] = [100 * value for value in first_axis['values']]
    first_axis['range'] = [0, 100]

    # The following five axes are relabelled per transmitter and pinned to 0-100.
    for position, station_letter in enumerate("ABCDE", start=1):
        station_axis = axes[position]
        station_axis['label'] = f'TX-{station_letter} (probability)'
        station_axis['range'] = [0, 100]

    fig.update_layout(title=entry['name'])

    fig.show()

In [18]:

from incense import ExperimentLoader

# Print a LaTeX table comparing, for the best trials of the first study in
# `output`, the value recorded by Optuna with the mean accuracy of the re-runs.
url = config("omniboard", "anti_eavesdrop.mongodbURI")
db_name = config("omniboard", "anti_eavesdrop.path").replace("/", "")

loader = ExperimentLoader(
    mongo_uri=url,
    db_name=db_name
)

# `client` is the MongoClient created in the "Confirm Results" cell above.
sacred_db = client[db_name]

for d in output[0:1]:
    study = d['study']

    # Only completed trials have both an objective value and a sacred id.
    completed_trials = study.trials_dataframe()
    completed_trials = completed_trials[completed_trials.state == 'COMPLETE']
    top_trials = completed_trials.sort_values(['value']).iloc[0:5].copy(deep=True)
    top_trials['rerun_label'] = ''

    # Rebuild the experiment-group label under which each trial's re-runs were stored.
    for i, row in top_trials.iterrows():
        sacred_id = int(row['user_attrs_sacred_id'])
        run = sacred_db['runs'].find_one({'_id': sacred_id})
        if run is None:
            raise LookupError(f"No sacred run with _id={sacred_id} found in database {db_name!r}")

        group_label = run['config']['experiment_params']['experiment_group_label']
        # Single .loc call: chained `frame.column.loc[i] = ...` assignment is not
        # guaranteed to write back into the frame.
        top_trials.loc[i, 'rerun_label'] = f'{group_label}:sid-{sacred_id}:rerun'

    print("\\hline")
    # Rows are printed from the highest to the lowest recorded value.
    for i, row in reversed(list(top_trials.iterrows())):
        experiments = loader.find({
            'status': 'COMPLETED',
            'config.experiment_params.experiment_group_label': {'$in': [row['rerun_label']]},
        })

        # Last logged value of the metric for each re-run, as a percentage.
        out = [100 * exp.metrics['C.validation accuracy'].tolist()[-1] for exp in experiments]

        probs = [str(row[f'params_random_station_percentages__{idx}']) for idx in range(5)]

        expected = row['value']*100
        actual = np.mean(out)
        # Escape every percent sign for LaTeX ('\\%' is the valid spelling of backslash-percent).
        print(f"{'% & '.join(probs)}% & {expected:.2f}% & {actual:.2f}% & ({actual-expected:+.2f}%) \\\\\n\\hline".replace('%', '\\%'))



\hline
2\% & 13\% & 21\% & 59\% & 5\% & 41.27\% & 44.73\% & (+3.45\%) \\
\hline
4\% & 34\% & 23\% & 39\% & 0\% & 40.26\% & 40.60\% & (+0.34\%) \\
\hline
6\% & 29\% & 25\% & 36\% & 4\% & 40.06\% & 40.94\% & (+0.87\%) \\
\hline
6\% & 26\% & 21\% & 40\% & 7\% & 39.89\% & 40.85\% & (+0.96\%) \\
\hline
0\% & 0\% & 0\% & 2\% & 98\% & 38.18\% & 51.38\% & (+13.19\%) \\
\hline
