In [84]:
%load_ext autoreload
%autoreload 2


import os
import pandas as pd
from scripts.utils.utils import load_yaml, make_dir, save_json, load_json

# Identifier of the data run; it selects the sub-folder of ../data that is read and written.
run = '1000000'
# Input: JSON file with the generated networks of this run.
networks_json = f'../data/{run}/generation/networks.json'
# Input: path (not a DataFrame, despite the name) of the gzip-compressed parquet file with the solutions.
solutions_df = f'../data/{run}/solutions/solutions.parquet.gzip'

# Output: folder that receives the selected train/test networks.
selected_folder =  f'../data/{run}/selected'



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Loading data

We load the solutions calculated in a previous step. Each row represents an individual action of an individual solution. The dataset contains all solutions of all networks with a regret of less than 105.

In [85]:
# Read the per-action solutions table computed in the previous step.
df = pd.read_parquet(path=solutions_df)
# Show the first rows as a quick check of the available columns.
df.head(5)

Unnamed: 0,network_id,solution_id,actions,action,step,source_node,target_node,reward,other_reward,cum_reward,2step_lookahead,other_2step_lookahead,lookahead,other_lookahead,total_reward,total_regret
0,42_0,58,111010,0,0,1,0,140.0,140.0,140.0,160.0,120.0,520.0,520.0,440.0,80.0
1,42_0,58,111010,0,1,0,1,-100.0,20.0,40.0,40.0,160.0,300.0,380.0,440.0,80.0
2,42_0,58,111010,1,2,1,2,140.0,140.0,180.0,120.0,160.0,400.0,360.0,440.0,80.0
3,42_0,58,111010,1,3,2,1,-20.0,-20.0,160.0,120.0,0.0,260.0,180.0,440.0,80.0
4,42_0,58,111010,1,4,1,2,140.0,140.0,300.0,120.0,160.0,280.0,200.0,440.0,80.0


# Metrics at the level of actions

First, we evaluate the individual actions of potential solutions.

In [86]:
# Columns that together identify one solution; reused by the aggregation cell.
groupby = ['network_id', 'solution_id']

# Shorthands for the columns that several flags below are derived from.
taken_reward = df['reward']
alternative_reward = df['other_reward']
is_step_one = df['step'].eq(1)

# The taken action pays strictly more than the alternative action.
df['myopic'] = taken_reward.gt(alternative_reward)
# Both actions pay exactly the same.
df['indifferent'] = taken_reward.eq(alternative_reward)
# The taken action has the strictly larger two-step lookahead value.
df['2step_myopic'] = df['2step_lookahead'].gt(df['other_2step_lookahead'])
# Strictly myopic, or a tie that is broken by the larger two-step lookahead.
df['loose_myopic'] = df['myopic'] | (df['indifferent'] & df['2step_myopic'])

# The taken reward is -100 while the alternative reward is different.
df['large_loss'] = taken_reward.eq(-100) & ~df['indifferent']
# Large loss taken at step 1.
# NOTE(review): earlier wording said "first two steps", but only step == 1 is
# matched; the sample output also shows a step 0 - confirm the intended range.
df['early_large_loss'] = df['large_loss'] & is_step_one
# Row passes if the action is strictly myopic or it is the step-1 action.
# NOTE(review): the column name says "loose", yet the strict `myopic` flag is
# used and only step 1 is exempt; the original line was marked "# test".
df['later_loose_myopic'] = df['myopic'] | is_step_one


# Aggregated metrics at the level of solutions

In [87]:

# all actions of solution are loosly myopic
# Compute every per-solution aggregate first, then attach them to the frame.
# `transform` broadcasts each solution-level value back onto all of its rows.
per_solution = df.groupby(groupby)
solution_level_columns = {
    # every action of the solution is loosely myopic
    'all_loose_myopic': per_solution['loose_myopic'].transform('all'),
    # number of distinct source nodes in the solution
    'unique_nodes': per_solution['source_node'].transform('nunique'),
    # at least one row of the solution is flagged as an early large loss
    'any_early_large_loss': per_solution['early_large_loss'].transform('any'),
    # every row of the solution satisfies the `later_loose_myopic` flag
    'all_later_loose_myopic': per_solution['later_loose_myopic'].transform('all'),
}
for column_name, column_values in solution_level_columns.items():
    df[column_name] = column_values

# Selection of solutions into three (partly disjoint) categories

We select from each category the best solution as reference.

In [88]:
# Row masks, one per solution category (constant across the rows of a solution).

# alien: early large loss, all later-step checks pass, and at least 6 distinct source nodes
has_enough_nodes = df['unique_nodes'].ge(6)
w_alien = df['any_early_large_loss'] & df['all_later_loose_myopic'] & has_enough_nodes

# decoy: every action is loosely myopic
w_decoy = df['all_loose_myopic']

# other: no early large loss at all
w_other = ~df['any_early_large_loss']

In [89]:
# get minimal regret per solution category and network
solution_key = ['network_id', 'solution_id']
df_by_solution = df.set_index(solution_key)

# Lowest total regret reached within each category, per network.
alien = df.loc[w_alien].groupby('network_id')['total_regret'].min()
decoy = df.loc[w_decoy].groupby('network_id')['total_regret'].min()
other = df.loc[w_other].groupby('network_id')['total_regret'].min()

# Best alien solution per network: find its (network_id, solution_id) key,
# pull all of its action rows, and keep the cumulative reward of the row with step == 4.
alien_idx = (
    df.loc[w_alien]
    .set_index(solution_key)
    .groupby('network_id')['total_regret']
    .idxmin()
)
alien_sol = df_by_solution.loc[alien_idx]
alien_step_4_reward = (
    alien_sol.loc[alien_sol['step'].eq(4), 'cum_reward']
    .droplevel('solution_id')
)

# Same procedure for the best decoy solution per network.
decoy_idx = (
    df.loc[w_decoy]
    .set_index(solution_key)
    .groupby('network_id')['total_regret']
    .idxmin()
)
decoy_sol = df_by_solution.loc[decoy_idx]
decoy_step_4_reward = (
    decoy_sol.loc[decoy_sol['step'].eq(4), 'cum_reward']
    .droplevel('solution_id')
)

# One row per network; the series are aligned on network_id.
best = pd.DataFrame({
    'alien': alien,
    'decoy': decoy,
    'other': other,
    'decoy_step_4_reward': decoy_step_4_reward,
    'alien_step_4_reward': alien_step_4_reward,
})

# Selection of networks

We select a network if
* the best solution is `alien`,
* the next best non-alien solution is at least 40 points worse,
* there is a decoy solution that is not more than 60 points worse, and
* the decoy solution has a higher cumulative payoff after the first 4 steps.

In [None]:
# Evaluate each selection criterion per network (one row of `best` each).
alien_is_optimal = best['alien'].eq(0)
non_alien_clearly_worse = best['other'].ge(40)
decoy_within_reach = best['decoy'].le(60)
decoy_ahead_at_step_4 = best['decoy_step_4_reward'].gt(best['alien_step_4_reward'])

# A network is kept only when all four criteria hold.
w = (
    alien_is_optimal
    & non_alien_clearly_worse
    & decoy_within_reach
    & decoy_ahead_at_step_4
)

print(f'There are {w.sum()} networks fitting the criteria.')

# network ids (index labels of `best`) for which the combined mask is True
selected_network_ids = w.index[w.to_numpy()].tolist()

There are 56 networks fitting the criteria.


# Create test and train set

We split the selected networks into train and test, and save the corresponding networks.

In [None]:
import random

# A dedicated, seeded generator makes the split identical on every
# Restart & Run All. Change the seed or the test-set size here.
SPLIT_SEED = 0
N_TEST_NETWORKS = 10

split_rng = random.Random(SPLIT_SEED)
# Draw the test networks without replacement; raises ValueError when fewer
# than N_TEST_NETWORKS networks were selected.
test_ids = split_rng.sample(selected_network_ids, N_TEST_NETWORKS)

# All remaining selected networks form the training set, kept in the
# order of `selected_network_ids` so the result is deterministic.
test_id_set = set(test_ids)
training_ids = [
    network_id for network_id in selected_network_ids
    if network_id not in test_id_set
]

In [None]:
networks = load_json(networks_json)

# Output locations inside the folder for the selected networks.
make_dir(selected_folder)
test_file = os.path.join(selected_folder, 'test.json')
train_file = os.path.join(selected_folder, 'train.json')

# Highest total reward found among the loaded solutions of each network.
net_max_r = df.groupby('network_id')['total_reward'].max()


def _networks_with_max_reward(wanted_ids):
    """Return copies of the networks whose id is in `wanted_ids`, each extended by 'max_reward'.

    The order of `networks` is preserved; the reward lookup only happens for
    networks that are actually selected.
    """
    picked = []
    for network in networks:
        network_id = network['network_id']
        if network_id in wanted_ids:
            picked.append(dict(network, max_reward=net_max_r[network_id]))
    return picked


test_networks = _networks_with_max_reward(test_ids)
train_networks = _networks_with_max_reward(training_ids)

save_json(test_networks, test_file)
save_json(train_networks, train_file)