In [2]:
from docopt import docopt
import pandas as pd
from scripts.utils import store
from scripts.utils.utils import load_yaml
from scripts.pruning_models.classification import apply_models
from scripts.preselection import reward_features, node_trace, complexity

# Input locations, given relative to the notebook's working directory.
# `parameter_yaml` is not referenced by any of the cells visible in this section.
parameter_yaml = '../params/10000/preselection.yml'
networks_json = '../data/10000/generation/networks.json'
# NOTE: despite the `_df` suffix this is a file path (a str), not a DataFrame;
# it is passed to pd.read_parquet in a later cell.
solutions_df = '../data/10000/solutions/solutions.parquet.gzip'

# Load the networks JSON through the project's `store` helper.
networks = store.load_json(networks_json)

In [50]:
# Columns that together identify one solution inside one environment.
solution_keys = ['Environment_ID', 'Solution_IDx']

# Load the per-action solution table and attach the row-level flags.
df_metrics = pd.read_parquet(solutions_df)
action_reward = df_metrics['Action_Reward']
# Risky: the action taken paid -100 while the other reward was not -100.
df_metrics['isRisky'] = action_reward.eq(-100) & df_metrics['Other_Reward'].ne(-100)
# No win: the action did not pay the 140 reward.
df_metrics['iNoWin'] = action_reward.ne(140)
# Best: the row belongs to a solution with zero total regret.
df_metrics['isBest'] = df_metrics['Total_Regret'].eq(0)

# Reduce a row-level flag over the opening steps of each solution, then
# broadcast the per-solution result back onto every row of that solution by
# aligning on the (environment, solution) index.
for flag_name, last_step, row_flag, reducer in (
    ('first2Risky', 1, 'isRisky', 'any'),   # any risky move within Action_IDx 0..1
    ('first4NoWin', 3, 'iNoWin', 'all'),    # no 140 reward within Action_IDx 0..3
):
    opening_rows = df_metrics[df_metrics['Action_IDx'] <= last_step]
    per_solution = opening_rows.groupby(solution_keys)[row_flag].agg(reducer)
    df_metrics = df_metrics.set_index(solution_keys)
    df_metrics[flag_name] = per_solution
    df_metrics = df_metrics.reset_index()

# Running reward total along each solution; follows the row order within the
# solution (presumably ascending Action_IDx -- verify against the parquet file).
df_metrics['Cum_Action_Reward'] = df_metrics.groupby(solution_keys)['Action_Reward'].cumsum()
# True on every row of an environment that has at least one first2Risky solution.
df_metrics['anyfirst2Risky'] = df_metrics.groupby(['Environment_ID'])['first2Risky'].transform('any')

In [51]:
# wb: rows of zero-regret solutions.
# ws: rows of solutions that are risky within the first two steps AND collect
#     no 140 reward within the first four steps.
wb = df_metrics['isBest']
ws = df_metrics['first4NoWin'] & df_metrics['first2Risky']

env_of_row = df_metrics['Environment_ID']
best_matching = wb & ws
best_not_matching = wb & ~ws

# Per environment (broadcast to rows): does any zero-regret row match / not match `ws`?
any_pos = best_matching.groupby(env_of_row).transform('any')
any_neg = best_not_matching.groupby(env_of_row).transform('any')

# Rows of environments whose zero-regret rows all satisfy `ws`.
w = any_pos & ~any_neg

In [54]:
# Preview the zero-regret rows of the selected environments.
df_metrics.loc[w & wb].head(32)

Unnamed: 0,Environment_ID,Solution_IDx,Starting_Node,Total_Reward,Lookahead_Reward,Action_IDx,Source_Node,Target_Node,Action_Reward,Other_Reward,Total_Regret,isRisky,iNoWin,isBest,first2Risky,first4NoWin,Cum_Action_Reward,anyfirst2Risky
10498072,42_5126,3,5,400,400,0,5,3,20,-100,0,False,True,True,True,True,20,True
10498073,42_5126,3,5,400,400,1,3,2,20,140,0,True,True,True,True,True,40,True
10498074,42_5126,3,5,400,400,2,2,0,-20,20,0,False,True,True,True,True,20,True
10498075,42_5126,3,5,400,400,3,0,1,-20,-20,0,False,True,True,True,True,0,True
10498076,42_5126,3,5,400,400,4,1,0,140,140,0,False,False,True,True,True,140,True
10498077,42_5126,3,5,400,400,5,0,1,-20,-20,0,False,True,True,True,True,120,True
10498078,42_5126,3,5,400,400,6,1,3,140,140,0,False,False,True,True,True,260,True
10498079,42_5126,3,5,400,400,7,3,4,140,20,0,False,False,True,True,True,400,True
12343848,42_6027,69,5,0,0,0,5,0,-20,20,0,False,True,True,True,True,-20,True
12343849,42_6027,69,5,0,0,1,0,4,-100,20,0,True,True,True,True,True,-120,True


In [53]:
# Number of distinct environments among the selected zero-regret rows.
df_metrics.loc[w & wb, 'Environment_ID'].nunique()

5

In [16]:






# Step indices used for the short- vs long-horizon comparison.
# NOTE(review): 3 is the fourth action and 7 is the highest Action_IDx in the
# displayed solutions -- confirm both against the experiment design.
SHORT_HORIZON_STEP = 3
LONG_HORIZON_STEP = 7

# One group per environment, risk class (first2Risky) and step.
group_keys = ['Environment_ID', 'first2Risky', 'Action_IDx']

# Flag rows that reach the highest cumulative reward of their group at that step.
df_metrics['bestInGroup'] = (
    df_metrics.groupby(group_keys)['Cum_Action_Reward'].transform('max')
    == df_metrics['Cum_Action_Reward']
)
# Flag rows whose Total_Regret equals the maximum of their (environment, risk class) group.
# NOTE(review): Total_Regret looks non-positive with 0 as the optimum, which
# would make the maximum the least regret -- confirm.
df_metrics['bestInGroupTotal'] = (
    df_metrics.groupby(['Environment_ID', 'first2Risky'])['Total_Regret'].transform('max')
    == df_metrics['Total_Regret']
)

# Highest cumulative reward per (environment, risk class, step). A plain group
# max gives the same values as the earlier "keep bestInGroup rows, then
# mean/max", because every kept row already carried the group max.
cum_action_reward = df_metrics.groupby(group_keys)['Cum_Action_Reward'].max()

# Environments where the non-risky class is ahead at the short horizon.
# .gt() aligns the two slices on Environment_ID; the `>` operator raises
# ValueError when an environment is missing one of the two risk classes. Such
# environments compare as False here and are therefore dropped.
risky_shorttime_worse = cum_action_reward.loc[:, False, SHORT_HORIZON_STEP].gt(
    cum_action_reward.loc[:, True, SHORT_HORIZON_STEP]
)

df_metrics2 = df_metrics[df_metrics['Environment_ID'].isin(risky_shorttime_worse[risky_shorttime_worse].index)]

cum_action_reward2 = df_metrics2.groupby(group_keys)['Cum_Action_Reward'].max()

# Of those, environments where the risky class is ahead at the long horizon.
risky_longtime_better = cum_action_reward2.loc[:, False, LONG_HORIZON_STEP].lt(
    cum_action_reward2.loc[:, True, LONG_HORIZON_STEP]
)

In [17]:
# Keep only environments flagged True in `risky_longtime_better`.
longtime_better_envs = risky_longtime_better.loc[risky_longtime_better].index
df_metrics3 = df_metrics.loc[df_metrics['Environment_ID'].isin(longtime_better_envs)]

In [18]:
# Rows flagged bestInGroupTotal, ordered by environment, risk class and step.
best_total_rows = df_metrics3.loc[df_metrics3['bestInGroupTotal']]
best_total_rows.sort_values(['Environment_ID', 'first2Risky', 'Action_IDx']).head(32)

Unnamed: 0,Environment_ID,Solution_IDx,Starting_Node,Total_Reward,Lookahead_Reward,Action_IDx,Source_Node,Target_Node,Action_Reward,Other_Reward,Total_Regret,isRisky,first2Risky,Cum_Action_Reward,anyfirst2Risky,notfirst2Risky,anynotfirst2Risky,bestInGroup,bestInGroupTotal
2050208,42_1001,20,5,360,440,0,5,3,140,140,-80,False,False,140,True,True,True,True,True
2050224,42_1001,22,5,360,440,0,5,3,140,140,-80,False,False,140,True,True,True,True,True
2051232,42_1001,148,5,360,440,0,5,4,140,140,-80,False,False,140,True,True,True,True,True
2051248,42_1001,150,5,360,440,0,5,4,140,140,-80,False,False,140,True,True,True,True,True
2050209,42_1001,20,5,360,440,1,3,0,20,-100,-80,False,False,160,True,True,True,True,True
2050225,42_1001,22,5,360,440,1,3,0,20,-100,-80,False,False,160,True,True,True,True,True
2051233,42_1001,148,5,360,440,1,4,0,20,-100,-80,False,False,160,True,True,True,True,True
2051249,42_1001,150,5,360,440,1,4,0,20,-100,-80,False,False,160,True,True,True,True,True
2050210,42_1001,20,5,360,440,2,0,3,20,-100,-80,False,False,180,True,True,True,True,True
2050226,42_1001,22,5,360,440,2,0,3,20,-100,-80,False,False,180,True,True,True,True,True


In [23]:
# Distribution of Total_Regret within environment 42_1001 (from df_metrics3).
df_metrics3.loc[df_metrics3['Environment_ID'] == '42_1001', 'Total_Regret'].value_counts()

-280    272
-320    272
-200    248
-120    232
-360    208
-480    176
-240    160
-440    120
-400    112
-520     64
-40      56
-80      48
-160     32
 0       16
-600     16
-640     16
Name: Total_Regret, dtype: int64

In [24]:
# Distribution of Total_Regret within environment 42_1001 (from df_metrics2).
df_metrics2.loc[df_metrics2['Environment_ID'] == '42_1001', 'Total_Regret'].value_counts()

-280    272
-320    272
-200    248
-120    232
-360    208
-480    176
-240    160
-440    120
-400    112
-520     64
-40      56
-80      48
-160     32
 0       16
-600     16
-640     16
Name: Total_Regret, dtype: int64

In [13]:
# Per environment: is the risky class ahead of the non-risky class at step 7?
cum_action_reward2.loc[:, True, 7] > cum_action_reward2.loc[:, False, 7]

Environment_ID
42_0       False
42_100     False
42_1000    False
42_1001     True
42_1002    False
           ...  
42_9994    False
42_9995    False
42_9996    False
42_9998    False
42_9999    False
Name: Cum_Action_Reward, Length: 7275, dtype: bool

In [14]:
# Step-7 values of the first2Risky == True class, indexed by environment.
cum_action_reward2.xs((True, 7), level=['first2Risky', 'Action_IDx'])

Environment_ID
42_0       320
42_100     640
42_1000    640
42_1001    440
42_1002    400
          ... 
42_9994    360
42_9995    400
42_9996    480
42_9998    480
42_9999     80
Name: Cum_Action_Reward, Length: 7275, dtype: int64

In [15]:
# Step-7 values of the first2Risky == False class, indexed by environment.
cum_action_reward2.xs((False, 7), level=['first2Risky', 'Action_IDx'])

Environment_ID
42_0       320
42_100     880
42_1000    760
42_1001    360
42_1002    560
          ... 
42_9994    640
42_9995    640
42_9996    520
42_9998    800
42_9999    360
Name: Cum_Action_Reward, Length: 7275, dtype: int64

In [119]:
# Row count per environment remaining in df_metrics3.
df_metrics3.loc[:, 'Environment_ID'].value_counts()

42_2624    2048
42_3341    2048
42_5541    2048
42_1999    2048
42_6225    2048
           ... 
42_6376    2048
42_9696    2048
42_4678    2048
42_7702    2048
42_9944    2048
Name: Environment_ID, Length: 235, dtype: int64