In [2]:
import pandas as pd

In [73]:
# df = pd.read_csv("../hawkdove_risk-variable_2023-09-26T115400_410252.csv")

# updated batch run csv that includes agent points and only goes for 200 iterations
df = pd.read_csv("../hawkdove_risk-variable_2023-09-27T171431_729869.csv")


In [74]:
df.head()

Unnamed: 0,RunId,iteration,Step,grid_size,risk_attitudes,max_agent_points,percent_hawk,AgentID,risk_level,choice,points
0,0,0,0,20,variable,24.0,0.4975,,,,
1,0,0,1,20,variable,42.0,0.215,0.0,8.0,dove,10.2
2,0,0,1,20,variable,42.0,0.215,1.0,8.0,hawk,9.0
3,0,0,1,20,variable,42.0,0.215,2.0,8.0,dove,13.5
4,0,0,1,20,variable,42.0,0.215,3.0,6.0,hawk,12.0


In [75]:
# can we filter model data from agent data based on presence of agent id?
model_df = df[df.AgentID.isna()]
model_df.head()

Unnamed: 0,RunId,iteration,Step,grid_size,risk_attitudes,max_agent_points,percent_hawk,AgentID,risk_level,choice,points
0,0,0,0,20,variable,24.0,0.4975,,,,
80001,1,1,0,20,variable,21.0,0.515,,,,
160002,2,2,0,20,variable,21.0,0.52,,,,
240003,3,3,0,20,variable,21.0,0.54,,,,
320004,4,4,0,20,variable,24.0,0.4775,,,,


In [76]:
# in variable risk mode we don't have agent risk level; individual agents report their risk level
run_df = df[['RunId', 'iteration', 'Step', 'percent_hawk']]

In [77]:
run_df = run_df.drop_duplicates()
run_df

Unnamed: 0,RunId,iteration,Step,percent_hawk
0,0,0,0,0.4975
1,0,0,1,0.2150
401,0,0,2,0.6175
801,0,0,3,0.5175
1201,0,0,4,0.3300
...,...,...,...,...
398005,4,4,196,0.3775
398405,4,4,197,0.5450
398805,4,4,198,0.5050
399205,4,4,199,0.3775


In [79]:
import altair as alt

alt.data_transformers.disable_max_rows()

alt.Chart(run_df).mark_line().encode(
    x='Step',
    y='percent_hawk',
    color='RunId:N',
).properties(
    width=800,
    height=300
)

In [80]:
runzero = run_df[run_df.RunId == 0]
runzero

Unnamed: 0,RunId,iteration,Step,percent_hawk
0,0,0,0,0.4975
1,0,0,1,0.2150
401,0,0,2,0.6175
801,0,0,3,0.5175
1201,0,0,4,0.3300
...,...,...,...,...
78001,0,0,196,0.3775
78401,0,0,197,0.5325
78801,0,0,198,0.5125
79201,0,0,199,0.3775


In [81]:
run_one = run_df[run_df.RunId == 1]


In [83]:
run_zero_chart = alt.Chart(runzero).mark_line().encode(
    x='Step', # alt.X('Step', scale=alt.Scale(domain=[0, 1])),
    y='percent_hawk',
    # color='agent_risk_level:N',
).properties(
    width=800,
    height=300
)
run_zero_chart

In [84]:
# how to work with this oscillating pattern of alternating hawks?
# can we use a rolling mean?

line = alt.Chart(runzero[runzero.Step < 300]).mark_line(
    color='red',
    size=3
).transform_window(
    rolling_mean='mean(percent_hawk)',
    frame=[-15, 15]
).encode(
    x='Step',
    y='rolling_mean:Q'
).properties(
    width=800,
    height=300
)

points = alt.Chart(runzero[runzero.Step < 300]).mark_line().encode(
    x='Step',
    y='percent_hawk'
)

points + line


In [87]:
# create and display charts for each run - should be about the same, since same starting conditions

# this stabilizes quickly, so only display first 200 rounds

charts = []

total_runs = len(run_df.RunId.unique())

for i in range(total_runs):
    run_i = run_df[(run_df.RunId == i) & (run_df.Step < 200)]
    run_chart = alt.Chart(run_i).mark_line().encode(
        x='Step',
        y=alt.Y('percent_hawk', scale=alt.Scale(domain=[0, 1.0]))
        # color='agent_risk_level:N',
    ).properties(
        title=f'Run {i}',
        width=800,
        height=90
    )
    charts.append(run_chart)

combined_chart = None
for c in charts:
    if combined_chart is None:
        combined_chart = c
    else:
        combined_chart = alt.vconcat(combined_chart, c)

combined_chart

In [88]:
# do the same thing, but display beginning instead of end and add the rolling mean

rollmean_charts = []

for i in range(total_runs):
    run_i = run_df[run_df.RunId == i]
    run_chart = alt.Chart(run_i).mark_line().encode(
        x='Step',
        y=alt.Y('percent_hawk', scale=alt.Scale(domain=[0, 1.0]))
        # color='agent_risk_level:N',
    ).properties(
        title=f'Run {i}',
        width=800,
        height=90
    )
    # graph the rolling mean
    rollmean_line = alt.Chart(run_i).mark_line(
        color='red',
        size=3
    ).transform_window(
        rolling_mean='mean(percent_hawk)',
        frame=[-15, 15]
    ).encode(
        x='Step',
        y='rolling_mean:Q'
    # ).properties(
        # width=800,
        # height=300
    )
    
    rollmean_charts.append(run_chart + rollmean_line)

rollmean_combined_chart = None
for c in rollmean_charts:
    if rollmean_combined_chart is None:
        rollmean_combined_chart = c
    else:
        rollmean_combined_chart = alt.vconcat(rollmean_combined_chart, c)

rollmean_combined_chart

In [89]:
last_step = run_df.Step.max()

In [90]:
df[df.Step == last_step].head()

Unnamed: 0,RunId,iteration,Step,grid_size,risk_attitudes,max_agent_points,percent_hawk,AgentID,risk_level,choice,points
79601,0,0,200,20,variable,4210.8,0.5325,0.0,8.0,dove,2406.3
79602,0,0,200,20,variable,4210.8,0.5325,1.0,8.0,dove,2185.1
79603,0,0,200,20,variable,4210.8,0.5325,2.0,8.0,dove,2121.4
79604,0,0,200,20,variable,4210.8,0.5325,3.0,6.0,dove,2119.9
79605,0,0,200,20,variable,4210.8,0.5325,4.0,7.0,dove,2266.6


In [91]:
# we want to calculate percent hawk by risk level per round

# create a minimal df with step, and risk level
# NOTE: collapsing all runs together
phawk_by_risk = df[df.risk_level.notna()][['Step', 'risk_level']].drop_duplicates().copy()
phawk_by_risk.head()

Unnamed: 0,Step,risk_level
1,1,8.0
4,1,6.0
5,1,7.0
6,1,1.0
11,1,3.0


In [92]:
phawk_by_risk

Unnamed: 0,Step,risk_level
1,1,8.0
4,1,6.0
5,1,7.0
6,1,1.0
11,1,3.0
...,...,...
79611,200,3.0
79614,200,2.0
79617,200,4.0
79621,200,0.0


In [93]:
def calculate_percent_hawk(step):
    # step_df = df[(df.risk_level == step.risk_level) & (df.RunId == step.RunId) & (df.Step == step.Step)]
    step_df = df[(df.risk_level == step.risk_level) & (df.Step == step.Step)]
    # in at least some cases, hawk is not present which results in an attribute error; assume 0% hawks
    try:
        # number of hawks / total number of agents
        return step_df.choice.value_counts().hawk / len(step_df.choice)
    except AttributeError:
        return 0

phawk_by_risk['percent_hawk'] = phawk_by_risk.apply(lambda row: calculate_percent_hawk(row), axis=1)
phawk_by_risk.head(10)

Unnamed: 0,Step,risk_level,percent_hawk
1,1,8.0,0.486726
4,1,6.0,0.497797
5,1,7.0,0.546256
6,1,1.0,0.461905
11,1,3.0,0.518182
14,1,2.0,0.557851
17,1,4.0,0.553488
21,1,0.0,0.476415
25,1,5.0,0.484163
401,2,8.0,0.0


In [94]:
alt.Chart(phawk_by_risk).mark_line().encode(
    x='Step:Q',
    y=alt.Y('percent_hawk', scale=alt.Scale(domain=[0, 1.0])),
    color='risk_level:N',
).properties(
    title=f'Percent hawk by risk level',
    width=800,
    height=500
)

In [95]:
# calculate a rolling average
phawk_by_risk['rolling_percent_hawk'] = phawk_by_risk.percent_hawk.rolling(window=10).mean()
phawk_by_risk.head(10)


Unnamed: 0,Step,risk_level,percent_hawk,rolling_percent_hawk
1,1,8.0,0.486726,
4,1,6.0,0.497797,
5,1,7.0,0.546256,
6,1,1.0,0.461905,
11,1,3.0,0.518182,
14,1,2.0,0.557851,
17,1,4.0,0.553488,
21,1,0.0,0.476415,
25,1,5.0,0.484163,
401,2,8.0,0.0,0.458278


In [96]:
alt.Chart(phawk_by_risk).mark_line().encode(
    x='Step:Q',
    y=alt.Y('rolling_percent_hawk', scale=alt.Scale(domain=[0, 1.0])),
    color='risk_level:N',
).properties(
    title=f'Rolling average Percent hawk by risk level',
    width=800,
    height=500
)

In [97]:
# what about points?

# get points at the last round only, so we're looking at the end state

last_round_n = df.Step.max()

last_round = df[df.Step == last_round_n]
last_round

Unnamed: 0,RunId,iteration,Step,grid_size,risk_attitudes,max_agent_points,percent_hawk,AgentID,risk_level,choice,points
79601,0,0,200,20,variable,4210.8,0.5325,0.0,8.0,dove,2406.3
79602,0,0,200,20,variable,4210.8,0.5325,1.0,8.0,dove,2185.1
79603,0,0,200,20,variable,4210.8,0.5325,2.0,8.0,dove,2121.4
79604,0,0,200,20,variable,4210.8,0.5325,3.0,6.0,dove,2119.9
79605,0,0,200,20,variable,4210.8,0.5325,4.0,7.0,dove,2266.6
...,...,...,...,...,...,...,...,...,...,...,...
400000,4,4,200,20,variable,4809.0,0.5450,395.0,7.0,dove,2265.5
400001,4,4,200,20,variable,4809.0,0.5450,396.0,7.0,dove,2334.8
400002,4,4,200,20,variable,4809.0,0.5450,397.0,8.0,dove,2773.7
400003,4,4,200,20,variable,4809.0,0.5450,398.0,1.0,hawk,2403.4


In [98]:

points_mean = last_round.groupby('risk_level', as_index=False).aggregate('points').mean() # : ['mean', 'sum']})
points_mean

Unnamed: 0,risk_level,points
0,0.0,2818.093396
1,1.0,2785.355238
2,2.0,2705.011983
3,3.0,2647.818636
4,4.0,2583.976744
5,5.0,2486.330769
6,6.0,2454.14978
7,7.0,2464.065639
8,8.0,2444.124779


In [99]:
alt.Chart(points_mean).mark_bar().encode(
    x=alt.Y('risk_level:N', title='risk level'),
    y=alt.Y('points', title='average points'),
).properties(
    title='average points by risk level',
    width=500,
    height=400
)

In [100]:
# what about min/max?

# aggregrate each count by risk level and type so we can graph together in altair
points_mean['type'] = 'mean'

points_min = last_round.groupby('risk_level', as_index=False).aggregate('points').min()
points_min['type'] = 'min'

points_max = last_round.groupby('risk_level', as_index=False).aggregate('points').max()
points_max['type'] = 'max'

points_combined = pd.concat([points_mean, points_min, points_max])

points_combined

Unnamed: 0,risk_level,points,type
0,0.0,2818.093396,mean
1,1.0,2785.355238,mean
2,2.0,2705.011983,mean
3,3.0,2647.818636,mean
4,4.0,2583.976744,mean
5,5.0,2486.330769,mean
6,6.0,2454.14978,mean
7,7.0,2464.065639,mean
8,8.0,2444.124779,mean
0,0.0,1212.0,min


In [102]:
alt.Chart(points_combined).mark_line().encode(
    x=alt.Y('risk_level:N', title='risk level'),
    y=alt.Y('points', title='average points'),
    color='type'
).properties(
    title='points by risk level',
    width=500,
    height=400
)

In [104]:
alt.Chart(last_round).mark_boxplot(extent="min-max").encode(
    alt.Y("points:Q").scale(zero=False),
    alt.X("risk_level:N", title="risk level"),
).properties(
    title='range of points per agent by risk level',
    width=500,
    height=400
)


In [109]:
# display the same information as a table
# for each run (= risk level), what are upper and lower values for individual points?

points = []

for i in range(9) :
    run_i = last_round[last_round.risk_level == i]
    # add one entry for each value with a type, so we can graph all at once in altair with a legend
    points.append({
        'risk_level': i, 
        'max': run_i.points.max(), 
        'mean': run_i.points.mean(), 
        'min': run_i.points.min()
    })

points_df = pd.DataFrame(points)
points_df

Unnamed: 0,risk_level,max,mean,min
0,0,4182.0,2818.093396,1212.0
1,1,4187.6,2785.355238,1417.5
2,2,4785.0,2705.011983,1643.6
3,3,4189.5,2647.818636,1590.5
4,4,4384.4,2583.976744,1786.2
5,5,3976.4,2486.330769,1664.9
6,6,3699.6,2454.14978,2037.8
7,7,3361.8,2464.065639,1825.1
8,8,2994.9,2444.124779,1823.3
