In [4]:
import pandas as pd
import numpy as np
import altair as alt
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib import cm
import os

In [7]:
security_incidents_clean = pd.read_csv("../data/processed/security_incidents_clean.csv")

In [8]:
security_incidents_clean.head(2)

Unnamed: 0,Incident ID,Year,Month,Country Code,Country,Region,District,City,UN,INGO,...,Means of attack,Attack context,Location,Motive,Actor type,Actor name,Details,Verified,Source,Group
0,1,1997,1,KH,Cambodia,Banteay Meanchey,Unknown,Unknown,0,0,...,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,1 ICRC national staff killed while working in ...,Archived,Archived,Asia
1,2,1997,1,RW,Rwanda,Northern,Musanze,Ruhengeri,0,4,...,Shooting,Raid,Office/compound,Unknown,Unknown,Unknown,"3 INGO international (Spanish) staff killed, 1...",Archived,Archived,Africa


# Total affected across the world

In [276]:
# Convert Year to string (Altair handles x-axis better this way)
security_incidents_clean['Year'] = security_incidents_clean['Year'].astype(str)

# Group by year
yearly = security_incidents_clean.groupby('Year')['Total affected'].sum().reset_index()

# Get tick labels for every 3rd year
tick_values = yearly['Year'][::3].tolist()

# Line chart
line = alt.Chart(yearly).mark_line(
    color='red'
).encode(
    x=alt.X('Year:N', title='Year', axis=alt.Axis(values=tick_values, labelAngle=0, 
                labelFont='Courier New',
                labelFontSize=14)),
    y=alt.Y('Total affected:Q', title='Total Affected', axis=alt.Axis(grid=False, 
                labelFont='Courier New',
                labelFontSize=14)),
    tooltip=[
        alt.Tooltip('Year:N'),
        alt.Tooltip('Total affected:Q')
    ]
).properties(
    title='Total Aid Workers Affected Over Time',
    width=700,
    height=400
)

# Red markers
points = alt.Chart(yearly).mark_point(
    color='red',
    filled=True,
    size=80
).encode(
    x='Year:N',
    y='Total affected:Q',
    tooltip=[
        alt.Tooltip('Year:N'),
        alt.Tooltip('Total affected:Q')
    ]
)

final_chart = (line + points).interactive().configure_axis(
    titleFont='Courier New',
    titleFontSize=16
).configure_view(
    stroke=None
).configure_title(
    font='Courier New',
    fontSize=18
)
final_chart.save('../plots/total_affected_over_time.html')
final_chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



# Group wise total affected

In [267]:
security_incidents_clean['Year'] = security_incidents_clean['Year'].astype(str)

In [268]:
# Africa
africa = security_incidents_clean[security_incidents_clean['Group'] == 'Africa']
africa_grouped = africa.groupby('Year')['Total affected'].sum().reset_index()
tick_values = africa_grouped['Year'][::3].tolist()

africa_chart = (
    alt.Chart(africa_grouped).mark_line(color='red')
    .encode(
        x=alt.X('Year:N',
            axis=alt.Axis(
                values=tick_values,
                labelAngle=0,
                labelFont='Courier New',
                labelFontSize=14
            ),
            title='Year'
        ),
        y=alt.Y('Total affected:Q',
            axis=alt.Axis(
                grid=False,
                labelFont='Courier New',
                labelFontSize=14
            ),
            title='Total Affected'
        ),
        tooltip=['Year:N', 'Total affected:Q']
    )
    .properties(
        title='Africa: Total Aid Workers Affected Over Time',
        width=700,
        height=400
    )
    + alt.Chart(africa_grouped).mark_point(color='red', filled=True, size=80)
    .encode(
        x='Year:N',
        y='Total affected:Q',
        tooltip=['Year:N', 'Total affected:Q']
    )
).interactive().configure_view(
    stroke=None
).configure_title(
    font='Courier New',
    fontSize=18,
    anchor='start'
).configure_axis(
    titleFont='Courier New',
    titleFontSize=16
).configure_title(
    font='Courier New',
    fontSize=18
)

africa_chart.save("../plots/africa_total_affected.html")
africa_chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [269]:
# Asia
asia = security_incidents_clean[security_incidents_clean['Group'] == 'Asia']
asia_grouped = asia.groupby('Year')['Total affected'].sum().reset_index()
tick_values = asia_grouped['Year'][::3].tolist()

asia_chart = (
    alt.Chart(asia_grouped).mark_line(color='red')
    .encode(
        x=alt.X('Year:N', axis=alt.Axis(values=tick_values, labelAngle=0, labelFont='Courier New', labelFontSize=14), title='Year'),
        y=alt.Y('Total affected:Q', axis=alt.Axis(grid=False, labelFont='Courier New', labelFontSize=14), title='Total Affected'),
        tooltip=['Year:N', 'Total affected:Q']
    )
    .properties(title='Asia: Total Aid Workers Affected Over Time', width=700, height=400)
    + alt.Chart(asia_grouped).mark_point(color='red', filled=True, size=80)
    .encode(x='Year:N', y='Total affected:Q', tooltip=['Year:N', 'Total affected:Q'])
).interactive().configure_view(stroke=None
).configure_title(
    font='Courier New',
    fontSize=18,
    anchor='start'
).configure_axis(
    titleFont='Courier New',
    titleFontSize=16
).configure_title(
    font='Courier New',
    fontSize=18
)

asia_chart.save("../plots/asia_total_affected.html")
asia_chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [275]:
import altair as alt
import os

# Ensure plots directory exists
os.makedirs("../plots", exist_ok=True)

security_incidents_clean['Group'] = security_incidents_clean['Group'].str.strip().str.replace('\u00A0', ' ', regex=True)


# Convert Year to string for Altair axis
security_incidents_clean['Year'] = security_incidents_clean['Year'].astype(str)

# Loop through each region (Group)
for group_name in security_incidents_clean['Group'].unique():
    group_df = security_incidents_clean[security_incidents_clean['Group'] == group_name]
    grouped = group_df.groupby('Year')['Total affected'].sum().reset_index()
    tick_values = grouped['Year'][::3].tolist()

    # Create the chart
    chart = (
        alt.Chart(grouped).mark_line(color='red')
        .encode(
            x=alt.X('Year:N',
                title='Year',
                axis=alt.Axis(
                    values=tick_values,
                    labelAngle=0,
                    labelFont='Courier New',
                    labelFontSize=14
                )
            ),
            y=alt.Y('Total affected:Q',
                title='Total Affected',
                axis=alt.Axis(
                    grid=False,
                    labelFont='Courier New',
                    labelFontSize=14
                )
            ),
            tooltip=['Year:N', 'Total affected:Q']
        )
        .properties(
            title=f'{group_name}: Total Aid Workers Affected Over Time',
            width=700,
            height=400
        )
        + alt.Chart(grouped).mark_point(color='red', filled=True, size=80)
        .encode(
            x='Year:N',
            y='Total affected:Q',
            tooltip=['Year:N', 'Total affected:Q']
        )
    ).interactive().configure_view(
        stroke=None
    ).configure_title(
        font='Courier New',
        fontSize=18
    ).configure_axis(
        titleFont='Courier New',
        titleFontSize=16
    )

    # Clean file name (lowercase, no spaces)
    file_name = group_name.lower().replace(" ", "_") + "_total_affected.html"
    chart.save(f"../plots/{file_name}")
    chart


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [274]:
#

me = security_incidents_clean[security_incidents_clean['Group'] == 'Middle East']
me_grouped = me.groupby('Year')['Total affected'].sum().reset_index()
tick_values = me_grouped['Year'][::3].tolist()

me_chart = (
    alt.Chart(me_grouped).mark_line(color='red')
    .encode(
        x=alt.X('Year:N', axis=alt.Axis(values=tick_values, labelAngle=0), title='Year'),
        y=alt.Y('Total affected:Q', axis=alt.Axis(grid=False), title='Total Affected'),
        tooltip=['Year:N', 'Total affected:Q']
    )
    .properties(title='Middle East: Total Aid Workers Affected Over Time', width=700, height=400)
    + alt.Chart(me_grouped).mark_point(color='red', filled=True, size=80)
    .encode(x='Year:N', y='Total affected:Q', tooltip=['Year:N', 'Total affected:Q'])
).interactive().configure_view(stroke=None)
me_chart.save("../plots/middle_east_total_affected.html")
me_chart

KeyError: 'Group'

In [271]:
# North America
americas = security_incidents_clean[security_incidents_clean['Group'] == 'North America']
americas_grouped = americas.groupby('Year')['Total affected'].sum().reset_index()
tick_values = americas_grouped['Year'][::3].tolist()

americas_chart = (
    alt.Chart(americas_grouped).mark_line(color='red')
    .encode(
        x=alt.X('Year:N', axis=alt.Axis(values=tick_values, labelAngle=0), title='Year'),
        y=alt.Y('Total affected:Q', axis=alt.Axis(grid=False), title='Total Affected'),
        tooltip=['Year:N', 'Total affected:Q']
    )
    .properties(title='North America: Total Aid Workers Affected Over Time', width=700, height=400)
    + alt.Chart(americas_grouped).mark_point(color='red', filled=True, size=80)
    .encode(x='Year:N', y='Total affected:Q', tooltip=['Year:N', 'Total affected:Q'])
).interactive().configure_view(stroke=None)

americas_chart.save("../plots/northamerica_total_affected.html")
americas_chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [272]:
# South America
americas = security_incidents_clean[security_incidents_clean['Group'] == 'South America']
americas_grouped = americas.groupby('Year')['Total affected'].sum().reset_index()
tick_values = americas_grouped['Year'][::3].tolist()

americas_chart = (
    alt.Chart(americas_grouped).mark_line(color='red')
    .encode(
        x=alt.X('Year:N', axis=alt.Axis(values=tick_values, labelAngle=0), title='Year'),
        y=alt.Y('Total affected:Q', axis=alt.Axis(grid=False), title='Total Affected'),
        tooltip=['Year:N', 'Total affected:Q']
    )
    .properties(title='South America: Total Aid Workers Affected Over Time', width=700, height=400)
    + alt.Chart(americas_grouped).mark_point(color='red', filled=True, size=80)
    .encode(x='Year:N', y='Total affected:Q', tooltip=['Year:N', 'Total affected:Q'])
).interactive().configure_view(stroke=None)

americas_chart.save("../plots/southamerica_total_affected.html")
americas_chart



the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [273]:
# europe
europe = security_incidents_clean[security_incidents_clean['Group'] == 'Europe']
europe_grouped = europe.groupby('Year')['Total affected'].sum().reset_index()
tick_values = europe_grouped['Year'][::3].tolist()

europe_chart = (
    alt.Chart(europe_grouped).mark_line(color='red')
    .encode(
        x=alt.X('Year:N', axis=alt.Axis(values=tick_values, labelAngle=0), title='Year'),
        y=alt.Y('Total affected:Q', axis=alt.Axis(grid=False), title='Total Affected'),
        tooltip=['Year:N', 'Total affected:Q']
    )
    .properties(title='Europe: Total Aid Workers Affected Over Time', width=700, height=400)
    + alt.Chart(europe_grouped).mark_point(color='red', filled=True, size=80)
    .encode(x='Year:N', y='Total affected:Q', tooltip=['Year:N', 'Total affected:Q'])
).interactive().configure_view(stroke=None)
europe_chart.save("../plots/europe_total_affected.html")
europe_chart


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.


the convert_dtype parameter is deprecated and will be removed in a future version.  Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.



In [36]:
# Oceania
europe = security_incidents_clean[security_incidents_clean['Group'] == 'Oceania']
europe_grouped = europe.groupby('Year')['Total affected'].sum().reset_index()
tick_values = europe_grouped['Year'][::3].tolist()

europe_chart = (
    alt.Chart(europe_grouped).mark_line(color='red')
    .encode(
        x=alt.X('Year:N', axis=alt.Axis(values=tick_values, labelAngle=0), title='Year'),
        y=alt.Y('Total affected:Q', axis=alt.Axis(grid=False), title='Total Affected'),
        tooltip=['Year:N', 'Total affected:Q']
    )
    .properties(title='Oceania: Total Aid Workers Affected Over Time', width=700, height=400)
    + alt.Chart(europe_grouped).mark_point(color='red', filled=True, size=80)
    .encode(x='Year:N', y='Total affected:Q', tooltip=['Year:N', 'Total affected:Q'])
).interactive().configure_view(stroke=None)
europe_chart.save("../plots/oceania_total_affected.html")
europe_chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# World map with total affected

In [40]:
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2
import pandas as pd

# Function to get country code and continent
def get_continent(col):
    try:
        cn_a2_code = country_name_to_country_alpha2(col)
        cn_continent = country_alpha2_to_continent_code(cn_a2_code)
        return pd.Series([cn_a2_code, cn_continent])
    except:
        return pd.Series([None, None])


In [221]:
import pandas as pd
import plotly.express as px

df = pd.read_csv("../data/processed/security_incidents_clean.csv")

# Convert Year to string for animation
df['Year'] = df['Year'].astype(str)

# Summarize total affected per Country-Year
bubble_data = df.groupby(['Country', 'Year'], as_index=False)['Total affected'].sum()

# Rename for simplicity
bubble_data = bubble_data.rename(columns={'Total affected': 'TotalAffected'})

# Create the animated bubble map
fig = px.scatter_geo(
    bubble_data,
    locations='Country',
    locationmode='country names',  
    size='TotalAffected',
    color_discrete_sequence=['red'],
    opacity=0.5,
    animation_frame='Year',
    projection='natural earth',
    size_max=50,
    hover_name='Country',
    hover_data={'Year': True, 'TotalAffected': True}
)

# Define conflict annotations (start and end years, with text)
conflict_annotations = [
    {"start": 1998, "end": 1999, "text": "Second Congo War"},
    {"start": 2001, "end": 2002, "text": "Invasion of Afghanistan"},
    {"start": 2003, "end": 2004, "text": "Invasion of Iraq"},
    {"start": 2013, "end": 2014, "text": "South Sudanese Civil War"},
    {"start": 2022, "end": 2023, "text": "Invasion of Ukraine"}
]

# Add annotations to each existing auto-generated frame
for frame in fig.frames:
    year = int(frame.name)
    annotations = []
    for conflict in conflict_annotations:
        if conflict["start"] <= year <= conflict["end"]:
            annotations.append(dict(
                text=conflict["text"],
                x=0.5,
                y=0.95,
                xref="paper",
                yref="paper",
                showarrow=False,
                font=dict(family="Courier New, monospace", size=16, color="black"),
                bgcolor="rgba(255, 255, 255, 0.7)",
                bordercolor="black",
                borderwidth=1
            ))
    frame.layout.annotations = annotations

# Style layout
fig.update_layout(
    geo=dict(
        showframe=False,
        showcoastlines=True,
        projection_type='natural earth'
    ),
    margin=dict(l=0, r=0, t=100, b=0),  
    updatemenus=[{
        "buttons": [
            {
                "args": [None, {
                    "frame": {"duration": 1000, "redraw": True},
                    "fromcurrent": True,
                    "transition": {"duration": 500, "easing": "linear"},
                    "loop": True
                }],
                "method": "animate"
            },
            {
                "args": [[None], {
                    "frame": {"duration": 0, "redraw": False},
                    "mode": "immediate",
                    "transition": {"duration": 0}
                }],
                "method": "animate"
            }
        ],
        "direction": "left",
        "pad": {"r": 10, "t": 87},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0.3,
        "yanchor": "top"
    }],
    sliders=[{
        "active": 0,
        "x": 0.3,
        "len": 0.4,
        "xanchor": "left",
        "yanchor": "top",
        "y": 0.22,
        "pad": {"b": 1},
        "transition": {"duration": 500, "easing": "linear"},
        "currentvalue": {"visible": False},
        "steps": [
            {
                "args": [[str(year)], {
                    "frame": {"duration": 0, "redraw": True},
                    "mode": "immediate",
                    "transition": {"duration": 2}
                }],
                "label": str(year),
                "method": "animate"
            }
            for year in sorted(df['Year'].unique())
        ]
    }]
)

# Save and show
fig.write_html("../plots/worldmap_totalaffected.html")
fig.show()

In [215]:
from plotly.graph_objs import Frame

# Define conflicts with start & end years (inclusive)
conflict_annotations = [
    {"start": 1998, "end": 1999, "text": "Second Congo War"},
    {"start": 2001, "end": 2002, "text": "Invasion of Afghanistan"},
    {"start": 2003, "end": 2011, "text": "Invasion of Iraq"},
    {"start": 2013, "end": 2014, "text": "Outbreak of South Sudanese Civil War"},
    {"start": 2022, "end": 2023, "text": "Invasion of Ukrain"}
]

# Generate frames with annotations for each year
frames = []
for year in sorted(df['Year'].unique()):
    annotations = []
    for conflict in conflict_annotations:
        if conflict["start"] <= int(year) <= conflict["end"]:
            annotations.append(dict(
                text=conflict["text"],
                x=0.5,
                y=1.08,
                xref="paper",
                yref="paper",
                showarrow=False,
                font=dict(size=16, color="black"),
                bgcolor="rgba(255, 255, 255, 0.7)",
                bordercolor="black",
                borderwidth=1
            ))

    frames.append(Frame(name=str(year), layout={"annotations": annotations}))

# Assign frames to the figure
fig.frames = frames

fig.write_html("../plots/worldmap_totalaffected.html")
fig.show()

# Agencies being attacked

In [72]:
# Step 1: Aggregate counts per year per agency
agg_df = long_df.groupby(['Year', 'AgencyType'], as_index=False)['Count'].sum()

# Step 2: Calculate cumulative totals
agg_df.sort_values(by=['AgencyType', 'Year'], inplace=True)
agg_df['Cumulative'] = agg_df.groupby('AgencyType')['Count'].cumsum()

# Step 3: Rank by cumulative within each year
agg_df['Rank'] = agg_df.groupby('Year')['Cumulative'].rank(method='min', ascending=False)

# Optional: convert Year to string for plotting x-axis
agg_df['Year'] = agg_df['Year'].astype(str)

# Preview clean data
agg_df.head()


Unnamed: 0,Year,AgencyType,Count,Cumulative,Rank
0,1997,ICRC,7,7,4.0
6,1998,ICRC,20,27,3.0
12,1999,ICRC,5,32,3.0
18,2000,ICRC,5,37,3.0
24,2001,ICRC,7,44,3.0


In [278]:
fig = go.Figure()


for agency in agg_df['AgencyType'].unique():
    df_agency = agg_df[agg_df['AgencyType'] == agency]
    
    fig.add_trace(go.Scatter(
        x=df_agency['Year'],
        y=df_agency['Rank'],
        mode='lines+markers',
        name=agency,
        line=dict(width=2, shape='spline'),
        marker=dict(size=6),
        customdata=df_agency[['Cumulative']],
        hovertemplate=
            "<b>%{text}</b><br>" +
            "Year: %{x}<br>" +
            "Rank: %{y}<br>" +
            "Cumulative Affected: %{customdata[0]}<extra></extra>",
        text=[agency] * len(df_agency)
    ))

year_ticks = sorted(agg_df['Year'].unique())[::3]



fig.update_yaxes(autorange='reversed', title='Rank (1 = Most Affected)', dtick=1)

fig.update_xaxes(
    title='Year',
    tickmode='array',
    tickvals=year_ticks
)

fig.update_layout(
    title_x=0.5,
    plot_bgcolor='white',
    paper_bgcolor='white',
    margin=dict(l=40, r=40, t=60, b=40),
    showlegend=True,
    
    font=dict(
        family='Courier New, monospace',
        size=14,
        color='black'
    )
)


fig.write_html("../plots/bumpchart_totalaffected_agency.html")
fig.show()



# Attack Context vs Agency Type

In [31]:
# Define agency columns
agency_cols = ['UN', 'INGO', 'ICRC', 'NRCS and IFRC', 'NNGO', 'Other']

# Melt into long format to count affected agencies per actor type
long_df = df.melt(
    id_vars=['Attack context'],
    value_vars=agency_cols,
    var_name='AgencyType',
    value_name='Affected'
)

# Filter out zero or missing values
long_df = long_df[long_df['Affected'] > 0]

heatmap_df = (
    long_df.groupby(['Attack context', 'AgencyType'])
    .size()
    .reset_index(name='Incidents')
)


fig = px.density_heatmap(
    heatmap_df,
    x='AgencyType',
    y='Attack context',
    z='Incidents',
    color_continuous_scale=px.colors.sequential.Reds[:8],  # skips lightest reds
    text_auto=True,
)

# Update layout with monospace font
fig.update_layout(
    xaxis_title='Agency Type',
    yaxis_title='Attack context',
    plot_bgcolor='white',
    paper_bgcolor='white',
    title_x=0.5,
    margin=dict(l=40, r=40, t=60, b=40),
    font=dict(
        family='Courier New, monospace',
        size=14,
        color='black'
    )
)

# Set monospace for text labels inside the heatmap cells
fig.update_traces(
    textfont=dict(
        family='Courier New, monospace',
        size=12,
        color='black'
    )
)


fig.write_html("../plots/heatmap_attackcontext_agency.html")
fig.show()


In [23]:
print(heatmap_df['Incidents'].describe())

count     40.000000
mean     110.100000
std      144.969104
min        1.000000
25%       10.750000
50%       35.500000
75%      173.500000
max      647.000000
Name: Incidents, dtype: float64


# Actor Group and Total affected
Total Aid Workers Affected by Actor Group

In [5]:
df = pd.read_csv("../data/processed/security_incidents_clean.csv")

In [6]:
grouped_impact = (
    df.groupby('ActorGroup', as_index=False)['Total affected']
    .sum()
    .dropna()
    .sort_values(by='Total affected', ascending=False)
)


In [7]:
import plotly.express as px

fig = px.pie(
    grouped_impact,
    values='Total affected',
    names='ActorGroup',
    color_discrete_sequence=px.colors.sequential.Reds[::-1]
)

# Customize pie slices and labels
fig.update_traces(
    textinfo='percent+label',
    pull=[0.05]*len(grouped_impact)
)

# Layout: center title, remove legend, monospace font
fig.update_layout(
    title_x=0.5,
    paper_bgcolor='white',
    showlegend=False,
    font=dict(
        family='Courier New, monospace',
        size=14,
        color='black'
    )
)

fig.write_html("../plots/piechart_actorgroups.html")
fig.show()


# Attack context by actor group

In [9]:
context_df = df.dropna(subset=['Attack context', 'ActorGroup'])

# Group by ActorGroup and Attack context
grouped_context = (
    context_df.groupby(['ActorGroup', 'Attack context'])
    .size()
    .reset_index(name='Count')
)


In [14]:
import os
import plotly.express as px

# Ensure output folder exists
os.makedirs("../plots/attack_contexts", exist_ok=True)

unique_groups = grouped_context['ActorGroup'].unique()

for group in unique_groups:
    df_group = grouped_context[grouped_context['ActorGroup'] == group]

    fig = px.bar(
        df_group,
        x='Attack context',
        y='Count',
        color='Attack context',
        title=f'Attack Contexts for {group}',
        labels={'Count': 'Number of Incidents'},
        color_discrete_sequence=px.colors.sequential.Reds_r,
        text='Count'  # ← annotate bar tops
    )

    fig.update_traces(
    textposition='top',  # ← Try this
    textfont=dict(family='Courier New, monospace', size=12)
    )

    fig.update_layout(
        title_x=0.5,
        showlegend=False,
        plot_bgcolor='white',
        paper_bgcolor='white',
        font=dict(
            family='Courier New, monospace',
            size=14,
            color='black'
        ),
        margin=dict(t=60, l=40, r=40, b=40),
        yaxis=dict(
            showticklabels=False,
            showgrid=False,
            title=None,
            zeroline=False
        ),
        xaxis=dict(  
        tickangle=45
    ),
    uniformtext_minsize=10,
    uniformtext_mode='show'
    )

    # Clean file name
    safe_group_name = group.replace(" ", "_").replace(":", "").replace("/", "")
    fig.write_html(f"../plots/attack_contexts/{safe_group_name}.html")
    fig.show()


ValueError: 
    Invalid value of type 'builtins.str' received for the 'textposition' property of bar
        Received value: 'top'

    The 'textposition' property is an enumeration that may be specified as:
      - One of the following enumeration values:
            ['inside', 'outside', 'auto', 'none']
      - A tuple, list, or one-dimensional numpy array of the above

In [11]:
import plotly.express as px

fig = px.bar(
    grouped_context,
    x='Attack context',
    y='Count',
    color='ActorGroup',
    title='Attack Contexts by Actor Group',
    labels={'Count': 'Number of Incidents'},
    color_discrete_sequence=px.colors.sequential.Reds_r,
    text_auto=True  # Optional: auto display values in the segments
)

fig.update_layout(
    barmode='stack',  # ← Stacked bars
    title_x=0.5,
    plot_bgcolor='white',
    paper_bgcolor='white',
    font=dict(
        family='Courier New, monospace',
        size=14,
        color='black'
    ),
    margin=dict(t=60, l=40, r=40, b=40)
)

# Optional: remove y-axis grid/ticks
fig.update_yaxes(
    showgrid=False,
    ticks='outside',
    tickfont=dict(family='Courier New, monospace', size=12)
)

# Optional: adjust x-axis label font
fig.update_xaxes(
    tickfont=dict(family='Courier New, monospace', size=12)
)

# Save and show

fig.show()


# Most affected by country

In [33]:
top_countries = (
    df.groupby('Country', as_index=False)['Total affected']
    .sum()
    .sort_values(by='Total affected', ascending=False)
    .head(7)
)
top_countries

Unnamed: 0,Country,Total affected
0,Afghanistan,1353
76,South Sudan,890
78,Sudan,667
80,Syrian Arab Republic,608
74,Somalia,585
64,Occupied Palestinian Territories,578
21,DR Congo,448


In [36]:
import plotly.express as px

fig = px.bar(
    top_countries,
    x='Total affected',
    y='Country',
    orientation='h',
    text='Total affected',
    color='Country',
    color_discrete_sequence=px.colors.sequential.Reds_r
)

fig.update_layout(
    title_x=0.5,
    yaxis=dict(categoryorder='total ascending'),
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False,
    font=dict(
        family='Courier New, monospace',
        size=14,
        color='black'
    ),
    xaxis=dict(
        showticklabels=False,  
        showgrid=False        
    )
)


fig.show()


In [145]:
top7 = (
    df.groupby('Country', as_index=False)['Total affected']
    .sum()
    .sort_values(by='Total affected', ascending=False)
    .head(7)['Country']
    .tolist()
)


In [146]:
# Filter data to top 7 countries
subset = df[df['Country'].isin(top7)]

# Drop missing actor or context
subset = subset.dropna(subset=['ActorGroup', 'Attack context'])

# Group data
grouped = (
    subset.groupby(['Country', 'ActorGroup', 'Attack context'])
    .size()
    .reset_index(name='Count')
)


In [148]:
for country in top7:
    data = grouped[grouped['Country'] == country]

    fig = px.bar(
        data,
        x='ActorGroup',
        y='Count',
        color='Attack context',
        barmode='group',
        title=f'Actor Group vs Attack Context in {country}',
        labels={'ActorGroup': 'Actor Group', 'Count': 'Number of Incidents'},
        color_discrete_sequence=px.colors.sequential.Reds_r
    )

    fig.update_layout(
        title_x=0.5,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(t=60, l=40, r=40, b=40),
        xaxis_tickangle=-30
    )

    fig.show()


In [153]:
# Pick one country, e.g., Afghanistan
country = "Afghanistan"

# Filter and drop nulls
sankey_df = df[
    (df['Country'] == country) &
    (df['ActorGroup'].notna()) &
    (df['Attack context'].notna())
]

# Group by ActorGroup and Attack context
grouped = (
    sankey_df.groupby(['ActorGroup', 'Attack context'])
    .size()
    .reset_index(name='Count')
)

# Combine all unique labels (actor + context)
all_labels = pd.unique(grouped['ActorGroup'].tolist() + grouped['Attack context'].tolist())
label_to_index = {label: i for i, label in enumerate(all_labels)}

# Create Sankey link structure
source = grouped['ActorGroup'].map(label_to_index)
target = grouped['Attack context'].map(label_to_index)
value  = grouped['Count']


fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=all_labels,
        color="red"
    ),
    link=dict(
        source=source,
        target=target,
        value=value
    )
)])

fig.update_layout(
    title_text=f"Actor Groups vs Attack Contexts in {country}",
    title_x=0.5,
    font=dict(size=12),
    height=600
)

fig.show()


unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.



In [171]:
# Normalize count values to [0, 1] for color scaling
normalized = (value - value.min()) / (value.max() - value.min())

# Get the Reds_r color scale
colors = px.colors.sequential.Reds[2:]

# Interpolate colors from the scale based on normalized values
def interpolate_color(val):
    # Use index to pick from color scale
    idx = int(val * (len(colors) - 1))
    return colors[idx]

link_colors = [interpolate_color(v) for v in normalized]



country = "Afghanistan"

# Filter and drop nulls
sankey_df = df[
    (df['Country'] == country) &
    (df['ActorGroup'].notna()) &
    (df['Attack context'].notna())
]

# Group by ActorGroup and Attack context
grouped = (
    sankey_df.groupby(['ActorGroup', 'Attack context'])
    .size()
    .reset_index(name='Count')
)

# Combine all unique labels (actor + context)
all_labels = pd.unique(grouped['ActorGroup'].tolist() + grouped['Attack context'].tolist())
label_to_index = {label: i for i, label in enumerate(all_labels)}

# Create Sankey link structure
source = grouped['ActorGroup'].map(label_to_index)
target = grouped['Attack context'].map(label_to_index)
value  = grouped['Count']


fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=all_labels,
        color="lightgray"  # node color
    ),
    link=dict(
        source=source,
        target=target,
        value=value,
        color=link_colors  # 🎨 color based on value
    )
)])

fig.update_layout(
    title_text=f"Actor Groups vs Attack Contexts in {country}",
    title_x=0.5,
    font=dict(size=12),
    height=600
)

fig.show()



unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.



In [173]:
# Make sure the folder exists
os.makedirs("../plots/grp_context_top7", exist_ok=True)

# Define the top 7 countries
top_countries = [
    "Afghanistan",
    "South Sudan",
    "Sudan",
    "Syrian Arab Republic",
    "Somalia",
    "Occupied Palestinian Territories",
    "DR Congo"
]

for country in top_countries:
    # Filter and drop nulls
    sankey_df = df[
        (df['Country'] == country) &
        (df['ActorGroup'].notna()) &
        (df['Attack context'].notna())
    ].copy()

    # Optional: Distinguish unknown source vs target if needed
    sankey_df['ActorGroup_Sankey'] = sankey_df['ActorGroup'].replace({'Unknown': 'Unknown (Actor)'})
    sankey_df['AttackContext_Sankey'] = sankey_df['Attack context'].replace({'Unknown': 'Unknown (Context)'})

    # Group by ActorGroup and Attack context
    grouped = (
        sankey_df.groupby(['ActorGroup_Sankey', 'AttackContext_Sankey'])
        .size()
        .reset_index(name='Count')
    )

    # Skip if there's no data
    if grouped.empty:
        continue

    # Combine all unique labels (actor + context)
    all_labels = pd.unique(grouped['ActorGroup_Sankey'].tolist() + grouped['AttackContext_Sankey'].tolist())
    label_to_index = {label: i for i, label in enumerate(all_labels)}

    # Create link structure
    source = grouped['ActorGroup_Sankey'].map(label_to_index)
    target = grouped['AttackContext_Sankey'].map(label_to_index)
    value = grouped['Count']

    # Normalize and generate dark-red colors
    normalized = (value - value.min()) / (value.max() - value.min())
    reds = px.colors.sequential.Reds[2:]  # Skip lightest 2
    def interpolate_color(val):
        idx = int(val * (len(reds) - 1))
        return reds[idx]
    link_colors = [interpolate_color(v) for v in normalized]

    # Plot
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=all_labels,
            color="lightgray"
        ),
        link=dict(
            source=source,
            target=target,
            value=value,
            color=link_colors
        )
    )])

    fig.update_layout(
        title_text=f"Actor Groups vs Attack Contexts in {country}",
        title_x=0.5,
        font=dict(size=12),
        height=600
    )

    # Save HTML
    filename = f"../plots/grp_context_top7/{country.replace(' ', '_')}.html"
    fig.write_html(filename)
    fig.show()


unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.




unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.




unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.




unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.




unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.




unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.




unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.



In [193]:
os.makedirs("../plots/circlular_bar_charts", exist_ok=True)

# Top 7 countries
top_countries = [
    "Afghanistan",
    "South Sudan",
    "Sudan",
    "Syrian Arab Republic",
    "Somalia",
    "Occupied Palestinian Territories",
    "DR Congo"
]

# Agency columns
agency_cols = ['UN', 'INGO', 'ICRC', 'NRCS and IFRC', 'NNGO', 'Other']

# Loop through each country
for country in top_countries:
    # Filter & reshape
    country_df = df[df['Country'] == country].copy()
    long_df = country_df.melt(
        id_vars=['Total affected'],
        value_vars=agency_cols,
        var_name='AgencyType',
        value_name='Presence'
    )
    long_df = long_df[long_df['Presence'] > 0]

    # Aggregate
    agg = long_df.groupby('AgencyType', as_index=False)['Total affected'].sum()
    if agg.empty:
        continue
    agg['Percentage'] = (agg['Total affected'] / agg['Total affected'].sum()) * 100
    agg = agg.sort_values('Percentage', ascending=True)

    # Setup
    N = len(agg)
    radii = np.arange(1, N + 1)
    theta = 2 * np.pi * agg['Percentage'] / 100
    colors = cm.Reds(np.linspace(0.3, 1, N))  # Red gradient

    # Plot
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={'projection': 'polar'})
    ax.set_facecolor("white")
    fig.patch.set_facecolor("white")
    ax.set_theta_zero_location('N')  # 12 o'clock
    ax.set_theta_direction(1)        # Clockwise
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.spines['polar'].set_visible(False)

    # Draw radial bars
    for i in range(N):
        ax.barh(
            y=radii[i],
            width=theta.iloc[i],
            left=0,
            height=0.9,
            color=colors[i],
            edgecolor='white'
        )

    # Annotate labels on the right
    for i in range(N):
        label = f"{agg['AgencyType'].iloc[i]} {agg['Percentage'].iloc[i]:.1f}%"
        ax.text(
            x=np.radians(-1),  # Slightly to the right of 12 o'clock
            y=radii[i],
            s=label,
            ha='left',
            va='center',
            fontsize=10
        )

    # Save as transparent PNG
    filename = f"../plots/circlular_bar_charts/{country.replace(' ', '_')}.png"
    plt.tight_layout()
    plt.savefig(filename, dpi=300, transparent=True)
    plt.close()
