In [1]:
import pandas as pd
import matplotlib.pyplot as plt

## Lethal attacks against land and environmental defenders between 2012 and 2021
__[DRAFT] data exploration + visualizations__

data source: Global Witness (www.globalwitness.org)

In [2]:
# Read the Global Witness records of lethal attacks against land and environmental defenders.
# Source page: https://www.globalwitness.org/en/campaigns/environmental-activists/numbers-lethal-attacks-against-defenders-2012/
# The CSV path is relative to the notebook's working directory.
gw_data = pd.read_csv(filepath_or_buffer="global_witness_led_22-09-22.csv")

In [3]:
# Styled preview of the first ten records: the "id" and "country_numeric"
# columns and the row index are hidden, every cell gets a tinted background,
# header cells get their own tint, and the hovered row is highlighted.
(
    gw_data.head(10)
    .style
    .hide(axis="columns", subset=["id", "country_numeric"])
    .hide(axis="index")
    .set_properties(
        **{
            'background-color': 'RGBA(232, 232, 250, .5)',
            'border': '1.5px solid white',
            'color': '#495057',
        }
    )
    .set_table_styles(
        [
            {'selector': 'tr:hover', 'props': [('background-color', '#fffacd')]},
            {'selector': 'th', 'props': [('background-color', 'RGBA(180, 142, 173, .5)')]},
        ]
    )
)

date,name,gender,age,person_characteristics,industry_driver,perpetrator_type,continent,country,local_region,number_of_victims
2016-02-18,Aníbal Coronado Madera,Male,49.0,Other,Sector could not be confirmed*,Private military actors,Americas,Colombia,Córdoba,1
2016-10-16,Yimer Chávez Rivera,Male,31.0,Small-scale farmer,Sector could not be confirmed*,Private military actors,Americas,Colombia,Cauca,1
2015-02-10,Héctor William Cabrera Suárez,Male,58.0,Other,Sector could not be confirmed*,Unspecified,Americas,Colombia,Caquetá,1
2016-04-27,Rolan Lonin Casiano,Male,32.0,Small-scale farmer,Sector could not be confirmed*,Armed forces,Asia,Philippines,Bicol,1
2015-05-22,Benilda Santos,Female,43.0,,Sector could not be confirmed*,Unspecified,Asia,Philippines,Quezon City,1
2016-08-29,Diego Alfredo Chirán Nastacuas,Male,24.0,Indigenous peoples,Sector could not be confirmed*,Unspecified,Americas,Colombia,Nariño,1
2014-04-21,Luis Javier Campo Méndez,Male,22.0,Indigenous peoples,Sector could not be confirmed*,Unspecified,Americas,Colombia,,1
2016-12-30,Yaneth Alejandra Calvache Viveros,Female,,Small-scale farmer,Sector could not be confirmed*,Hitmen,Americas,Colombia,Cauca,1
2017-10-24,Aulio Isarama Forastero,Male,,Indigenous peoples,Sector could not be confirmed*,Private military actors,Americas,Colombia,Chocó,1
2016-05-20,Manuel Chimá Pérez,Male,21.0,Indigenous peoples,Sector could not be confirmed*,Private military actors,Americas,Colombia,Antioquia,1


In [3]:
import plotly.express as px
import numpy as np

In [5]:
# Number of records for each value of the "gender" column.
gw_data.gender.value_counts()

gender
Male       1545
Female      183
Unknown       1
Name: count, dtype: int64

In [6]:
# Show the record(s) whose gender is recorded as "Unknown".
gw_data.loc[gw_data["gender"] == "Unknown"]

Unnamed: 0,id,date,name,gender,age,person_characteristics,industry_driver,perpetrator_type,continent,country,country_numeric,local_region,number_of_victims
566,recNXHR5uIQNXxxZo,2013-04-14,Criança Indígena Yanomami,Unknown,,Indigenous peoples,Sector could not be confirmed*,Unspecified,Americas,Brazil,76,,1


In [9]:
# Build the gender breakdown from the data itself instead of hard-coding the
# counts, so the treemap stays correct if the dataset is refreshed and the
# labels use the dataset's own spelling (e.g. "Unknown").
# value_counts() skips missing values and sorts by descending frequency.
gender_counts = gw_data["gender"].value_counts()
gender_data = {
    "gender": gender_counts.index.tolist(),
    "count": gender_counts.tolist(),
}
gender_df = pd.DataFrame(gender_data)

In [58]:
# Treemap with one tile per gender, sized by its count; each tile is labelled
# with the gender and its share of the total.
treemap_gender = px.treemap(
    gender_df,
    path=['gender'],
    values='count',
    color='gender',
    color_discrete_sequence=["#81a1c1", "#a3be8c", "#b48ead", "#b48ead"],
    hover_name='count',
    title="Victims distributed by gender",
    width=600,
    height=400,
)
treemap_gender.update_traces(textinfo="label+percent root")

treemap_gender.show()

In [69]:
# Keep only the per-victim attributes used by the age and characteristic charts.
scatter_df = gw_data.loc[:, ["age", "gender", "person_characteristics", "number_of_victims"]]

In [70]:
# Data-entry error in the age column: one record has an age of 4145 (uncomment the filter below to inspect it)
#scatter_df[scatter_df.age > 90]

In [67]:
# One record lists an age of 4145; assume it is a typo and should be 41.
# The nested mapping limits the replacement to the "age" column, so an equal
# value in any other column (e.g. number_of_victims) is left untouched.
fixed_df = scatter_df.replace({"age": {4145.0: 41}})

In [71]:
#fixed_df[fixed_df.age > 90]

In [110]:
# Age histogram of the cleaned records, with one colour per gender.
hist_age_gender = px.histogram(
    fixed_df,
    x="age",
    color="gender",
    color_discrete_sequence=["#81a1c1", "#a3be8c", "#b48ead"],
    title="Victims distributed by age and gender",
    width=800,
    height=500,
)
hist_age_gender.show()

In [61]:
# Number of records for each value of the "person_characteristics" column.
gw_data.person_characteristics.value_counts()

person_characteristics
Indigenous peoples                              613
Small-scale farmer                              433
Other                                           286
State official                                   93
Afrodescendant                                   63
Park ranger                                      56
Journalist                                       26
Demonstrators                                    25
Children                                         16
Lawyer                                           15
Relatives of persons targeted for repression     11
Police                                            4
Fishery observer                                  3
Name: count, dtype: int64

In [124]:
# Bar chart of victims per personal characteristic, coloured by gender.
# Uses the cleaned frame so the hover shows the corrected age.
bar_characteristic = px.bar(
    fixed_df,
    x='person_characteristics',
    y='number_of_victims',
    color="gender",
    color_discrete_sequence=["#6A3E75", "#40c057", "#f08c00"],
    hover_name="person_characteristics",
    hover_data="age",
    title="Victims distributed by their personal characteristic and gender",
    width=600,
    height=600,
)
bar_characteristic.show()

In [29]:
# Horizontal bar chart of victims per industry driver, coloured by continent.
bar_industry = px.bar(
    gw_data,
    x='number_of_victims',
    y='industry_driver',
    orientation='h',
    color="continent",
    color_discrete_sequence=["#8a2be2", "#087f5b", "#f08c00", "#364fc7", "#c2255c"],
    hover_name="industry_driver",
    hover_data="age",
    title="Attacks by industry associated",
    width=800,
    height=600,
)
bar_industry.show()

In [26]:
# Horizontal bar chart of victims per perpetrator type, coloured by continent.
bar_perpetrator = px.bar(
    gw_data,
    x='number_of_victims',
    y='perpetrator_type',
    orientation='h',
    color="continent",
    color_discrete_sequence=["#8a2be2", "#087f5b", "#f08c00", "#364fc7", "#c2255c"],
    hover_name="perpetrator_type",
    hover_data="age",
    title="Attacks by type of perpetrator",
    width=800,
    height=600,
)
bar_perpetrator.show()

In [22]:
# Geographic columns plus the victim count, used by the country treemap.
countries_and_continents = gw_data.loc[:, ["country", "continent", "number_of_victims"]]

In [55]:
# Nested treemap (world > continent > country) sized by the number of victims;
# each tile is labelled with its name and value.
treemap_countries = px.treemap(
    countries_and_continents,
    path=[px.Constant("world"), 'continent', 'country'],
    values='number_of_victims',
    color='country',
    color_discrete_sequence=["#81a1c1", "#a3be8c", "#b48ead", "#ebcb8b", "#d08770", "#88c0d0", "#bf616a"],
    hover_name='number_of_victims',
    title="Victims by country",
    width=1000,
    height=800,
)
treemap_countries.update_traces(textinfo="label+value")
treemap_countries.update_layout(margin={"t": 50, "l": 25, "r": 25, "b": 25})

treemap_countries.show()

In [120]:
# Total number of victims per country, with "country" kept as a regular column.
victims_by_country = gw_data.groupby("country", as_index=False)[["number_of_victims"]].sum()

In [123]:
#victims_by_country.head()

In [132]:
# World choropleth shaded by each country's total number of victims;
# countries are matched by name and all figure margins are removed.
map_victims = px.choropleth(
    victims_by_country,
    locations="country",
    locationmode="country names",
    color='number_of_victims',
    color_continuous_scale="sunsetdark",
    projection="natural earth",
)
map_victims.update_layout(margin=dict(r=0, t=0, l=0, b=0))
map_victims.show()

In [105]:
# Animated world map with one frame per attack date, highlighting the
# countries with recorded attacks on that date.
# Plotly Express orders animation frames by the first appearance of each
# frame value in the data, and the records are not stored chronologically,
# so sort by date first (ISO "YYYY-MM-DD" strings sort chronologically) to
# make the animation play forward in time. The stable sort keeps the
# original order of records that share a date.
map_anim = px.choropleth(
    gw_data.sort_values("date", kind="stable"),
    locations="country",
    locationmode="country names",
    color="country",
    color_discrete_sequence=["#81a1c1", "#a3be8c", "#b48ead", "#ebcb8b", "#d08770", "#88c0d0", "#bf616a"],
    hover_data=["country", "local_region", "number_of_victims", "person_characteristics", "age", "industry_driver", "perpetrator_type"],
    width=900,
    height=700,
    animation_frame="date",
    projection="natural earth",
)
map_anim.show()

In [135]:
# Bar chart of the number of victims on each attack date.
# The title previously read "victimes"; the typo is fixed here.
time_line = px.bar(
    gw_data,
    x="date",
    y="number_of_victims",
    title='Number of victims over time',
    color_discrete_sequence=["#2e3440"],
)
time_line.show()

In [8]:
# Separate frame whose "date" column holds parsed timestamps; values that
# cannot be parsed become NaT instead of raising. gw_data is left unchanged.
gw_df = gw_data.assign(date=pd.to_datetime(gw_data["date"], errors="coerce"))

In [20]:
# Calendar year of each record, taken from the parsed "date" column.
gw_df.date.dt.year

0       2016
1       2016
2       2015
3       2016
4       2015
        ... 
1728    2021
1729    2021
1730    2021
1731    2013
1732    2018
Name: date, Length: 1733, dtype: int32

In [25]:
# Stacked bar chart of victims per calendar year, coloured by continent.
# The year series is passed directly as x and relabelled "year" via `labels`.
yearly_bar = px.bar(
    gw_df,
    x=gw_df["date"].dt.year,
    y='number_of_victims',
    color="continent",
    color_discrete_sequence=["#8a2be2", "#087f5b", "#f08c00", "#364fc7", "#c2255c"],
    labels={"x": "year"},
    title="Number of victims per year",
    width=1000,
    height=800,
)
yearly_bar.show()