# **IMPORTS**

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# **Database Path**



In [1]:
path = 'Dados/Brazil_Lethal_violence.csv'

# **Understanding the context of the data**

The dataset we are analyzing covers individual events of organized violence (lethal violence phenomena occurring in a specific time and place). These events are detailed enough to be geocoded to the level of individual villages with disaggregated time durations for individual days.

Content
* Brazil - Lethal violence dataset consists of 1501 Rows & 29 Columns :
* id – (A unique number to identity the protest)
* year - (Year of the protest)
* active_year – (A conflict is deemed to be active if there are at least 25 battle-related deaths per calendar year in one of the conflict’s dyads.)
* conflict_name – (Name of the conflict)
* dyad_name – (A dyad is made up of two actors)
* side_a - (First side of the actor)
* side_b - (Second side of the actor)
* number_of_sources – (Count of information sources )
* source_article – (Source of the article)
* source_office – (Source of the office)
* source_date - (Date of the source)
* source_headline – (Source news)
* source_original – (Source origin)
* where_coordinates – (Location)
* where_description – (Location description)
* adm_1 - (State)
* adm_2 - (Municipality)
* latitude - (Latitude)
* longitude - (Longitude)
* geom_wkt - (Coordinates)
* date_start - (Start date)
* date_end - (End date)
* deaths_a - (No.deaths_a)
* deaths_b - (No.deaths_b)
* deaths_civilians - (No.deaths_civilians)
* deaths_unknown - (No.deaths_unknown)
* best - (If the number is low)
* high - (Highest No.deaths)
* low - (Lowest No.deaths)**

# **Data visualization and DataFrame creation**

In [3]:
df = pd.read_csv(path)

In [18]:
df

Unnamed: 0,id,year,active_year,conflict_name,dyad_name,side_a,side_b,number_of_sources,source_article,source_office,...,geom_wkt,date_start,date_end,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best,high,low
0,98024.0,1993,1.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1996-05-04,-""",Reuters,...,POINT (-43.2075 -22.90278),1993-07-23 00:00:00.000,1993-07-23 00:00:00.000,0.0,0.0,8.0,0.0,8,8.0,8.0
1,98028.0,1993,1.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1997-07-02,-""",Reuters,...,POINT (-43.2075 -22.90278),1993-08-30 00:00:00.000,1993-08-30 00:00:00.000,0.0,0.0,21.0,0.0,21,21.0,21.0
2,98029.0,1994,0.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,2.0,"""Reuters News,1994-10-18,Rio police kill 13 in...",Reuters News;Reuters News,...,POINT (-43.2075 -22.90278),1994-10-18 00:00:00.000,1994-10-18 00:00:00.000,0.0,0.0,0.0,0.0,0,13.0,0.0
3,84044.0,1994,0.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1994-12-05,-""",Reuters,...,POINT (-43.2075 -22.90278),1994-12-04 00:00:00.000,1994-12-04 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,0.0
4,98031.0,1995,0.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1995-03-05,-""",Reuters,...,POINT (-43.2075 -22.90278),1995-03-05 00:00:00.000,1995-03-05 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1496,432731.0,2021,1.0,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre,Família Terror Amapá,1.0,"""Seles Nafes,2021-11-22,Half-open prisoner is ...",Seles Nafes,...,POINT (-51.06639 0.03889),2021-11-22 00:00:00.000,2021-11-22 00:00:00.000,0.0,0.0,0.0,1.0,1,1.0,1.0
1497,431610.0,2021,1.0,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre,Família Terror Amapá,1.0,"""Seles Nafes,2021-12-08,Tied to a chair, young...",Seles Nafes,...,POINT (-51.18167 -0.05833),2021-12-07 00:00:00.000,2021-12-07 00:00:00.000,0.0,1.0,0.0,0.0,1,1.0,1.0
1498,432729.0,2021,1.0,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre,Família Terror Amapá,1.0,"""Seles Nafes,2021-12-16,7 shots: inmate of the...",Seles Nafes,...,POINT (-51.06639 0.03889),2021-12-15 00:00:00.000,2021-12-15 00:00:00.000,0.0,0.0,0.0,1.0,1,1.0,1.0
1499,432728.0,2021,1.0,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre - Família Terror Amapá,Amigos Para Sempre,Família Terror Amapá,1.0,"""Seles Nafes,2021-12-21,Shooting between rival...",Seles Nafes,...,POINT (-51.06639 0.03889),2021-12-17 00:00:00.000,2021-12-17 00:00:00.000,0.0,0.0,0.0,1.0,1,1.0,1.0


# **Statistics**

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1501 entries, 0 to 1500
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 1501 non-null   float64
 1   year               1501 non-null   int64  
 2   active_year        1501 non-null   float64
 3   conflict_name      1501 non-null   object 
 4   dyad_name          1501 non-null   object 
 5   side_a             1501 non-null   object 
 6   side_b             1501 non-null   object 
 7   number_of_sources  1501 non-null   float64
 8   source_article     1501 non-null   object 
 9   source_office      1501 non-null   object 
 10  source_date        1501 non-null   object 
 11  source_headline    1501 non-null   object 
 12  source_original    1494 non-null   object 
 13  where_coordinates  1501 non-null   object 
 14  where_description  1490 non-null   object 
 15  adm_1              1501 non-null   object 
 16  adm_2              1462 

In [21]:
df.isnull().sum()

id                    0
year                  0
active_year           0
conflict_name         0
dyad_name             0
side_a                0
side_b                0
number_of_sources     0
source_article        0
source_office         0
source_date           0
source_headline       0
source_original       7
where_coordinates     0
where_description    11
adm_1                 0
adm_2                39
latitude              0
longitude             0
geom_wkt              0
date_start            0
date_end              0
deaths_a              0
deaths_b              0
deaths_civilians      0
deaths_unknown        0
best                  0
high                  0
low                   0
dtype: int64

In [7]:
df['conflict_name'].value_counts()

conflict_name
Comando Vermelho - GDE                       830
Comando Vermelho - Terceiro Comando           83
Amigos Para Sempre - Família Terror Amapá     75
Comando Vermelho - PCC                        71
GDE, PCC - Sindicato RN                       71
Cartel do Norte - Comando Vermelho            64
PCC - Sindicato RN                            50
Bonde dos 13 - Comando Vermelho               41
Amigos dos Amigos - Comando Vermelho          36
Bonde do Maluco - Comando Vermelho            30
Comando Vermelho - Terceiro Comando Puro      28
Anti-Bala - Bala na Cara                      26
Associação Família Capixaba - PCC             24
Government of Brazil - Civilians              23
Bonde do Maluco - Katiara                     22
Barriga gang - Sujeirinha gang                 9
FDN - PCC                                      7
Comando Vermelho - FDN                         7
Okaida - Estados Unidos                        3
Comando Classe A - Comando Vermelho            1
Name: 

In [19]:
df['dyad_name'].value_counts()

dyad_name
Comando Vermelho - GDE                       830
Comando Vermelho - Terceiro Comando           83
Amigos Para Sempre - Família Terror Amapá     75
Comando Vermelho - PCC                        71
GDE, PCC - Sindicato RN                       71
Cartel do Norte - Comando Vermelho            64
PCC - Sindicato RN                            50
Bonde dos 13 - Comando Vermelho               41
Amigos dos Amigos - Comando Vermelho          36
Bonde do Maluco - Comando Vermelho            30
Comando Vermelho - Terceiro Comando Puro      28
Anti-Bala - Bala na Cara                      26
Associação Família Capixaba - PCC             24
Government of Brazil - Civilians              23
Bonde do Maluco - Katiara                     22
Barriga gang - Sujeirinha gang                 9
FDN - PCC                                      7
Comando Vermelho - FDN                         7
Okaida - Estados Unidos                        3
Comando Classe A - Comando Vermelho            1
Name: coun

## 

# Recorte dos dados

O dataframe possui muitos conflitos que envolvem diferentes díades e analisar todos os dados poderia acarretar muito tempo para uma análise, então decide por analisar um tipo de conflito, os que envolvem o governo do Brasil e a população cívil. Para isso filtrei o dataframe apenas com os conflitos que da valor 'Government of Brazil - Civilians' na coluna 'conflitct_name'

In [28]:
government_civilian_conflicts = df[df['conflict_name']=='Government of Brazil - Civilians']

In [29]:
civilian_conflicts.head()

Unnamed: 0,id,year,active_year,conflict_name,dyad_name,side_a,side_b,number_of_sources,source_article,source_office,...,geom_wkt,date_start,date_end,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best,high,low
0,98024.0,1993,1.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1996-05-04,-""",Reuters,...,POINT (-43.2075 -22.90278),1993-07-23 00:00:00.000,1993-07-23 00:00:00.000,0.0,0.0,8.0,0.0,8,8.0,8.0
1,98028.0,1993,1.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1997-07-02,-""",Reuters,...,POINT (-43.2075 -22.90278),1993-08-30 00:00:00.000,1993-08-30 00:00:00.000,0.0,0.0,21.0,0.0,21,21.0,21.0
2,98029.0,1994,0.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,2.0,"""Reuters News,1994-10-18,Rio police kill 13 in...",Reuters News;Reuters News,...,POINT (-43.2075 -22.90278),1994-10-18 00:00:00.000,1994-10-18 00:00:00.000,0.0,0.0,0.0,0.0,0,13.0,0.0
3,84044.0,1994,0.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1994-12-05,-""",Reuters,...,POINT (-43.2075 -22.90278),1994-12-04 00:00:00.000,1994-12-04 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,0.0
4,98031.0,1995,0.0,Government of Brazil - Civilians,Government of Brazil - Civilians,Government of Brazil,Civilians,1.0,"""Reuters,1995-03-05,-""",Reuters,...,POINT (-43.2075 -22.90278),1995-03-05 00:00:00.000,1995-03-05 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,1.0


## Excluindo colunas descenessárias
Podemos perceber que as colunas conflitct_name e dyad_name carregam o mesmo tipo de informação, então podemos excluir uma delas. Além disso as colunas side_a e side_b estão fazendo algo semelhante, então exclluiremos elas também do dataframe

In [31]:
government_civilian_conflicts.drop(columns=['conflict_name','dyad_name','side_a','side_b'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  government_civilian_conflicts.drop(columns=['conflict_name','dyad_name','side_a','side_b'], inplace=True)


In [32]:
government_civilian_conflicts.head()

Unnamed: 0,id,year,active_year,number_of_sources,source_article,source_office,source_date,source_headline,source_original,where_coordinates,...,geom_wkt,date_start,date_end,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best,high,low
0,98024.0,1993,1.0,1.0,"""Reuters,1996-05-04,-""",Reuters,1996-05-04,-,police,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1993-07-23 00:00:00.000,1993-07-23 00:00:00.000,0.0,0.0,8.0,0.0,8,8.0,8.0
1,98028.0,1993,1.0,1.0,"""Reuters,1997-07-02,-""",Reuters,1997-07-02,-,police,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1993-08-30 00:00:00.000,1993-08-30 00:00:00.000,0.0,0.0,21.0,0.0,21,21.0,21.0
2,98029.0,1994,0.0,2.0,"""Reuters News,1994-10-18,Rio police kill 13 in...",Reuters News;Reuters News,1994-10-18;1995-05-12,Rio police kill 13 in shootout with trafficker...,police,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1994-10-18 00:00:00.000,1994-10-18 00:00:00.000,0.0,0.0,0.0,0.0,0,13.0,0.0
3,84044.0,1994,0.0,1.0,"""Reuters,1994-12-05,-""",Reuters,1994-12-05,-,Brazilian army,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1994-12-04 00:00:00.000,1994-12-04 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,0.0
4,98031.0,1995,0.0,1.0,"""Reuters,1995-03-05,-""",Reuters,1995-03-05,-,Globo,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1995-03-05 00:00:00.000,1995-03-05 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,1.0


Também podemos observar que a coluna 'soucer_article' está carregando as informações das colunas 'source_office' e 'souce_date'. Vamos excluir exclui-la e preservar as outras duas.

In [35]:
government_civilian_conflicts.drop(columns='source_article', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  government_civilian_conflicts.drop(columns='source_article', inplace=True)


In [36]:
government_civilian_conflicts.head()

Unnamed: 0,id,year,active_year,number_of_sources,source_office,source_date,source_headline,source_original,where_coordinates,where_description,...,geom_wkt,date_start,date_end,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best,high,low
0,98024.0,1993,1.0,1.0,Reuters,1996-05-04,-,police,Rio de Janeiro town,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1993-07-23 00:00:00.000,1993-07-23 00:00:00.000,0.0,0.0,8.0,0.0,8,8.0,8.0
1,98028.0,1993,1.0,1.0,Reuters,1997-07-02,-,police,Rio de Janeiro town,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1993-08-30 00:00:00.000,1993-08-30 00:00:00.000,0.0,0.0,21.0,0.0,21,21.0,21.0
2,98029.0,1994,0.0,2.0,Reuters News;Reuters News,1994-10-18;1995-05-12,Rio police kill 13 in shootout with trafficker...,police,Rio de Janeiro town,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1994-10-18 00:00:00.000,1994-10-18 00:00:00.000,0.0,0.0,0.0,0.0,0,13.0,0.0
3,84044.0,1994,0.0,1.0,Reuters,1994-12-05,-,Brazilian army,Rio de Janeiro town,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1994-12-04 00:00:00.000,1994-12-04 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,0.0
4,98031.0,1995,0.0,1.0,Reuters,1995-03-05,-,Globo,Rio de Janeiro town,Rio de Janeiro town,...,POINT (-43.2075 -22.90278),1995-03-05 00:00:00.000,1995-03-05 00:00:00.000,0.0,0.0,1.0,0.0,1,1.0,1.0
