### Imports

In [13]:
# Download the GeoJSON with Bolivia's departments (the URL is quoted so the shell does not split it at '&')
# !wget -O ../data/geo/bo_departamentos.json "https://geonode.geovisorumsa.com/geoserver/ows?srsName=EPSG%3A4326&outputFormat=json&service=WFS&srs=EPSG%3A4326&request=GetFeature&typename=geonode%3Adepartamentos_bolivia&version=1.0.0"
import numpy as np
import geopandas as gpd
import pandas as pd

### Read femicide data for Bolivia by department, 2013–2022

In [14]:
# Load the femicide case records from the processed Excel workbook
df_femicides = pd.read_excel(
    io='../data/processed/bo_feminicidios.xlsx',
    index_col=None,
)
# Report (rows, columns) of the loaded table
print(df_femicides.shape)
# Preview the first five records
df_femicides.head(n=5)

(998, 6)


Unnamed: 0,N°,Caso,Estado,Fiscal Responsable,Año,Departamento
0,1,F1S1302695,Cerrado Otros,Caso Cerrado,2013-2019,Chuquisaca
1,2,FIS1301290,Con Condena,Edgar Luis Aramayo Chungara,2013-2019,Chuquisaca
2,3,FIS1405345,Con Condena,Caso Cerrado,2013-2019,Chuquisaca
3,4,FIS1404301,Cerrado Otros,Caso Cerrado,2013-2019,Chuquisaca
4,5,FIS1400011,Con Condena,Caso Cerrado,2013-2019,Chuquisaca


### Read geographical data for departments in Bolivia

In [15]:
# Load the department polygons (GeoJSON) into a GeoDataFrame
gdf_bo = gpd.read_file(r'../data/geo/bo_departamentos.json')
# Report (rows, columns) of the loaded layer
print(gdf_bo.shape)
# Preview the first five features
gdf_bo.head(n=5)

(14, 9)


Unnamed: 0,id,fid,gml_id,OBJECTID,ID,COD_DEP,NOM_DEP,DESCRIP,geometry
0,departamentos_bolivia.1,1,departamento1.1,408,261,8,BENI,Departamento,"MULTIPOLYGON (((-63.38139 -15.88395, -63.44492..."
1,departamentos_bolivia.2,2,departamento1.2,25,147,1,CHUQUISACA,Departamento,"MULTIPOLYGON (((-65.20002 -21.06723, -65.20003..."
2,departamentos_bolivia.3,3,departamento1.3,1,0,3,COCHABAMBA,Departamento,"MULTIPOLYGON (((-66.33848 -16.26765, -66.31102..."
3,departamentos_bolivia.4,4,departamento1.4,249,96,2,LA PAZ,Departamento,"MULTIPOLYGON (((-68.77752 -16.42364, -68.77733..."
4,departamentos_bolivia.5,5,departamento1.5,96,428,4,ORURO,Departamento,"MULTIPOLYGON (((-67.52889 -19.86396, -67.52937..."


### Process data

In [16]:
# Normalise the column names of both tables to lower case.
# NOTE: the geodata carries both 'id' and 'ID', so after lower-casing it has
# two 'id' columns; only 'departamento' and 'geometry' are used below.
df_femicides.columns = df_femicides.columns.str.lower()
gdf_bo.columns = gdf_bo.columns.str.lower()
# Rename the department-name column so it matches the join key of the other tables
# (reassignment instead of inplace=True keeps the step explicit and chainable)
gdf_bo = gdf_bo.rename(columns={'nom_dep': 'departamento'})
# Title-case the names ('LA PAZ' -> 'La Paz') and restore the accent missing in the
# geodata; the replacement is scoped to this column instead of scanning the whole frame
gdf_bo['departamento'] = gdf_bo['departamento'].str.title().replace({'Potosi': 'Potosí'})
# Keep only the join key and the geometry
bo = gdf_bo[['departamento', 'geometry']]
# Preview the result
bo.head()

Unnamed: 0,departamento,geometry
0,Beni,"MULTIPOLYGON (((-63.38139 -15.88395, -63.44492..."
1,Chuquisaca,"MULTIPOLYGON (((-65.20002 -21.06723, -65.20003..."
2,Cochabamba,"MULTIPOLYGON (((-66.33848 -16.26765, -66.31102..."
3,La Paz,"MULTIPOLYGON (((-68.77752 -16.42364, -68.77733..."
4,Oruro,"MULTIPOLYGON (((-67.52889 -19.86396, -67.52937..."


### Count femicides by departments in Bolivia

In [17]:
# Number of recorded cases per department, as a Series indexed by department name
count = df_femicides['departamento'].value_counts()
# Turn the Series into a two-column table: the index becomes 'departamento',
# the values become 'count'
df_count = count.rename_axis('departamento').reset_index(name='count')
# Show the table
df_count

Unnamed: 0,departamento,count
0,La Paz,351
1,Santa Cruz,196
2,Cochabamba,169
3,Oruro,77
4,Tarija,59
5,Potosí,49
6,Beni,44
7,Chuquisaca,43
8,Pando,10


### Read female population data for Bolivia by department, 2013–2022

In [18]:
# Load the female-population sheet of the raw population workbook
# Data source: https://nube.ine.gob.bo/index.php/s/vFDAf81LAB0xM4s/download
# NOTE(review): in the displayed table Cochabamba repeats the same value for
# 2017/2018 and for 2019/2020 — verify these figures against the source file.
df_population = pd.read_excel(
    io='../data/raw/bo_poblacion.xlsx',
    sheet_name='poblacion_mujeres',
    index_col=None,
)
# Show the table
df_population

Unnamed: 0,departamento,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Chuquisaca,303245,306107,309054,312095,315225,318445,321738,325129,328589,332123
1,La Paz,1402939,1417937,1432910,1447885,1462830,1477720,1492575,1507377,1522123,1536809
2,Cochabamba,927429,942565,957799,973113,988489,988489,1019354,1019354,1050282,1065739
3,Oruro,253615,255694,257767,259800,261811,263790,265730,267638,269501,271339
4,Potosí,423864,426883,430086,433464,437008,440729,444636,448713,452959,457357
5,Tarija,254511,259370,264220,269036,273835,278610,283358,288075,292742,297386
6,Santa Cruz,1398613,1432450,1466132,1499677,1533007,1566159,1599058,1631632,1663929,1695862
7,Beni,212079,216064,220144,224283,228508,232828,237231,241715,246273,250914
8,Pando,54070,56346,58646,60973,63320,65681,68063,70463,72877,75301


### Compute the mean female population per department

In [19]:
# Year columns: every column except the department label and the derived 'mean'
# ('mean' is excluded so that re-running this cell never averages a previous result)
column_list = [col for col in df_population.columns if col not in ('departamento', 'mean')]
# Row-wise mean over the year columns (vectorized, no per-row Python call)
df_population['mean'] = df_population[column_list].mean(axis=1)
# Keep only the department and its mean population
df_mean = df_population[['departamento', 'mean']]
# Show the table
df_mean

Unnamed: 0,departamento,mean
0,Chuquisaca,317175.0
1,La Paz,1470110.5
2,Cochabamba,993261.3
3,Oruro,262668.5
4,Potosí,439569.9
5,Tarija,276114.3
6,Santa Cruz,1548651.9
7,Beni,231003.9
8,Pando,64574.0


### Join three datasets: 
1. geographical data (`bo`)
2. femicides count (`df_count`)
3. mean of population of females (`df_mean`)

In [20]:
# Inner-join the three tables on the department name; geodata rows that have no
# femicide count or population figure are dropped by the join
df_join = (
    bo
    .merge(df_count, on='departamento', how='inner')
    .merge(df_mean, on='departamento', how='inner')
)
# An inner join drops unmatched rows silently, so make sure no department with
# recorded cases was lost to a name mismatch (accent, casing, spelling)
missing_departments = set(df_count['departamento']) - set(df_join['departamento'])
assert not missing_departments, f'Departments lost in the join: {sorted(missing_departments)}'
# Show the joined table
df_join

Unnamed: 0,departamento,geometry,count,mean
0,Beni,"MULTIPOLYGON (((-63.38139 -15.88395, -63.44492...",44,231003.9
1,Chuquisaca,"MULTIPOLYGON (((-65.20002 -21.06723, -65.20003...",43,317175.0
2,Cochabamba,"MULTIPOLYGON (((-66.33848 -16.26765, -66.31102...",169,993261.3
3,La Paz,"MULTIPOLYGON (((-68.77752 -16.42364, -68.77733...",351,1470110.5
4,Oruro,"MULTIPOLYGON (((-67.52889 -19.86396, -67.52937...",77,262668.5
5,Pando,"MULTIPOLYGON (((-68.79815 -11.00012, -68.79817...",10,64574.0
6,Potosí,"MULTIPOLYGON (((-67.11796 -20.55995, -67.11781...",49,439569.9
7,Santa Cruz,"MULTIPOLYGON (((-62.69866 -20.46269, -62.75646...",196,1548651.9
8,Tarija,"MULTIPOLYGON (((-64.25182 -22.63141, -64.25211...",59,276114.3


### Compute the femicide rate per 100,000 women

In [21]:
# Cases per 100,000 women: case count divided by the mean female population,
# scaled by 100,000 and rounded to a whole number.
# The count covers all records in the dataset while the population is averaged
# over the year columns, so this is a cumulative rate for the period, not an annual one.
femicide_rate = (df_join['count'] / df_join['mean'] * 100000).round(0)
# Store the rounded values using the smallest integer dtype that fits them
df_join['rate'] = pd.to_numeric(femicide_rate, downcast='integer')
# Write the joined table to disk as GeoJSON
df_join.to_file('../data/processed/bo_tasa_feminicidios.geojson', driver='GeoJSON')
# Show the table
df_join

Unnamed: 0,departamento,geometry,count,mean,rate
0,Beni,"MULTIPOLYGON (((-63.38139 -15.88395, -63.44492...",44,231003.9,19
1,Chuquisaca,"MULTIPOLYGON (((-65.20002 -21.06723, -65.20003...",43,317175.0,14
2,Cochabamba,"MULTIPOLYGON (((-66.33848 -16.26765, -66.31102...",169,993261.3,17
3,La Paz,"MULTIPOLYGON (((-68.77752 -16.42364, -68.77733...",351,1470110.5,24
4,Oruro,"MULTIPOLYGON (((-67.52889 -19.86396, -67.52937...",77,262668.5,29
5,Pando,"MULTIPOLYGON (((-68.79815 -11.00012, -68.79817...",10,64574.0,15
6,Potosí,"MULTIPOLYGON (((-67.11796 -20.55995, -67.11781...",49,439569.9,11
7,Santa Cruz,"MULTIPOLYGON (((-62.69866 -20.46269, -62.75646...",196,1548651.9,13
8,Tarija,"MULTIPOLYGON (((-64.25182 -22.63141, -64.25211...",59,276114.3,21
