# Homicides by Sex Data Analysis

## Requirements

In [1]:
import os, sys
import polars as pl
import numpy as np

In [2]:
# All paths are resolved from the current working directory, which stands in
# for the os.path.dirname(os.path.abspath(__file__)) a plain script would use.
SCRIPT_PATH = os.getcwd()
PROJECT_PATH = os.path.join(SCRIPT_PATH, os.pardir)

# Input: the processed homicide-rate CSV under <project>/data/processed.
INPUT_DIR = os.path.join(PROJECT_PATH, 'data', 'processed')
INPUT_FILE = os.path.join(
    INPUT_DIR,
    'processed_unodc_intentional_homicide_rate.csv',
)

# Output: folder for saved figures; created here if it does not exist yet.
PLOTS_PATH = os.path.join(PROJECT_PATH, 'plots')
os.makedirs(PLOTS_PATH, exist_ok=True)

In [3]:
# Put the project root on the import path so the `config` and `src` imports
# below resolve. The membership check keeps the cell idempotent: re-running it
# no longer appends a duplicate entry each time.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from config.config_01a import (
    SELECTED_COUNTRIES, 
    COLOR_MAP, 
    CATEGORY_ORDERS,
    HOVER_DATA,
    LABELS, 
    HOVER_TEMPLATES,
    PLOT_FILENAME,
    PROP_YEARS_IN_PERIOD_LIMIT,
    REF_REGION_FOR_START_YEAR
)

from src.plots_utils import time_series_plot, barplot
from src.analysis_utils import process_time_series_data, process_ranking_data

In [4]:
# Read the processed homicide-rate CSV located at INPUT_FILE with polars.
# `df` is the raw input for the time-series processing step that follows.
df = pl.read_csv(INPUT_FILE)

In [5]:
# Build the time-series data (country-level and region-level entries),
# broken down by the column named in `by`.
# `by` is kept as its own variable because the ranking cell reuses it.
by = 'Sex'

df_time_series, optimal_min_year, max_year = process_time_series_data(
    df=df,
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    ref_region_for_start_year=REF_REGION_FOR_START_YEAR,
    by=by,
)

⚙️ Procesando desglose por: SEXO
📅 Periodo: 2000-2023
✅ Países válidos para región: 21 de 31
--------------------------------------------------------------------------------


In [6]:
# Build the rankings by country and by region from the country-level series.
# Each entry of initial_years is the start year of one ranking period; the
# recorded outputs show periods such as '1990-2023' and '2019-2023'.
initial_years = [
    1990,
    optimal_min_year,  # 2000 in the recorded run
    2014,
    2019,
]

df_ranking_combined, df_ranking_dict = process_ranking_data(
    df=df_time_series['country'],
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    initial_years=initial_years,
    max_year=max_year,
    by=by,
)

🔄 Procesando ranking (Sex) para: 1990-2023
----------------------------------------------------------------------------------------------------
📊 Ranking Period: 1990 - 2023
   Segmentado por: Sex
   Países analizados: 31
   Países seleccionados (Data > 65.0%): 17
   Países descartados: ['France', 'Argentina', 'El Salvador', 'Chile', 'Republic of Korea', 'Singapore', 'China', 'Philippines', 'Indonesia', 'Thailand', 'Türkiye', 'Morocco', 'Egypt', 'South Africa']
   Prop. datos (años) en el periodo, por pais: {'Germany': 1.0, 'Denmark': 1.0, 'USA': 1.0, 'Egypt': 0.26, 'Chile': 0.56, 'India': 0.71, 'Greece': 0.97, 'Sweden': 0.74, 'Mexico': 1.0, 'Indonesia': 0.03, 'Romania': 1.0, 'Republic of Korea': 0.38, 'Thailand': 0.56, 'Italy': 0.97, 'Argentina': 0.24, 'Brazil': 1.0, 'South Africa': 0.35, 'Colombia': 1.0, 'Japan': 1.0, 'Spain': 0.68, 'Norway': 1.0, 'Türkiye': 0.53, 'Portugal': 0.85, 'Philippines': 0.32, 'United Kingdom': 0.65, 'El Salvador': 0.53, 'Venezuela': 0.71, 'Singa

---

In [7]:
# Display the 'country' entry of df_time_series (returned by process_time_series_data).
df_time_series['country']

Country,Region,Subregion,Dimension,Category,Sex,Age,Year,homicides_rate,homicides_count,population,Region_2,homicides_rate_abs_change
str,str,str,str,str,str,str,i64,f64,f64,i64,str,f64
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Female""","""Total""",2015,1.76,387.0,21939310,"""Latam""",
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Male""","""Total""",2015,11.38,2450.0,21537702,"""Latam""",
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Male""","""Total""",2017,8.7,1909.0,21953153,"""Latam""",-2.68
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Female""","""Total""",2017,1.83,408.0,22335741,"""Latam""",0.07
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Female""","""Total""",2018,1.76,396.0,22512096,"""Latam""",-0.07
…,…,…,…,…,…,…,…,…,…,…,…,…
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Male""","""Total""",2015,96.12,14606.0,15195272,"""Latam""",-6.72
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Female""","""Total""",2016,5.2,806.0,15485244,"""Latam""",0.45
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Male""","""Total""",2016,106.54,16280.0,15280476,"""Latam""",10.42
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Female""","""Total""",2017,5.12,788.0,15396408,"""Latam""",-0.08


In [8]:
# Display the 'region' entry of df_time_series (returned by process_time_series_data).
df_time_series['region']

Region_2,Year,Sex,mean_homicides_rate
str,i64,str,f64
"""Asia""",1990,"""Female""",0.49
"""Asia""",1990,"""Male""",0.61
"""Asia""",1991,"""Female""",0.47
"""Asia""",1991,"""Male""",0.56
"""Asia""",1992,"""Female""",0.48
…,…,…,…
"""USA""",2021,"""Male""",10.51
"""USA""",2022,"""Female""",2.81
"""USA""",2022,"""Male""",10.18
"""USA""",2023,"""Female""",2.55


In [9]:
# Display the 'country' entry of df_ranking_combined (returned by process_ranking_data).
df_ranking_combined['country']

Country,Sex,mean_homicides_rate,Region_2,Periodo
str,str,f64,str,str
"""Japan""","""Female""",0.41,"""Asia""","""1990-2023"""
"""Japan""","""Male""",0.45,"""Asia""","""1990-2023"""
"""Italy""","""Female""",0.55,"""Europe""","""1990-2023"""
"""Spain""","""Female""",0.57,"""Spain""","""1990-2023"""
"""Sweden""","""Female""",0.61,"""Europe""","""1990-2023"""
…,…,…,…,…
"""Chile""","""Male""",10.13,"""Latam""","""2019-2023"""
"""Brazil""","""Male""",37.34,"""Latam""","""2019-2023"""
"""El Salvador""","""Male""",39.42,"""Latam""","""2019-2023"""
"""Colombia""","""Male""",45.49,"""Latam""","""2019-2023"""


In [10]:
# Display the 'region' entry of df_ranking_combined (returned by process_ranking_data).
df_ranking_combined['region']

Region_2,Sex,mean_homicides_rate,Periodo
str,str,f64,str
"""Spain""","""Female""",0.57,"""1990-2023"""
"""Europe""","""Female""",0.78,"""1990-2023"""
"""Spain""","""Male""",1.17,"""1990-2023"""
"""Asia""","""Female""",1.6,"""1990-2023"""
"""Europe""","""Male""",1.69,"""1990-2023"""
…,…,…,…
"""USA""","""Female""",2.62,"""2019-2023"""
"""Latam""","""Female""",3.44,"""2019-2023"""
"""Africa""","""Male""",3.7,"""2019-2023"""
"""USA""","""Male""",9.49,"""2019-2023"""


---

## Time Series

In [11]:
# Country-level series with facet_col='Sex' and color='Country'.
#
# PLOT_FILENAME['time_series'] is shared by every time-series cell in this
# notebook, so saving under that exact name lets later cells overwrite this
# figure. A suffix is inserted before the extension to give it its own file.
# NOTE(review): color is keyed by 'Country' while the colour map passed is the
# 'Region_2' one — confirm time_series_plot resolves colours as intended.
ts_root, ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    facet_col='Sex',
    color='Country',
    default_visible_name='Spain',
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo por Sexo (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, f'{ts_root}_country_color_country{ts_ext}'),
    show=True,
)

In [12]:
# Country-level series with facet_col='Sex' and color='Region_2'.
#
# PLOT_FILENAME['time_series'] is shared by every time-series cell in this
# notebook, so saving under that exact name lets later cells overwrite this
# figure. A suffix is inserted before the extension to give it its own file.
ts_root, ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    facet_col='Sex',
    color='Region_2',
    default_visible_name='Spain',
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo por Región y Sexo (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, f'{ts_root}_country_color_region{ts_ext}'),
    show=True,
)

In [13]:
# Country-level series in a single panel: line_dash='Sex', color='Region_2'.
#
# PLOT_FILENAME['time_series'] is shared by every time-series cell in this
# notebook, so saving under that exact name lets later cells overwrite this
# figure. A suffix is inserted before the extension to give it its own file.
ts_root, ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    line_dash='Sex',
    color='Region_2',
    default_visible_name=['Spain, Female', 'Spain, Male'],
    title='Evolución de la Tasa de Homicidios por Región y Sexo (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, f'{ts_root}_country_dash_sex{ts_ext}'),
    show=True,
)

In [14]:
# Region-level mean series with facet_col='Sex' and color='Region_2'.
#
# PLOT_FILENAME['time_series'] is shared by every time-series cell in this
# notebook, so saving under that exact name lets later cells overwrite this
# figure. A suffix is inserted before the extension to give it its own file.
ts_root, ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['region'],
    x='Year',
    y='mean_homicides_rate',
    line_group='Region_2',
    facet_col='Sex',
    color='Region_2',
    default_visible_name='Spain',
    title='Evolución de la Tasa Media de Homicidios Intencionados en el Mundo por Región y Sexo (1990-2023)',
    hover_data=HOVER_DATA['time_series_region'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_region'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, f'{ts_root}_region_facet_sex{ts_ext}'),
    show=True,
)

In [15]:
# Region-level mean series in a single panel: line_dash='Sex', color='Region_2'.
# Saved under the unmodified PLOT_FILENAME['time_series'] name, in the plots
# folder already defined as PLOTS_PATH. The title's accented characters were
# mis-encoded and are restored here.
time_series_plot(
    df=df_time_series['region'],
    x='Year',
    y='mean_homicides_rate',
    line_group='Region_2',
    line_dash='Sex',
    color='Region_2',
    default_visible_name=['Spain, Female', 'Spain, Male'],
    title='Evolución de la Tasa de Homicidios por Región y Sexo (1990-2023)',
    hover_data=HOVER_DATA['time_series_region'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_region'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['time_series']),
    show=True,
)

## Rankings

In [16]:
# Country ranking, one facet per 'Periodo' stacked in a single column.
#
# PLOT_FILENAME['ranking_country'] is also used by the two-column country
# ranking cell, which would overwrite this figure; a suffix inserted before
# the extension gives this layout its own file.
# barmode and category_orders are deliberately not passed (they were
# commented out in the original call).
rank_root, rank_ext = os.path.splitext(PLOT_FILENAME['ranking_country'])

barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=2000,
    reverse_y_order=False,
    orientation='h',
    color='Sex',
    facet_col='Periodo',
    cols_wrap=1,
    vertical_spacing=0.01,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    color_discrete_map=COLOR_MAP['Sex'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    title='Ranking de Homicidios por País y Sexo',
    plot_save_path=os.path.join(PLOTS_PATH, f'{rank_root}_single_column{rank_ext}'),
    show=True,
)

In [17]:
# Country ranking, facets per 'Periodo' wrapped over two columns.
# Saved under the unmodified PLOT_FILENAME['ranking_country'] name, in the
# plots folder already defined as PLOTS_PATH. The title is a plain string (the
# f-prefix had no placeholders) with its mis-encoded accent restored.
# barmode and category_orders are deliberately not passed (they were
# commented out in the original call).
barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=1000,
    reverse_y_order=False,
    orientation='h',
    color='Sex',
    facet_col='Periodo',
    cols_wrap=2,
    vertical_spacing=0.03,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    color_discrete_map=COLOR_MAP['Sex'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    title='Ranking Mundial de Homicidios por País',
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_country']),
    show=True,
)

In [18]:
# Region ranking, one facet per 'Periodo' stacked in a single column.
#
# The title now says "Región": this cell plots y='Region_2' from the region
# table, but the title had been copied from the country ranking ("por País").
# PLOT_FILENAME['ranking_region'] is also used by the two-column region
# ranking cell, which would overwrite this figure; a suffix inserted before
# the extension gives this layout its own file.
# barmode and category_orders are deliberately not passed (they were
# commented out in the original call).
rank_root, rank_ext = os.path.splitext(PLOT_FILENAME['ranking_region'])

barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    reverse_y_order=False,
    orientation='h',
    color='Sex',
    facet_col='Periodo',
    cols_wrap=1,
    vertical_spacing=0.03,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    color_discrete_map=COLOR_MAP['Sex'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    title='Ranking Mundial de Homicidios por Región y Sexo',
    plot_save_path=os.path.join(PLOTS_PATH, f'{rank_root}_single_column{rank_ext}'),
    show=True,
)

In [19]:
# Region ranking with barmode='group', facets per 'Periodo' wrapped over two
# columns.
#
# The title now says "Región": this cell plots y='Region_2' from the region
# table, but the title had been copied from the country ranking ("por País").
# Saved under the unmodified PLOT_FILENAME['ranking_region'] name, in the
# plots folder already defined as PLOTS_PATH.
# category_orders is deliberately not passed (it was commented out in the
# original call).
barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    reverse_y_order=False,
    orientation='h',
    color='Sex',
    facet_col='Periodo',
    cols_wrap=2,
    vertical_spacing=0.03,
    barmode='group',
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    color_discrete_map=COLOR_MAP['Sex'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    title='Ranking Mundial de Homicidios por Región y Sexo',
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_region']),
    show=True,
)