# Homicides by Age Data Analysis

## Requirements

In [13]:
import os, sys
import polars as pl
import numpy as np

In [14]:
# All locations are resolved from the current working directory; in a plain .py script
# the equivalent anchor would be os.path.dirname(os.path.abspath(__file__)).
SCRIPT_PATH = os.getcwd()
PROJECT_PATH = os.path.join(SCRIPT_PATH, os.pardir)

# Output folder for saved figures (created on first run, left untouched afterwards).
PLOTS_PATH = os.path.join(PROJECT_PATH, 'plots')
os.makedirs(PLOTS_PATH, exist_ok=True)

# Input: the processed UNODC intentional-homicide-rate CSV.
INPUT_DIR = os.path.join(PROJECT_PATH, 'data', 'processed')
INPUT_FILE = os.path.join(INPUT_DIR, 'processed_unodc_intentional_homicide_rate.csv')

In [15]:
# Make the project root importable so the `config` and `src` packages resolve.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from config.config_01a import (
    SELECTED_COUNTRIES, 
    COLOR_MAP, 
    CATEGORY_ORDERS,
    HOVER_DATA,
    LABELS, 
    HOVER_TEMPLATES,
    PLOT_FILENAME,
    PROP_YEARS_IN_PERIOD_LIMIT,
    REF_REGION_FOR_START_YEAR
)

from src.plots_utils import time_series_plot, barplot
from src.analysis_utils import process_time_series_data, process_ranking_data

In [16]:
# Collapses the fine-grained age brackets into broader groups.
# Keys are matched literally, so the irregular spacing in "10 -14" and "15 -17" is intentional.
age_mapping = {
    # Childhood
    **dict.fromkeys(("0-9", "10 -14"), "0-14"),

    # Youth (the most fragmented group)
    **dict.fromkeys(("15 -17", "18-19", "20-24", "25-29"), "15-29"),

    # Adults (kept in a consistent format)
    "30-44": "30-44",
    "45-59": "45-59",
    "60 and older": "60+",

    # 'Total' is passed through unchanged; it can be excluded later
    "Total": "Total",
}

In [17]:
# Read the processed homicide-rate CSV (INPUT_FILE) with polars; every later cell starts from `df`
df = pl.read_csv(INPUT_FILE)

In [18]:
# Build the time-series tables, broken down by the column named in `by`.
# The result is indexed by 'country' and 'region' in later cells; the two returned
# years are reused below when the ranking periods are defined.
by = 'Age'

df_time_series, optimal_min_year, max_year = process_time_series_data(
    df=df,
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    ref_region_for_start_year=REF_REGION_FOR_START_YEAR,
    by=by,
)

⚙️ Procesando desglose por: EDAD
📅 Periodo: 2014-2023
✅ Países válidos para región: 12 de 31
--------------------------------------------------------------------------------


In [19]:
# Processing for rankings by country and region.
#
# Each entry of `initial_years` is the first year of one ranking period that ends at
# `max_year`. `optimal_min_year` is only known at run time and can coincide with one of
# the literal years (the processing log above reports a 2014-2023 period, i.e. it
# appears to be 2014). A repeated start year would request the same period twice, so
# the list is de-duplicated while keeping its order.
# NOTE(review): duplicated period rows would explain ranking bars being drawn twice
# and summed in the first facet — confirm after re-running.
initial_years = list(dict.fromkeys([
    1990,
    optimal_min_year,
    2014,
    2019,
]))

df_ranking_combined, df_ranking_dict = process_ranking_data(
    df=df_time_series['country'],
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    initial_years=initial_years,
    max_year=max_year,
    by=by,
)

🔄 Procesando ranking (Age) para: 1990-2023
----------------------------------------------------------------------------------------------------
📊 Ranking Period: 1990 - 2023
   Segmentado por: Age
   Países analizados: 31
   Países seleccionados (Data > 65.0%): 0
   Países descartados: ['Spain', 'Portugal', 'France', 'Italy', 'Germany', 'United Kingdom', 'Sweden', 'Norway', 'Denmark', 'Romania', 'Greece', 'Brazil', 'Mexico', 'Colombia', 'Venezuela', 'Argentina', 'El Salvador', 'Chile', 'Japan', 'Republic of Korea', 'Singapore', 'China', 'India', 'Philippines', 'Indonesia', 'Thailand', 'Türkiye', 'Morocco', 'Egypt', 'South Africa', 'USA']
   Prop. datos (años) en el periodo, por pais: {'Greece': 0.03, 'Morocco': 0.03, 'Spain': 0.24, 'India': 0.18, 'Mexico': 0.24, 'United Kingdom': 0.12, 'Brazil': 0.03, 'Denmark': 0.32, 'Republic of Korea': 0.18, 'France': 0.26, 'Colombia': 0.38, 'Argentina': 0.15, 'Türkiye': 0.24, 'Italy': 0.29, 'Norway': 0.32, 'Singapore': 0.03, 'Egypt': 0.

---

In [20]:
# Inspect the country-level time-series table (one row per Country / Year / Age)
df_time_series['country']

Country,Region_2,Year,Age,homicides_rate,homicides_count,population,homicides_rate_abs_change
str,str,i64,str,f64,f64,i64,f64
"""Argentina""","""Latam""",2015,"""30-44""",8.39,758.0,9031067,
"""Argentina""","""Latam""",2015,"""60 and older""",3.63,241.0,6642668,
"""Argentina""","""Latam""",2015,"""45-59""",4.77,316.0,6621047,
"""Argentina""","""Latam""",2018,"""0-9""",0.77,57.0,7362200,
"""Argentina""","""Latam""",2018,"""10 -14""",0.68,24.0,3521665,
…,…,…,…,…,…,…,…
"""United Kingdom""","""Europe""",2018,"""10 -14""",0.2,7.0,3450782,-0.16
"""United Kingdom""","""Europe""",2018,"""25-29""",1.72,69.0,4022272,-0.06
"""United Kingdom""","""Europe""",2018,"""18-19""",3.45,24.0,695208,-1.86
"""United Kingdom""","""Europe""",2018,"""20-24""",1.83,68.0,3717960,-0.5


In [21]:
# Inspect the region-level time-series table (mean rate per Region_2 / Year / Age)
df_time_series['region']

Region_2,Year,Age,mean_homicides_rate
str,i64,str,f64
"""Asia""",2016,"""0-9""",0.38
"""Asia""",2016,"""10 -14""",0.38
"""Asia""",2016,"""15 -17""",1.98
"""Asia""",2016,"""18-19""",3.3
"""Asia""",2016,"""20-24""",5.64
…,…,…,…
"""USA""",2020,"""20-24""",12.84
"""USA""",2020,"""25-29""",11.9
"""USA""",2020,"""30-44""",8.06
"""USA""",2020,"""45-59""",3.85


In [22]:
# Inspect the combined country ranking (mean rate per Country / Age, tagged with its Periodo)
df_ranking_combined['country']

Country,Age,mean_homicides_rate,Region_2,Periodo
str,str,f64,str,str
"""Italy""","""10 -14""",0.12,"""Europe""","""2014-2023"""
"""Spain""","""10 -14""",0.13,"""Spain""","""2014-2023"""
"""Spain""","""15 -17""",0.22,"""Spain""","""2014-2023"""
"""Italy""","""0-9""",0.23,"""Europe""","""2014-2023"""
"""Germany""","""10 -14""",0.24,"""Europe""","""2014-2023"""
…,…,…,…,…
"""Colombia""","""25-29""",44.33,"""Latam""","""2019-2023"""
"""Mexico""","""20-24""",44.8,"""Latam""","""2019-2023"""
"""Colombia""","""20-24""",45.72,"""Latam""","""2019-2023"""
"""Mexico""","""30-44""",46.46,"""Latam""","""2019-2023"""


In [23]:
# Inspect the combined region ranking (mean rate per Region_2 / Age, tagged with its Periodo)
df_ranking_combined['region']

Region_2,Age,mean_homicides_rate,Periodo
str,str,f64,str
"""Spain""","""10 -14""",0.13,"""2014-2023"""
"""Spain""","""15 -17""",0.22,"""2014-2023"""
"""Spain""","""0-9""",0.32,"""2014-2023"""
"""Asia""","""10 -14""",0.39,"""2014-2023"""
"""Asia""","""0-9""",0.42,"""2014-2023"""
…,…,…,…
"""Latam""","""45-59""",14.38,"""2019-2023"""
"""Latam""","""18-19""",22.33,"""2019-2023"""
"""Latam""","""30-44""",25.4,"""2019-2023"""
"""Latam""","""20-24""",29.84,"""2019-2023"""


---

## Time Series

In [24]:
# Country-level time series: one facet per age group, one line and colour per country,
# with 'Spain' as the initially visible trace.
# NOTE(review): colour is keyed on 'Country' but the colour map is the Region_2 one —
# confirm that mapping is intended.
# NOTE(review): the title hard-codes 1990-2023 while the processing log reports a
# 2014-2023 period — confirm the year range actually plotted.
time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    facet_col='Age',
    facet_col_wrap=3,
    color='Country',
    default_visible_name='Spain',
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo por Edad (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # Distinct filename: every time-series cell used to save to the same file,
    # so each plot overwrote the previous one.
    plot_save_path=os.path.join(PLOTS_PATH, 'by_country_' + PLOT_FILENAME['time_series']),
    show=True
)

In [25]:
# Country-level time series: one facet per age group, one line per country,
# coloured by region (Region_2), with 'Spain' as the initially visible trace.
# NOTE(review): the title hard-codes 1990-2023 while the processing log reports a
# 2014-2023 period — confirm the year range actually plotted.
time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    facet_col='Age',
    facet_col_wrap=3,
    color='Region_2',
    default_visible_name='Spain',
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo por Región y Edad (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # Distinct filename: every time-series cell used to save to the same file,
    # so each plot overwrote the previous one.
    plot_save_path=os.path.join(PLOTS_PATH, 'by_country_region_color_' + PLOT_FILENAME['time_series']),
    show=True
)

In [26]:
# Country-level time series in a single panel: one line per country, coloured by region,
# with the age group encoded as the line dash. Two Spain traces start visible.
# NOTE(review): the title hard-codes 1990-2023 while the processing log reports a
# 2014-2023 period — confirm the year range actually plotted.
time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    line_dash='Age',
    color='Region_2',
    default_visible_name=['Spain, 18-19', 'Spain, 20-24'],
    title='Evolución de la Tasa de Homicidios por Región y Edad (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # Distinct filename: every time-series cell used to save to the same file,
    # so each plot overwrote the previous one.
    plot_save_path=os.path.join(PLOTS_PATH, 'by_country_age_dash_' + PLOT_FILENAME['time_series']),
    show=True
)

In [28]:
# Region-level time series of the mean homicide rate: one facet per age group,
# one line and colour per region, with 'Spain' as the initially visible trace.
# NOTE(review): the title hard-codes 1990-2023 while the processing log reports a
# 2014-2023 period — confirm the year range actually plotted.
time_series_plot(
    df=df_time_series['region'],
    x='Year',
    y='mean_homicides_rate',
    line_group='Region_2',
    facet_col='Age',
    facet_col_wrap=3,
    color='Region_2',
    default_visible_name='Spain',
    title='Evolución de la Tasa Media de Homicidios Intencionados en el Mundo por Región y Edad (1990-2023)',
    hover_data=HOVER_DATA['time_series_region'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_region'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # Same file as before: PLOTS_PATH is PROJECT_PATH/plots.
    # NOTE(review): the country-level time-series cells also target this filename.
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['time_series']),
    show=True
)

## Rankings

**CONTINUAR DESDE AQUÍ: hay que arreglar este gráfico; en el primero salen dos barras en cada barra y se están sumando, algo raro pasa.**

In [31]:
# Country ranking: horizontal bars of the mean homicide rate, coloured by age group,
# one facet per period stacked in a single column; y tick labels coloured by region.
# NOTE(review): no barmode, colour map or category order is passed (they had been left
# disabled), so bars for the different Age values are combined using the helper's
# defaults — confirm whether grouped bars (barmode='group') are intended.
barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=2000,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    facet_col='Periodo',
    cols_wrap=1,
    vertical_spacing=0.01,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    title="Ranking Mundial de Homicidios por País",
    # Same file as before: PLOTS_PATH is PROJECT_PATH/plots.
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_country']),
    show=True
)

In [60]:
# Country ranking, compact layout: same data as the single-column country ranking but
# with the period facets wrapped in two columns.
# The ranking tables carry an 'Age' breakdown (there is no 'Sex' column), so the bars
# are coloured by 'Age'. Title and filename no longer depend on `initial_year`,
# which is not defined in this notebook; the facets already show each period.
# NOTE(review): no Age colour map is passed, matching the single-column ranking cell —
# add one if the config provides it.
barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=1000,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    facet_col='Periodo',
    cols_wrap=2,
    vertical_spacing=0.02,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    title="Ranking Mundial de Homicidios por País",
    # Prefixed so it does not overwrite the single-column country ranking file.
    plot_save_path=os.path.join(PLOTS_PATH, '2cols_' + PLOT_FILENAME['ranking_country']),
    show=True
)

In [61]:
# Region ranking: horizontal bars of the mean homicide rate per region, coloured by
# age group, one facet per period stacked in a single column.
# The ranking tables carry an 'Age' breakdown (there is no 'Sex' column), so the bars
# are coloured by 'Age'. Title and filename no longer depend on `initial_year`,
# which is not defined in this notebook; the title now names regions, not countries.
# NOTE(review): no Age colour map is passed, matching the country ranking cell —
# add one if the config provides it.
barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    facet_col='Periodo',
    cols_wrap=1,
    vertical_spacing=0.01,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    title="Ranking Mundial de Homicidios por Región",
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_region']),
    show=True
)

In [62]:
# Region ranking, compact layout: same data as the single-column region ranking but
# with the period facets wrapped in two columns.
# The ranking tables carry an 'Age' breakdown (there is no 'Sex' column), so the bars
# are coloured by 'Age'. Title and filename no longer depend on `initial_year`,
# which is not defined in this notebook; the title now names regions, not countries.
# NOTE(review): no Age colour map is passed, matching the country ranking cell —
# add one if the config provides it.
barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    facet_col='Periodo',
    cols_wrap=2,
    vertical_spacing=0.01,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    title="Ranking Mundial de Homicidios por Región",
    # Prefixed so it does not overwrite the single-column region ranking file.
    plot_save_path=os.path.join(PLOTS_PATH, '2cols_' + PLOT_FILENAME['ranking_region']),
    show=True
)