# Homicides by Age Data Analysis

## Requirements

In [1]:
import os, sys
import polars as pl
import numpy as np

In [2]:
# Project locations, resolved from the notebook's current working directory.
# (Script equivalent: os.path.dirname(os.path.abspath(__file__)).)
SCRIPT_PATH = os.getcwd()
PROJECT_PATH = os.path.join(SCRIPT_PATH, os.pardir)

# Input: processed UNODC intentional-homicide-rate CSV.
INPUT_DIR = os.path.join(PROJECT_PATH, 'data', 'processed')
INPUT_FILE = os.path.join(
    INPUT_DIR,
    'processed_unodc_intentional_homicide_rate.csv',
)

# Output: folder for saved plots; created if it does not exist yet.
PLOTS_PATH = os.path.join(PROJECT_PATH, 'plots')
os.makedirs(PLOTS_PATH, exist_ok=True)

In [3]:
# Put the project root on the import path (presumably so the `config` and `src`
# packages imported below resolve). Guarded so that re-running this cell does
# not append the same entry again.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from config.config_01a import (
    SELECTED_COUNTRIES, 
    COLOR_MAP, 
    CATEGORY_ORDERS,
    HOVER_DATA,
    LABELS, 
    HOVER_TEMPLATES,
    PLOT_FILENAME,
    PROP_YEARS_IN_PERIOD_LIMIT,
    REF_REGION_FOR_START_YEAR,
    AGE_MAPPING
)

from src.plots_utils import time_series_plot, barplot
from src.analysis_utils import process_time_series_data, process_ranking_data

In [4]:
# Load the processed homicide-rate CSV located at INPUT_FILE into a polars DataFrame.
df = pl.read_csv(INPUT_FILE)

In [5]:
# Build the time-series tables, broken down by age group.
#
# `by` stays a notebook-level name because the ranking step reuses it.
# The first returned value is indexed with 'country' and 'region' further down.
by = 'Age'

(df_time_series, optimal_min_year, max_year) = process_time_series_data(
    df=df,
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    ref_region_for_start_year=REF_REGION_FOR_START_YEAR,
    by=by,
    age_mapping=AGE_MAPPING,
)

⚙️ Procesando desglose por: EDAD
📅 Periodo: 2014-2023
✅ Países válidos para región: 12 de 31
--------------------------------------------------------------------------------


In [6]:
# Rankings by country and region.
#
# A ranking is requested for each start year below, up to `max_year`.
# `optimal_min_year` is computed by the time-series step, so it can coincide
# with one of the hard-coded years; dict.fromkeys drops such repeats while
# keeping first-seen order, so no period is requested twice.
initial_years = list(dict.fromkeys([1990, optimal_min_year, 2014, 2019]))

df_ranking_combined, df_ranking_dict = process_ranking_data(
    df=df_time_series['country'],
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    initial_years=initial_years,
    max_year=max_year,
    by=by,
)

🔄 Procesando ranking (Age) para: 1990-2023
----------------------------------------------------------------------------------------------------
📊 Ranking Period: 1990 - 2023
   Segmentado por: Age
   Países analizados: 31
   Países seleccionados (Data > 65.0%): 0
   Países descartados: ['Spain', 'Portugal', 'France', 'Italy', 'Germany', 'United Kingdom', 'Sweden', 'Norway', 'Denmark', 'Romania', 'Greece', 'Brazil', 'Mexico', 'Colombia', 'Venezuela', 'Argentina', 'El Salvador', 'Chile', 'Japan', 'Republic of Korea', 'Singapore', 'China', 'India', 'Philippines', 'Indonesia', 'Thailand', 'Türkiye', 'Morocco', 'Egypt', 'South Africa', 'USA']
   Prop. datos (años) en el periodo, por pais: {'Brazil': 0.03, 'Mexico': 0.24, 'Greece': 0.03, 'Portugal': 0.38, 'India': 0.18, 'Morocco': 0.03, 'Norway': 0.32, 'Egypt': 0.18, 'Singapore': 0.03, 'Türkiye': 0.24, 'France': 0.26, 'Germany': 0.32, 'United Kingdom': 0.12, 'Colombia': 0.38, 'Chile': 0.21, 'Italy': 0.32, 'El Salvador': 0.15, 'US

---

In [7]:
# Preview the country-level time-series table.
df_time_series['country']

Country,Region_2,Year,Age,homicides_rate,homicides_count,population,homicides_rate_abs_change
str,str,i64,str,f64,f64,i64,f64
"""Argentina""","""Latam""",2015,"""30-44""",8.39,758.0,9031067,
"""Argentina""","""Latam""",2015,"""60+""",3.63,241.0,6642668,
"""Argentina""","""Latam""",2015,"""45-59""",4.77,316.0,6621047,
"""Argentina""","""Latam""",2018,"""15-19""",7.95,277.0,3483957,
"""Argentina""","""Latam""",2018,"""0-9""",0.77,57.0,7362200,
…,…,…,…,…,…,…,…
"""United Kingdom""","""Europe""",2018,"""30-44""",1.52,174.0,11436886,-0.07
"""United Kingdom""","""Europe""",2018,"""15-19""",1.96,51.0,2605079,-0.75
"""United Kingdom""","""Europe""",2018,"""10-14""",0.2,7.0,3450782,-0.16
"""United Kingdom""","""Europe""",2018,"""0-9""",0.75,54.0,7223750,0.14


In [8]:
# Preview the region-level time-series table.
df_time_series['region']

Region_2,Year,Age,mean_homicides_rate
str,i64,str,f64
"""Asia""",2016,"""0-9""",0.38
"""Asia""",2016,"""10-14""",0.38
"""Asia""",2016,"""15-19""",2.51
"""Asia""",2016,"""20-29""",6.18
"""Asia""",2016,"""30-44""",4.75
…,…,…,…
"""USA""",2020,"""15-19""",11.55
"""USA""",2020,"""20-29""",12.36
"""USA""",2020,"""30-44""",8.06
"""USA""",2020,"""45-59""",3.85


In [9]:
# Preview the combined country ranking table.
df_ranking_combined['country']

Country,Age,mean_homicides_rate,Region_2,Periodo
str,str,f64,str,str
"""Italy""","""10-14""",0.12,"""Europe""","""2014-2023"""
"""Spain""","""10-14""",0.13,"""Spain""","""2014-2023"""
"""Italy""","""0-9""",0.23,"""Europe""","""2014-2023"""
"""France""","""10-14""",0.24,"""Europe""","""2014-2023"""
"""Germany""","""10-14""",0.24,"""Europe""","""2014-2023"""
…,…,…,…,…
"""El Salvador""","""30-44""",33.22,"""Latam""","""2019-2023"""
"""El Salvador""","""20-29""",37.34,"""Latam""","""2019-2023"""
"""Colombia""","""20-29""",45.02,"""Latam""","""2019-2023"""
"""Mexico""","""30-44""",46.46,"""Latam""","""2019-2023"""


In [10]:
# Preview the combined region ranking table.
df_ranking_combined['region']

Region_2,Age,mean_homicides_rate,Periodo
str,str,f64,str
"""Spain""","""10-14""",0.13,"""2014-2023"""
"""Spain""","""0-9""",0.32,"""2014-2023"""
"""Asia""","""10-14""",0.39,"""2014-2023"""
"""Spain""","""15-19""",0.4,"""2014-2023"""
"""Asia""","""0-9""",0.42,"""2014-2023"""
…,…,…,…
"""Latam""","""60+""",7.11,"""2019-2023"""
"""Latam""","""45-59""",14.38,"""2019-2023"""
"""Latam""","""15-19""",16.04,"""2019-2023"""
"""Latam""","""30-44""",25.4,"""2019-2023"""


---

## Time Series

In [11]:
# Country-level time series, one facet per age group, coloured by country.
#
# PLOT_FILENAME['time_series'] is also the save target of another time-series
# cell in this notebook, so this figure is written under a suffixed name to
# keep it from being overwritten.
# NOTE(review): colour is keyed by 'Country' while the discrete map passed is
# COLOR_MAP['Region_2'] — confirm time_series_plot resolves this as intended.
ts_file_root, ts_file_ext = os.path.splitext(PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    facet_col='Age',
    facet_col_wrap=3,
    color='Country',
    default_visible_name='Spain',
    title='Evoluci√≥n de la Tasa de Homicidios Intencionados en el Mundo por Edad (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, f"{ts_file_root}_color_by_country{ts_file_ext}"),
    show=True,
)

In [12]:
# Country-level time series, one facet per age group, coloured by region.
#
# PLOT_FILENAME['time_series'] is also the save target of another time-series
# cell in this notebook, so this figure is written under a suffixed name to
# keep it from being overwritten.
ts_file_root, ts_file_ext = os.path.splitext(PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    facet_col='Age',
    facet_col_wrap=3,
    color='Region_2',
    default_visible_name='Spain',
    title='Evoluci√≥n de la Tasa de Homicidios Intencionados en el Mundo por Regi√≥n y Edad (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, f"{ts_file_root}_color_by_region{ts_file_ext}"),
    show=True,
)

In [None]:
# Country-level time series on a single panel: colour by region, dash style by
# age group. The hover is defined inline so that the Age category is shown.
#
# PLOT_FILENAME['time_series'] is also the save target of another time-series
# cell in this notebook, so this figure is written under a suffixed name to
# keep it from being overwritten.
ts_file_root, ts_file_ext = os.path.splitext(PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    line_dash='Age',
    color='Region_2',
    default_visible_name=['Spain, 15-19', 'Spain, 20-29'],
    title='Evoluci√≥n de la Tasa de Homicidios por Regi√≥n y Edad (1990-2023)',
    labels=LABELS['time_series'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, f"{ts_file_root}_line_dash_age{ts_file_ext}"),
    show=True,
    hover_data={
        "Region_2": False,
        "Country": True,
        "Year": True,
        "homicides_rate": True,
        "homicides_rate_abs_change": True,
        "Age": True,
    },
    # NOTE(review): the customdata[n] positions depend on how time_series_plot
    # builds customdata from hover_data — confirm the indices against the helper.
    hovertemplate=(
        # Header
        "<b style='font-size: 14px'>%{customdata[1]}</b><br>"
        "<br>"

        # Line 1: year
        "<b>A√±o:</b> %{x}<br>"

        # Line 2: rate, two decimals
        "<b>Tasa de Homicidios:</b> %{y:.2f}<br>"

        # Line 3: absolute change, signed, two decimals.
        # The <br> closes this line so that Age starts on its own line.
        "<b>Variaci√≥n Absoluta:</b> %{customdata[2]:+.2f}<br>"

        # Line 4: age category (last line, so no trailing <br>)
        "<b>Age:</b> %{customdata[3]}"

        # Hide the secondary label on the right
        "<extra></extra>"
    ),
)



In [None]:
# Register the hover-column selection for the country time series with Age.
# Key order is preserved: Region_2, Country, Year, rate, change, Age.
HOVER_DATA['time_series_country_age'] = dict(
    Region_2=False,
    Country=True,
    Year=True,
    homicides_rate=True,
    homicides_rate_abs_change=True,
    Age=True,
)

In [None]:
# Register the hover template for the country time series with Age.
# NOTE(review): the customdata[n] positions depend on how the plotting helper
# builds customdata from hover_data — confirm the indices against the helper.
HOVER_TEMPLATES['time_series_country_age'] = (
        # Header
        "<b style='font-size: 14px'>%{customdata[1]}</b><br>"
        "<br>"

        # Line 1: year
        "<b>A√±o:</b> %{x}<br>"

        # Line 2: rate, two decimals
        "<b>Tasa de Homicidios:</b> %{y:.2f}<br>"

        # Line 3: absolute change, signed, two decimals.
        # The <br> closes this line so that Age starts on its own line.
        "<b>Variaci√≥n Absoluta:</b> %{customdata[2]:+.2f}<br>"

        # Line 4: age category (last line, so no trailing <br>)
        "<b>Age:</b> %{customdata[3]}"

        # Hide the secondary label on the right
        "<extra></extra>"
    )

In [None]:
# TODO: customise the hover so that the Age category appears in the labels.

# Region-level mean homicide rate over time, one facet per age group.
region_ts_save_path = os.path.join(PLOTS_PATH, PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series['region'],
    x='Year',
    y='mean_homicides_rate',
    line_group='Region_2',
    facet_col='Age',
    facet_col_wrap=3,
    color='Region_2',
    default_visible_name='Spain',
    title='Evoluci√≥n de la Tasa Media de Homicidios Intencionados en el Mundo por Regi√≥n y Edad (1990-2023)',
    hover_data=HOVER_DATA['time_series_region'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_region'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=region_ts_save_path,
    show=True,
)

## Rankings

In [None]:
# TODO: customise the hover so that the Age category appears in the labels.

# Country ranking by age group, one period per row (single column of facets).
#
# PLOT_FILENAME['ranking_country'] is also the save target of another ranking
# cell in this notebook, so this figure is written under a suffixed name to
# keep it from being overwritten.
# No color_discrete_map / category_orders are passed for the Age categories.
rank_country_root, rank_country_ext = os.path.splitext(PLOT_FILENAME['ranking_country'])

barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=2000,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    barmode='group',
    facet_col='Periodo',
    cols_wrap=1,
    vertical_spacing=0.01,
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    title="Ranking Mundial de Homicidios por Pa√≠s",
    plot_save_path=os.path.join(
        PLOTS_PATH, f"{rank_country_root}_single_column{rank_country_ext}"
    ),
    show=True,
)

In [None]:
# TODO: customise the hover so that the Age category appears in the labels.

# Country ranking by age group, periods laid out two per row.
#
# Bars are coloured by 'Age'; COLOR_MAP['Sex'] (presumably keyed by sex
# categories) does not describe those values, so no discrete colour map is
# passed here, matching the other country-ranking cell.
barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=1000,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    facet_col='Periodo',
    cols_wrap=2,
    vertical_spacing=0.02,
    barmode='group',
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    title="Ranking Mundial de Homicidios por Pa√≠s",
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_country']),
    show=True,
)

In [None]:
# TODO: customise the hover so that the Age category appears in the labels.

# Region ranking by age group, one period per row (single column of facets).
#
# - The title names regions, since the bars are per Region_2, not per country.
# - Bars are coloured by 'Age'; COLOR_MAP['Sex'] (presumably keyed by sex
#   categories) does not describe those values, so no discrete colour map is
#   passed.
# - PLOT_FILENAME['ranking_region'] is also the save target of another ranking
#   cell in this notebook, so this figure is written under a suffixed name to
#   keep it from being overwritten.
rank_region_root, rank_region_ext = os.path.splitext(PLOT_FILENAME['ranking_region'])

barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    facet_col='Periodo',
    cols_wrap=1,
    vertical_spacing=0.01,
    barmode='group',
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    title="Ranking Mundial de Homicidios por Regi√≥n",
    plot_save_path=os.path.join(
        PLOTS_PATH, f"{rank_region_root}_single_column{rank_region_ext}"
    ),
    show=True,
)

In [None]:
# TODO: customise the hover so that the Age category appears in the labels.

# Region ranking by age group, periods laid out two per row.
#
# - The title names regions, since the bars are per Region_2, not per country.
# - Bars are coloured by 'Age'; COLOR_MAP['Sex'] (presumably keyed by sex
#   categories) does not describe those values, so no discrete colour map is
#   passed.
barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    reverse_y_order=False,
    orientation='h',
    color='Age',
    facet_col='Periodo',
    cols_wrap=2,
    vertical_spacing=0.01,
    barmode='group',
    yticks_color_column='Region_2',
    yticks_color_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    title="Ranking Mundial de Homicidios por Regi√≥n",
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_region']),
    show=True,
)