# Total Homicides Data Analysis

## Requirements

In [1]:
import os, sys
import polars as pl
import numpy as np

In [2]:
# Anchor every path on the process working directory. The script-style
# alternative, os.path.dirname(os.path.abspath(__file__)), is kept here only
# as a reference for running this code outside a notebook.
SCRIPT_PATH = os.getcwd()
PROJECT_PATH = os.path.join(SCRIPT_PATH, os.pardir)

# Figures are written here; the folder is created when it does not exist yet.
PLOTS_PATH = os.path.join(PROJECT_PATH, 'plots')
os.makedirs(PLOTS_PATH, exist_ok=True)

# Processed input table read by the data-loading cell.
INPUT_DIR = os.path.join(PROJECT_PATH, 'data', 'processed')
INPUT_FILE = os.path.join(INPUT_DIR, 'processed_unodc_intentional_homicide_rate.csv')

In [3]:
# Put the project root on the import path so the project-local `config` and
# `src` imports in this cell can resolve. The membership check keeps sys.path
# free of duplicate entries when the cell is re-run in the same kernel.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from config.config_01a import (
    SELECTED_COUNTRIES, 
    COLOR_MAP, 
    CATEGORY_ORDERS,
    HOVER_DATA,
    LABELS, 
    HOVER_TEMPLATES,
    PLOT_FILENAME,
    PROP_YEARS_IN_PERIOD_LIMIT,
    REF_REGION_FOR_START_YEAR
)

from src.plots_utils import time_series_plot, barplot
from src.analysis_utils import process_time_series_data, process_ranking_data

In [4]:
# Read data: load the processed CSV at INPUT_FILE with polars.
df = pl.read_csv(INPUT_FILE)

In [5]:
# Time-series preparation, by country and by region.
# Besides the tables, the call returns the two year bounds
# (optimal_min_year, max_year) that the ranking cell passes on.
df_time_series, optimal_min_year, max_year = process_time_series_data(
    df=df,
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    ref_region_for_start_year=REF_REGION_FOR_START_YEAR,
)

⚙️ Procesando serie: TOTAL PAÍS
📅 Periodo: 1998-2023
✅ Países válidos para región: 27 de 31
--------------------------------------------------------------------------------


In [6]:
# Ranking preparation, by country and by region.
# Start years of the ranking periods; presumably each period runs up to
# max_year (the recorded labels read e.g. "1990-2023") — confirm in
# process_ranking_data. optimal_min_year evaluated to 1998 in the recorded run.
initial_years = [1990, optimal_min_year, 2014, 2019]

df_ranking_combined, df_ranking_dict = process_ranking_data(
    df=df_time_series['country'],
    selected_countries=SELECTED_COUNTRIES,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
    initial_years=initial_years,
    max_year=max_year,
)

🔄 Procesando ranking (Total) para: 1990-2023
----------------------------------------------------------------------------------------------------
📊 Ranking Period: 1990 - 2023
   Países analizados: 31
   Países seleccionados (Data > 65.0%): 26
   Países descartados: ['Chile', 'Philippines', 'Indonesia', 'Türkiye', 'Egypt']
   Prop. datos (años) en el periodo, por pais: {'China': 0.65, 'Germany': 1.0, 'Romania': 1.0, 'Japan': 1.0, 'Chile': 0.59, 'Brazil': 1.0, 'Thailand': 0.65, 'Argentina': 0.68, 'Norway': 0.97, 'Spain': 1.0, 'Italy': 0.94, 'Philippines': 0.62, 'United Kingdom': 0.91, 'Greece': 0.94, 'USA': 0.91, 'Morocco': 0.94, 'Colombia': 1.0, 'Mexico': 1.0, 'Sweden': 1.0, 'Türkiye': 0.53, 'France': 1.0, 'Singapore': 0.97, 'Denmark': 0.79, 'El Salvador': 0.85, 'Indonesia': 0.18, 'Venezuela': 0.91, 'Egypt': 0.56, 'India': 0.97, 'Portugal': 0.79, 'South Africa': 0.85, 'Republic of Korea': 1.0}
---------------------------------------------------------------------------------

---

In [7]:
# Inspect the per-country time-series table (rendered as the cell output).
df_time_series['country']

Country,Region,Subregion,Dimension,Category,Sex,Age,Year,homicides_rate,homicides_count,population,Region_2,homicides_rate_abs_change
str,str,str,str,str,str,str,i64,f64,f64,i64,str,f64
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2001,8.32,3129.0,37624826,"""Latam""",
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2002,9.39,3570.0,38029349,"""Latam""",1.07
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2003,7.72,2967.0,38424282,"""Latam""",-1.67
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2004,6.02,2338.0,38815915,"""Latam""",-1.7
"""Argentina""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2005,5.6,2198.0,39216790,"""Latam""",-0.42
…,…,…,…,…,…,…,…,…,…,…,…,…
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2017,47.98,14665.0,30565324,"""Latam""",-8.6
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2019,41.03,11874.0,28938098,"""Latam""",-6.95
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2020,29.48,8384.0,28444077,"""Latam""",-11.55
"""Venezuela""","""Americas""","""Latin America and the Caribbea‚Ä¶","""Total""","""Total""","""Total""","""Total""",2021,19.28,5444.0,28237826,"""Latam""",-10.2


In [8]:
# Inspect the per-region time-series table (rendered as the cell output).
df_time_series['region']

Region_2,Year,mean_homicides_rate
str,i64,f64
"""Africa""",1990,0.37
"""Africa""",1991,0.5
"""Africa""",1992,0.52
"""Africa""",1993,0.58
"""Africa""",1994,37.35
…,…,…
"""USA""",2019,4.93
"""USA""",2020,6.35
"""USA""",2021,6.78
"""USA""",2022,6.51


In [9]:
# Inspect the combined per-country ranking table (rendered as the cell output).
df_ranking_combined['country']

Country,mean_homicides_rate,Region_2,Periodo
str,f64,str,str
"""Japan""",0.43,"""Asia""","""1990-2023"""
"""Singapore""",0.56,"""Asia""","""1990-2023"""
"""Republic of Korea""",0.71,"""Asia""","""1990-2023"""
"""Norway""",0.78,"""Europe""","""1990-2023"""
"""Spain""",0.95,"""Spain""","""1990-2023"""
…,…,…,…
"""El Salvador""",21.29,"""Latam""","""2019-2023"""
"""Colombia""",24.31,"""Latam""","""2019-2023"""
"""Venezuela""",25.71,"""Latam""","""2019-2023"""
"""Mexico""",27.35,"""Latam""","""2019-2023"""


In [10]:
# Inspect the combined per-region ranking table (rendered as the cell output).
df_ranking_combined['region']

Region_2,mean_homicides_rate,Periodo
str,f64,str
"""Spain""",0.95,"""1990-2023"""
"""Europe""",1.26,"""1990-2023"""
"""Asia""",2.4,"""1990-2023"""
"""USA""",6.12,"""1990-2023"""
"""Africa""",20.66,"""1990-2023"""
…,…,…
"""Europe""",0.88,"""2019-2023"""
"""Asia""",1.29,"""2019-2023"""
"""USA""",6.07,"""2019-2023"""
"""Latam""",18.53,"""2019-2023"""


---

## Time Series

In [11]:
# Homicide rate over time, one line and one color per country.
# PLOT_FILENAME['time_series'] is the only time-series file name this notebook
# uses, so this figure is saved under a suffixed variant of it rather than
# sharing (and overwriting) a single file with the other time-series figures.
# NOTE(review): lines are colored by Country while the palette passed is
# COLOR_MAP['Region_2'] — confirm that map carries country keys.
_ts_root, _ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])
time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    color='Country',
    default_visible_name='Spain',
    # Title repaired from mis-encoded text ("Evoluci√≥n" -> "Evolución").
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, f"{_ts_root}_by_country{_ts_ext}"),
    show=True
)

In [12]:
# Year-over-year absolute change of the homicide rate, one line and one color
# per country.
# PLOT_FILENAME['time_series'] is the only time-series file name this notebook
# uses, so this figure is saved under a suffixed variant of it rather than
# sharing (and overwriting) a single file with the other time-series figures.
_ts_root, _ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])
time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate_abs_change',
    color='Country',
    default_visible_name='Spain',
    # Title repaired from mis-encoded text ("Evoluci√≥n", "Variaci√≥n").
    title='Evolución de la Variación Absoluta de la Tasa de Homicidios Intencionados en el Mundo (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, f"{_ts_root}_abs_change_by_country{_ts_ext}"),
    show=True
)

In [13]:
# Homicide rate over time: one line per country (line_group), colored by the
# country's Region_2 group.
# PLOT_FILENAME['time_series'] is the only time-series file name this notebook
# uses, so this figure is saved under a suffixed variant of it rather than
# sharing (and overwriting) a single file with the other time-series figures.
_ts_root, _ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])
time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate',
    line_group='Country',
    color='Region_2',
    default_visible_name='Spain',
    # Title repaired from mis-encoded text ("Evoluci√≥n", "Regi√≥n").
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo por Región (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, f"{_ts_root}_by_region_color{_ts_ext}"),
    show=True
)

In [14]:
# Year-over-year absolute change of the homicide rate: one line per country
# (line_group), colored by the country's Region_2 group.
# PLOT_FILENAME['time_series'] is the only time-series file name this notebook
# uses, so this figure is saved under a suffixed variant of it rather than
# sharing (and overwriting) a single file with the other time-series figures.
_ts_root, _ts_ext = os.path.splitext(PLOT_FILENAME['time_series'])
time_series_plot(
    df=df_time_series['country'],
    x='Year',
    y='homicides_rate_abs_change',
    line_group='Country',
    color='Region_2',
    default_visible_name='Spain',
    # Title repaired from mis-encoded text ("Evoluci√≥n", "Variaci√≥n", "Regi√≥n").
    title='Evolución de la Variación Absoluta de la Tasa de Homicidios Intencionados en el Mundo por Región (1990-2023)',
    hover_data=HOVER_DATA['time_series_country'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_country'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, f"{_ts_root}_abs_change_by_region_color{_ts_ext}"),
    show=True
)

In [15]:
# Mean homicide rate over time, one line per Region_2 group.
# This figure is saved under the unsuffixed PLOT_FILENAME['time_series'] name.
time_series_plot(
    df=df_time_series['region'],
    x='Year',
    y='mean_homicides_rate',
    line_group='Region_2',
    color='Region_2',
    # 'Spain' appears as its own Region_2 value in the region tables.
    default_visible_name='Spain',
    # Title repaired from mis-encoded text ("Evoluci√≥n", "Regi√≥n").
    title='Evolución de la Tasa Media de Homicidios Intencionados en el Mundo por Región (1990-2023)',
    hover_data=HOVER_DATA['time_series_region'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series_region'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['time_series']),
    show=True
)

## Rankings

In [16]:
# Country ranking by mean homicide rate, one facet per period, two facets per row.
# PLOT_FILENAME['ranking_country'] is the only country-ranking file name this
# notebook uses, so this layout is saved under a suffixed variant of it rather
# than sharing (and overwriting) a single file with the other country ranking.
_rank_root, _rank_ext = os.path.splitext(PLOT_FILENAME['ranking_country'])
barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=1000,
    orientation='h',
    color='Region_2',
    facet_col='Periodo',
    cols_wrap=2,
    barmode='relative',
    color_discrete_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    # Plain string (the f-prefix had no placeholders); mis-encoded "Pa√≠s" repaired.
    title="Ranking Mundial de Homicidios por País",
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, f"{_rank_root}_2cols{_rank_ext}"),
    show=True
)

In [17]:
# Country ranking by mean homicide rate, one facet per period, stacked in a
# single column.
# This figure is saved under the unsuffixed PLOT_FILENAME['ranking_country'] name.
barplot(
    df=df_ranking_combined['country'],
    x='mean_homicides_rate',
    y='Country',
    height=2000,
    orientation='h',
    color='Region_2',
    facet_col='Periodo',
    cols_wrap=1,
    barmode='relative',
    vertical_spacing=0.01,
    color_discrete_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_country'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_country'],
    # Plain string (the f-prefix had no placeholders); mis-encoded "Pa√≠s" repaired.
    title="Ranking Mundial de Homicidios por País para diferentes Periodos",
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_country']),
    show=True
)

In [18]:
# Region ranking by mean homicide rate, one facet per period, stacked in a
# single column.
# PLOT_FILENAME['ranking_region'] is the only region-ranking file name this
# notebook uses, so this layout is saved under a suffixed variant of it rather
# than sharing (and overwriting) a single file with the other region ranking.
_rank_root, _rank_ext = os.path.splitext(PLOT_FILENAME['ranking_region'])
barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    orientation='h',
    color='Region_2',
    facet_col='Periodo',
    cols_wrap=1,
    barmode='relative',
    vertical_spacing=0.03,
    color_discrete_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    # The bars are regions (y='Region_2'), so the title says "por Región";
    # it previously read "por País", carried over from the country ranking.
    title="Ranking Mundial de Homicidios por Región para diferentes Periodos",
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, f"{_rank_root}_1col{_rank_ext}"),
    show=True
)

In [19]:
# Region ranking by mean homicide rate, one facet per period, two facets per row.
# This figure is saved under the unsuffixed PLOT_FILENAME['ranking_region'] name.
barplot(
    df=df_ranking_combined['region'],
    x='mean_homicides_rate',
    y='Region_2',
    height=800,
    orientation='h',
    color='Region_2',
    facet_col='Periodo',
    cols_wrap=2,
    barmode='relative',
    vertical_spacing=0.03,
    color_discrete_map=COLOR_MAP['Region_2'],
    hover_data=HOVER_DATA['ranking_region'],
    labels=LABELS['ranking'],
    hovertemplate=HOVER_TEMPLATES['ranking_region'],
    # The bars are regions (y='Region_2'), so the title says "por Región";
    # it previously read "por País", carried over from the country ranking.
    title="Ranking Mundial de Homicidios por Región para diferentes Periodos",
    # PLOTS_PATH is the same <project>/plots folder the config cell creates.
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['ranking_region']),
    show=True
)