# Total Homicides Data Analysis

## Requirements

In [1]:
import os, sys
import polars as pl
import numpy as np

In [2]:
# All locations are resolved relative to the current working directory.
# (Script-style alternative: os.path.dirname(os.path.abspath(__file__)).)
SCRIPT_PATH = os.getcwd()
PROJECT_PATH = os.path.join(SCRIPT_PATH, '..')

# Input: the processed UNODC intentional homicide rate CSV.
INPUT_DIR = os.path.join(PROJECT_PATH, 'data', 'processed')
INPUT_FILE = os.path.join(
    INPUT_DIR,
    'processed_unodc_intentional_homicide_rate.csv',
)

# Output: folder that receives the generated plots; created if missing.
PLOTS_PATH = os.path.join(PROJECT_PATH, 'plots')
os.makedirs(name=PLOTS_PATH, exist_ok=True)

In [3]:
# Add the project root to the module search path; the `config` and `src`
# imports that follow are presumably resolved from it (verify package layout).
sys.path.append(PROJECT_PATH)

from config.config_01a import (
    SELECTED_COUNTRIES, 
    COLOR_MAP, 
    CATEGORY_ORDERS,
    HOVER_DATA,
    LABELS, 
    HOVER_TEMPLATES,
    PLOT_FILENAME,
    PROP_YEARS_IN_PERIOD_LIMIT
)

from src.plots_utils import time_series_plot, barplot
from src.analysis_utils import calculate_ranking_by_country

In [4]:
# Load the processed dataset.
df = pl.read_csv(INPUT_FILE)

# The processing below exists strictly to support this notebook's exploration.

## Time-series subset

# Keep only the rows whose four breakdown columns are all 'Total',
# restricted to the countries selected in the config.
is_overall_total = (
    (pl.col('Dimension') == 'Total')
    & (pl.col('Category') == 'Total')
    & (pl.col('Sex') == 'Total')
    & (pl.col('Age') == 'Total')
)
is_selected_country = pl.col('Country').is_in(SELECTED_COUNTRIES)
df_time_series = df.filter(is_overall_total & is_selected_country)

## Ranking inputs

# Latest "first available year" across countries: the earliest year from which
# every selected country has at least started reporting.
first_year_by_country = df_time_series.group_by('Country').agg(pl.col('Year').min())
shared_min_year = first_year_by_country['Year'].max()

# Statistics use the full period with a unified lower bound (shared_min_year).
# A shared upper bound (the earliest "last available year" across countries) is
# deliberately NOT applied: it would discard the most recent years, which are
# available for most countries. The overall latest year is used instead.
max_year = df_time_series['Year'].max()

ranking_initial_years = [shared_min_year, 2014, 2019]
df_ranking_country, df_ranking_region = {}, {}

for initial_year in ranking_initial_years:

    # Per-country ranking for the period initial_year..max_year.
    country_ranking = calculate_ranking_by_country(
        df_time_series=df_time_series,
        selected_countries=SELECTED_COUNTRIES,
        prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
        start_year=initial_year,
        end_year=max_year,
    )
    df_ranking_country[initial_year] = country_ranking

    # Per-region ranking: average of the country-level mean rates in each region.
    df_ranking_region[initial_year] = (
        country_ranking
        .group_by('Region_2')
        .agg(pl.col('mean_homicides_rate').mean())
    )

ranking_period: [2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]
ranking_selected_countries: ['Spain', 'Portugal', 'France', 'Italy', 'Germany', 'United Kingdom', 'Sweden', 'Norway', 'Denmark', 'Romania', 'Greece', 'Brazil', 'Mexico', 'Colombia', 'Venezuela', 'Argentina', 'El Salvador', 'Chile', 'Japan', 'Republic of Korea', 'Singapore', 'China', 'India', 'Türkiye', 'Morocco', 'Egypt', 'South Africa', 'USA']
ranking_not_selected_countries: ['Philippines', 'Indonesia', 'Thailand']
prop_year_in_period: {'Spain': np.float64(1.0), 'Portugal': np.float64(0.67), 'France': np.float64(1.0), 'Italy': np.float64(0.9), 'Germany': np.float64(1.0), 'United Kingdom': np.float64(0.9), 'Sweden': np.float64(1.0), 'Norway': np.float64(0.95), 'Denmark': np.float64(0.67), 'Romania': np.float64(1.0), 'Greece': np.float64(0.95), 'Brazil': np.float64(1.0), 'Mexico': np.float64(1.0), 'Colombia': np.float64(1.0), 'Venezuela': np.floa

## Time Series

In [5]:
# Time series of the 'homicides_rate' column by year, grouped by country and
# coloured by region; the figure is saved into the plots folder and shown.
rate_plot_path = os.path.join(PLOTS_PATH, PLOT_FILENAME['time_series'])

time_series_plot(
    df=df_time_series,
    x='Year',
    y='homicides_rate',
    line_group='Country',
    color='Region_2',
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo (1990-2023)',
    hover_data=HOVER_DATA['time_series'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series'],
    color_discrete_map=COLOR_MAP,
    category_orders=CATEGORY_ORDERS,
    plot_save_path=rate_plot_path,
    show=True,
)

In [6]:
# Time series of the 'homicides_rate_abs_change' column by year, grouped by
# country and coloured by region.
#
# The output file gets an 'abs_change_' prefix: saving under the bare
# PLOT_FILENAME['time_series'] name would overwrite the homicide-rate plot,
# which is written to that exact path.
# NOTE(review): hover data, labels and hover template are the same ones used
# for the rate plot — confirm they read correctly for the absolute change.
abs_change_plot_path = os.path.join(
    PLOTS_PATH,
    'abs_change_' + PLOT_FILENAME['time_series'],
)

time_series_plot(
    df=df_time_series,
    x='Year',
    y='homicides_rate_abs_change',
    line_group='Country',
    color='Region_2',
    title='Evolución de la Variación Absoluta de la Tasa de Homicidios Intencionados en el Mundo (1990-2023)',
    hover_data=HOVER_DATA['time_series'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series'],
    color_discrete_map=COLOR_MAP,
    category_orders=CATEGORY_ORDERS,
    plot_save_path=abs_change_plot_path,
    show=True,
)

## Ranking

In [7]:
# One country-level ranking bar chart per analysis period.
# Each ranking was computed from `initial_year` up to `max_year`, so the title
# and the output file name are built from `max_year` instead of a hard-coded
# 2023; they stay correct when the dataset gains (or lacks) more recent years.
for initial_year in ranking_initial_years:

    country_plot_name = f'{initial_year}_{max_year}_' + PLOT_FILENAME['ranking_country']

    barplot(
        df=df_ranking_country[initial_year],
        x='mean_homicides_rate',
        y='Country',
        orientation='h',
        color='Region_2',
        color_discrete_map=COLOR_MAP,
        category_orders=CATEGORY_ORDERS,
        hover_data=HOVER_DATA['ranking_country'],
        labels=LABELS['ranking'],
        hovertemplate=HOVER_TEMPLATES['ranking'],
        title=f"Ranking Mundial de Homicidios por País ({initial_year}-{max_year})",
        plot_save_path=os.path.join(PLOTS_PATH, country_plot_name),
        show=True,
    )

In [8]:
# One region-level ranking bar chart per analysis period.
# The underlying rankings cover `initial_year` up to `max_year`, so the title
# and the output file name are built from `max_year` instead of a hard-coded
# 2023; they stay correct when the dataset gains (or lacks) more recent years.
for initial_year in ranking_initial_years:

    region_plot_name = f'{initial_year}_{max_year}_' + PLOT_FILENAME['ranking_region']

    barplot(
        df=df_ranking_region[initial_year],
        x='mean_homicides_rate',
        y='Region_2',
        orientation='h',
        color='Region_2',
        color_discrete_map=COLOR_MAP,
        category_orders=CATEGORY_ORDERS,
        hover_data=HOVER_DATA['ranking_region'],
        labels=LABELS['ranking'],
        hovertemplate=HOVER_TEMPLATES['ranking'],
        title=f"Ranking Mundial de Homicidios por Región ({initial_year}-{max_year})",
        plot_save_path=os.path.join(PLOTS_PATH, region_plot_name),
        show=True,
    )

---
---

## Sex analysis

Probabilidad de homicidio por sexo (prob = tasa / unidad de la tasa)

## Age analysis

## Sex + Age analysis

## Dimension analysis

### Dimension A 

### Dimension B 


## Dimension by gender / age analysis