# Total Homicides Data Analysis

## Requirements

In [1]:
import os, sys
import polars as pl
import numpy as np

In [2]:
# Paths are resolved from the current working directory; the alternative of
# deriving them from __file__ (dirname of its absolute path) is not used here,
# presumably because __file__ is not defined in a notebook session.
SCRIPT_PATH = os.getcwd()

# The project root is taken to be the parent of the working directory.
PROJECT_PATH = os.path.join(SCRIPT_PATH, os.pardir)

# Location of the processed dataset that the analysis reads.
INPUT_DIR = os.path.join(PROJECT_PATH, 'data', 'processed')
INPUT_FILE = os.path.join(
    INPUT_DIR,
    'processed_unodc_intentional_homicide_rate.csv',
)

In [3]:
# Put the project root on the import path so that the `config` and `src`
# packages below can be imported from this working directory.
sys.path.append(PROJECT_PATH)

# Analysis settings: country selections, plot styling/labels, output file
# names and the minimum year-coverage threshold used for the rankings.
from config.config_01a import (
    SELECTED_COUNTRIES, 
    COLOR_MAP, 
    CATEGORY_ORDERS,
    HOVER_DATA,
    LABELS, 
    PLOT_FILENAME,
    PROP_YEARS_IN_PERIOD_LIMIT
)

# Project plotting helpers used for the time-series and ranking figures.
from src.plots_utils import time_series_plot, barplot

In [4]:
def get_ranking_selected_countries(df_filtered, selected_countries, ranking_period, prop_years_in_period_limit):
    """Select the countries with enough yearly coverage to enter a ranking.

    For each country, compute the proportion of years in ``ranking_period``
    for which ``df_filtered`` has at least one row, and keep the countries
    whose proportion reaches ``prop_years_in_period_limit``.

    Args:
        df_filtered: Polars DataFrame with (at least) the columns
            ``'Country'`` and ``'Year'``.
        selected_countries: Iterable of country names to evaluate.
        ranking_period: Iterable of years that make up the ranking period.
        prop_years_in_period_limit: Minimum proportion of period years a
            country must cover (inclusive) to be selected.

    Returns:
        A tuple ``(ranking_selected_countries, prop_year_in_period)`` where
        the first item is the list of selected countries (in the order of
        ``selected_countries``) and the second maps every evaluated country
        to its coverage proportion.
    """
    # Materialise once: the period is traversed for every country, so a
    # one-shot iterable (e.g. a generator) would otherwise be exhausted
    # after the first country.
    period_years = list(ranking_period)
    n_years = len(period_years)

    prop_year_in_period = {}
    for country in selected_countries:
        # A set gives O(1) membership tests instead of scanning a list for
        # every year of the period.
        years_with_data = set(
            df_filtered.filter(pl.col('Country') == country)['Year'].unique().to_list()
        )
        n_covered = sum(year in years_with_data for year in period_years)
        # An empty period has no coverage to measure; report 0.0 rather than
        # the NaN (plus RuntimeWarning) that averaging an empty list yields.
        prop_year_in_period[country] = n_covered / n_years if n_years else 0.0

    ranking_selected_countries = [
        country
        for country, proportion in prop_year_in_period.items()
        if proportion >= prop_years_in_period_limit
    ]

    return ranking_selected_countries, prop_year_in_period

In [5]:
# Load the processed dataset.
df = pl.read_csv(INPUT_FILE)

# Keep only the headline series: rows where every breakdown column is 'Total'.
# Multiple predicates passed to `filter` are combined with a logical AND.
df_filtered = df.filter(
    *(
        pl.col(breakdown) == 'Total'
        for breakdown in ('Dimension', 'Category', 'Sex', 'Age')
    )
)

## Time Series

### World

In [6]:
# Restrict the totals series to the countries configured under the 'world' selection.
df_filtered = df_filtered.filter(pl.col('Country').is_in(SELECTED_COUNTRIES['world']))

In [7]:
# World time series: one line per country, coloured by region.
time_series_plot(
    # Data and axis mapping
    df=df_filtered,
    x='Year',
    y='homicides_rate',
    line_group='Country',
    color='Region_2',
    # Styling taken from the project configuration
    color_discrete_map=COLOR_MAP['world'],
    category_orders=CATEGORY_ORDERS['world'],
    # Text shown on the figure
    title='Evolución de la Tasa de Homicidios Intencionados por País',
    labels=LABELS['time_series'],
    hover_data=HOVER_DATA['time_series'],
    # Output: save under <project>/plots and display the figure
    plot_save_path=os.path.join(
        PROJECT_PATH,
        'plots',
        PLOT_FILENAME[('world', 'time_series')],
    ),
    show=True,
)

### Europe + USA



### Latam + USA

## Ranking

### Full shared period: 2003-2023

In [8]:
# Year bounds of the data:
# - shared_min_year: the latest first-available year across countries.
# - shared_max_year: the earliest last-available year across countries.
# - max_year: the most recent year present in the data.
shared_min_year = df_filtered.group_by('Country').agg(pl.min('Year'))['Year'].max()
shared_max_year = df_filtered.group_by('Country').agg(pl.max('Year'))['Year'].min()
max_year = df_filtered['Year'].max()

# The statistics use the full period, with the lower bound unified at
# shared_min_year (2003 according to the original note).
# The shared upper bound (shared_max_year, 2020 according to the original note)
# is deliberately NOT used: it would drop the most recent years, which are
# available for most countries.
ranking_period = list(range(shared_min_year, max_year + 1))

# Countries whose share of covered years in the period reaches the configured
# threshold, plus the per-country coverage proportion.
ranking_selected_countries, prop_year_in_period = get_ranking_selected_countries(
    df_filtered=df_filtered,
    selected_countries=SELECTED_COUNTRIES['world'],
    ranking_period=ranking_period,
    prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
)

# Deduplicate the Country -> Region_2 lookup BEFORE joining. Joining against
# the raw per-row columns would repeat every aggregated row once per
# observation of that country and then need a `unique()` pass to undo it.
country_regions = df_filtered.select(['Country', 'Region_2']).unique()

# Mean homicide rate per selected country over the ranking period, rounded to
# two decimals, with the region attached and sorted ascending by the mean.
df_ranking = (
    df_filtered
    .filter(
        pl.col('Year').is_in(ranking_period),
        pl.col('Country').is_in(ranking_selected_countries),
    )
    .group_by(['Country'])
    .agg(pl.mean('homicides_rate').alias('mean_homicides_rate'))
    .with_columns(pl.col('mean_homicides_rate').round(2))
    .join(country_regions, on='Country', how='left')
    .sort('mean_homicides_rate', descending=False)
)

In [9]:
# World ranking: horizontal bars of the mean homicide rate per country,
# coloured by region.
barplot(
    df=df_ranking,
    x='mean_homicides_rate',
    y='Country',
    orientation='h',
    color='Region_2',
    color_discrete_map=COLOR_MAP['world'],
    hover_data=HOVER_DATA['ranking'],
    labels=LABELS['ranking'],
    # Build the period shown in the title from the same bounds that define
    # ranking_period, so the title cannot drift from the data actually used.
    title=f"Ranking Mundial de Homicidios ({shared_min_year}-{max_year})",
    plot_save_path=os.path.join(PROJECT_PATH, 'plots', PLOT_FILENAME[('world', 'ranking')]),
    show=True,
)

### Last decade: 2014-2023

### Last lustrum: 2019-2023

### Last year: 2023

---
---

## Sex analysis

Probabilidad de homicidio por sexo (prob = tasa / unidad de la tasa)

## Age analysis

## Sex + Age analysis

## Dimension analysis

### Dimension A 

### Dimension B 


## Dimension by gender / age analysis