# Homicides by Sex Data Analysis

## Requirements

In [1]:
import os, sys
import polars as pl
import numpy as np

In [2]:
# Directory layout, resolved from the current working directory.
# Script-style alternative: os.path.dirname(os.path.abspath(__file__))
SCRIPT_PATH = os.getcwd()
PROJECT_PATH = os.path.join(SCRIPT_PATH, '..')


def _project_path(*parts):
    """Join *parts* onto PROJECT_PATH."""
    return os.path.join(PROJECT_PATH, *parts)


# Input: processed UNODC intentional homicide rate CSV.
INPUT_DIR = _project_path('data', 'processed')
INPUT_FILE = os.path.join(
    INPUT_DIR,
    'processed_unodc_intentional_homicide_rate.csv',
)

# Output: plots directory, created when it does not exist yet.
PLOTS_PATH = _project_path('plots')
os.makedirs(PLOTS_PATH, exist_ok=True)

In [3]:
# Make the project root importable for the config.* and src.* imports below.
# Guarded so that re-running this cell does not append duplicate entries.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

from config.config_01a import (
    SELECTED_COUNTRIES, 
    COLOR_MAP, 
    CATEGORY_ORDERS,
    HOVER_DATA,
    LABELS, 
    HOVER_TEMPLATES,
    PLOT_FILENAME,
    PROP_YEARS_IN_PERIOD_LIMIT
)

from src.plots_utils import time_series_plot, barplot_01b
from src.analysis_utils import calculate_ranking

In [4]:
# Read data
df = pl.read_csv(INPUT_FILE)

# Process data (strictly to carry out this NB exploration)

## Processing for time series

# Keep rows where Dimension, Category and Age are 'Total' and Sex is not
# 'Total' (the per-sex series), restricted to the selected countries.
df_time_series = df.filter(
    (pl.col('Dimension') == 'Total')
    & (pl.col('Category') == 'Total')
    & (pl.col('Sex') != 'Total')
    & (pl.col('Age') == 'Total')
    & pl.col('Country').is_in(SELECTED_COUNTRIES)
)

## Processing for Rankings

# Every ranking window ends at the last year present in the filtered data.
# NOTE: an alternative that is not used here is the window shared by all
# countries: from the max of the per-country first years to the min of the
# per-country last years.
max_year = df_time_series['Year'].max()

# Ranking tables, keyed by the first year of each ranking window.
df_ranking_country, df_ranking_region = {}, {}
ranking_initial_years = [2003, 2014, 2019]

for initial_year in ranking_initial_years:

    country_ranking = calculate_ranking(
        df_time_series=df_time_series,
        selected_countries=SELECTED_COUNTRIES,
        prop_years_in_period_limit=PROP_YEARS_IN_PERIOD_LIMIT,
        start_year=initial_year,
        end_year=max_year,
        by=['Country', 'Sex'],
    )
    df_ranking_country[initial_year] = country_ranking

    # Regional value = unweighted mean of the country-level means.
    # maintain_order=True keeps the groups in order of first appearance, so
    # the regional table has the same row order on every run (the default
    # group_by order is not guaranteed).
    df_ranking_region[initial_year] = (
        country_ranking
        .group_by(['Region_2', 'Sex'], maintain_order=True)
        .agg(pl.mean('mean_homicides_rate'))
    )

ranking_period: 2003 - 2023
ranking_selected_countries: ['Spain', 'Portugal', 'Italy', 'Germany', 'United Kingdom', 'Sweden', 'Norway', 'Denmark', 'Romania', 'Greece', 'Brazil', 'Mexico', 'Colombia', 'El Salvador', 'Chile', 'Japan', 'Singapore', 'India', 'Türkiye', 'Morocco', 'USA']
ranking_not_selected_countries: ['France', 'Venezuela', 'Argentina', 'Republic of Korea', 'China', 'Philippines', 'Indonesia', 'Thailand', 'Egypt', 'South Africa']
prop_year_in_period: {'Spain': np.float64(1.0), 'Portugal': np.float64(0.95), 'France': np.float64(0.43), 'Italy': np.float64(0.95), 'Germany': np.float64(1.0), 'United Kingdom': np.float64(0.9), 'Sweden': np.float64(1.0), 'Norway': np.float64(1.0), 'Denmark': np.float64(1.0), 'Romania': np.float64(1.0), 'Greece': np.float64(1.0), 'Brazil': np.float64(1.0), 'Mexico': np.float64(1.0), 'Colombia': np.float64(1.0), 'Venezuela': np.float64(0.62), 'Argentina': np.float64(0.38), 'El Salvador': np.float64(0.86), 'Chile': np.float64(0.9), 'Japan': np.flo

## Time Series

In [5]:
# World-wide homicide-rate time series: Year on x, homicides_rate on y,
# faceted by Sex, with 'Spain' passed as the default visible name.
world_time_series_kwargs = dict(
    df=df_time_series,
    x='Year',
    y='homicides_rate',
    line_group='Country',
    facet_col='Sex',
    color='Region_2',
    default_visible_name='Spain',
    title='Evolución de la Tasa de Homicidios Intencionados en el Mundo (1990-2023)',
    hover_data=HOVER_DATA['time_series'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    # PLOTS_PATH is PROJECT_PATH/plots, so this is the same target file.
    plot_save_path=os.path.join(PLOTS_PATH, PLOT_FILENAME['time_series']),
    show=True,
)
time_series_plot(**world_time_series_kwargs)

In [6]:
# Homicide-rate time series split by sex via line_dash, with the Spanish
# female and male series passed as the default visible names.
# Saved under its own file name: the world-wide time series cell already
# writes PLOT_FILENAME['time_series'], and reusing that exact path here would
# overwrite that figure.
time_series_plot(
    df=df_time_series,
    x='Year',
    y='homicides_rate',
    line_group='Country',
    line_dash='Sex',
    color='Region_2',
    default_visible_name=['Spain, Female', 'Spain, Male'],
    title='Evolución de la Tasa de Homicidios en España por Sexos (1990-2023)',
    hover_data=HOVER_DATA['time_series'],
    labels=LABELS['time_series'],
    hovertemplate=HOVER_TEMPLATES['time_series'],
    color_discrete_map=COLOR_MAP['Region_2'],
    category_orders=CATEGORY_ORDERS,
    plot_save_path=os.path.join(PLOTS_PATH, 'spain_by_sex_' + PLOT_FILENAME['time_series']),
    show=True,
)

## Rankings

In [7]:
# Country ranking with the bars of both sexes grouped (barmode='group'),
# one figure per ranking window.
for initial_year in ranking_initial_years:

    # The rankings were computed with end_year=max_year, so the title and the
    # file name use max_year as well instead of a hard-coded 2023.
    barplot_01b(
        df=df_ranking_country[initial_year],
        x='mean_homicides_rate',
        y='Country',
        reverse_y_order=False,
        orientation='h',
        color='Sex',
        barmode='group',
        yticks_color_column='Region_2',
        yticks_color_map=COLOR_MAP['Region_2'],
        color_discrete_map=COLOR_MAP['Sex'],
        category_orders=CATEGORY_ORDERS,
        hover_data=HOVER_DATA['ranking_country'],
        labels=LABELS['ranking'],
        hovertemplate=HOVER_TEMPLATES['ranking_country'],
        title=f"Ranking Mundial de Homicidios por País ({initial_year}-{max_year})",
        plot_save_path=os.path.join(PLOTS_PATH, f'{initial_year}_{max_year}_' + PLOT_FILENAME['ranking_country']),
        show=True,
    )

In [8]:
# Country ranking faceted by sex (facet_col='Sex'), one figure per ranking
# window.
for initial_year in ranking_initial_years:

    # The rankings were computed with end_year=max_year, so the title and the
    # file name use max_year instead of a hard-coded 2023. The 'facet_' part
    # keeps this figure from overwriting the grouped country ranking, which
    # is saved under the plain PLOT_FILENAME['ranking_country'] name.
    barplot_01b(
        df=df_ranking_country[initial_year],
        x='mean_homicides_rate',
        y='Country',
        orientation='h',
        color='Sex',
        facet_col='Sex',
        yticks_color_column='Region_2',
        yticks_color_map=COLOR_MAP['Region_2'],
        color_discrete_map=COLOR_MAP['Sex'],
        category_orders=CATEGORY_ORDERS,
        hover_data=HOVER_DATA['ranking_country'],
        labels=LABELS['ranking'],
        hovertemplate=HOVER_TEMPLATES['ranking_country'],
        title=f"Ranking Mundial de Homicidios por País ({initial_year}-{max_year})",
        plot_save_path=os.path.join(PLOTS_PATH, f'{initial_year}_{max_year}_facet_' + PLOT_FILENAME['ranking_country']),
        show=True,
    )

In [9]:
# Region ranking with the bars of both sexes grouped (barmode='group'),
# one figure per ranking window.
for initial_year in ranking_initial_years:

    # The data is aggregated by Region_2, so the title says "por Región"
    # (it previously read "por País", copied from the country ranking).
    # The window end comes from max_year instead of a hard-coded 2023.
    barplot_01b(
        df=df_ranking_region[initial_year],
        x='mean_homicides_rate',
        y='Region_2',
        reverse_y_order=False,
        orientation='h',
        color='Sex',
        barmode='group',
        yticks_color_column='Region_2',
        yticks_color_map=COLOR_MAP['Region_2'],
        color_discrete_map=COLOR_MAP['Sex'],
        hover_data=HOVER_DATA['ranking_region'],
        labels=LABELS['ranking'],
        hovertemplate=HOVER_TEMPLATES['ranking_region'],
        title=f"Ranking Mundial de Homicidios por Región ({initial_year}-{max_year})",
        plot_save_path=os.path.join(PLOTS_PATH, f'{initial_year}_{max_year}_' + PLOT_FILENAME['ranking_region']),
        show=True,
    )

In [10]:
# Region ranking faceted by sex (facet_col='Sex'), one figure per ranking
# window.
for initial_year in ranking_initial_years:

    # The data is aggregated by Region_2, so the title says "por Región"
    # (it previously read "por País", copied from the country ranking).
    # The window end comes from max_year instead of a hard-coded 2023, and
    # the 'facet_' part keeps this figure from overwriting the grouped region
    # ranking saved under the plain PLOT_FILENAME['ranking_region'] name.
    barplot_01b(
        df=df_ranking_region[initial_year],
        x='mean_homicides_rate',
        y='Region_2',
        orientation='h',
        color='Sex',
        facet_col='Sex',
        yticks_color_column='Region_2',
        yticks_color_map=COLOR_MAP['Region_2'],
        color_discrete_map=COLOR_MAP['Sex'],
        hover_data=HOVER_DATA['ranking_region'],
        labels=LABELS['ranking'],
        hovertemplate=HOVER_TEMPLATES['ranking_region'],
        title=f"Ranking Mundial de Homicidios por Región ({initial_year}-{max_year})",
        plot_save_path=os.path.join(PLOTS_PATH, f'{initial_year}_{max_year}_facet_' + PLOT_FILENAME['ranking_region']),
        show=True,
    )