#### Notebook 'Tempo'

In [None]:
# Carga de librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from itertools import combinations
from collections import Counter
import networkx as nx
from pyvis.network import Network


In [None]:
# Load the two Excel workbooks used by the rest of the notebook.
# Paths are relative to the notebook's working directory; update them if the files live elsewhere.
df_aspectos = pd.read_excel('./data/Analytical_Data.xlsx')  # aspect -> variable-name lookup table
df = pd.read_excel('./data/MP_Dataset_KMeans_mean.xlsx')  # main dataset

# Quick visual sanity check: first rows of each frame, main dataset first.
print("Datos cargados:")
display(df.head(), df_aspectos.head())


#### Paso 3: Definición de Funciones

##### Preparar Datos

In [None]:
def preparar_datos(df, df_aspectos, aspecto_filtro, desde, hasta):
    """Select the variables of one aspect and the rows of one time window.

    Parameters
    ----------
    df : pandas.DataFrame
        Main dataset; must contain an ``agno`` column.
    df_aspectos : pandas.DataFrame
        Lookup table with an ``aspecto`` column and ``var_1`` .. ``var_5``
        columns holding variable names.
    aspecto_filtro : str
        Value of ``aspecto`` whose variables are wanted.
    desde, hasta : int
        Inclusive lower and upper bounds applied to ``agno``.

    Returns
    -------
    tuple
        ``(df_periodo, variables)`` where ``df_periodo`` holds the rows of
        ``df`` with ``desde <= agno <= hasta`` and ``variables`` is a 1-D
        NumPy array with the non-missing variable names of the aspect.
    """
    columnas_variables = ['var_1', 'var_2', 'var_3', 'var_4', 'var_5']
    filas_aspecto = df_aspectos['aspecto'] == aspecto_filtro

    # Flatten first and drop missing cells afterwards. A row-wise dropna()
    # would discard the whole aspect as soon as one var_n cell is empty,
    # i.e. whenever the aspect has fewer than five variables.
    candidatos = df_aspectos.loc[filas_aspecto, columnas_variables].to_numpy().ravel()
    variables = candidatos[pd.notna(candidatos)]

    # between() is inclusive on both ends, matching `>= desde` and `<= hasta`.
    df_periodo = df[df['agno'].between(desde, hasta)]
    return df_periodo, variables


##### Filtrar y Normalizar

In [None]:
def filtrar_normalizar(df_periodo, variables):
    """Keep the identifier and variable columns and rescale each row to sum to 1.

    Parameters
    ----------
    df_periodo : pandas.DataFrame
        Frame containing ``countryname``, ``agno`` and every column named in
        ``variables``.
    variables : sequence of str
        Names of the numeric columns to normalize.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with the columns
        ``countryname``, ``agno`` and ``variables``; rows with any missing
        value are dropped and each remaining row's variables are divided by
        their row sum. Rows whose sum is exactly 0 are left unchanged, since
        dividing them would produce NaN/inf values.
    """
    columnas = list(variables)
    df_filtered = df_periodo[['countryname', 'agno'] + columnas].dropna().copy()

    suma_fila = df_filtered[columnas].sum(axis=1)
    # Dividing by 1 is a no-op, so zero-sum rows pass through untouched and
    # rows that already sum to 1 keep their exact values.
    divisor = suma_fila.mask(suma_fila == 0, 1.0)

    # Whole-column assignment lets integer columns become float cleanly.
    df_filtered[columnas] = df_filtered[columnas].div(divisor, axis=0)
    return df_filtered


##### PCA y K-MEANS

In [None]:
def ejecutar_pca_kmeans(df_filtered, variables, n_components=3, optimal_k=3):
    """Project the variables with PCA and cluster the projected rows with K-Means.

    Parameters
    ----------
    df_filtered : pandas.DataFrame
        Frame with ``countryname``, ``agno`` and the columns in ``variables``.
    variables : sequence of str
        Columns fed to the PCA.
    n_components : int, default 3
        Number of principal components to keep.
    optimal_k : int, default 3
        Number of K-Means clusters.

    Returns
    -------
    tuple
        ``(df_pca, pca)``: ``df_pca`` has ``countryname``, ``agno``, one
        ``PC<i>`` column per component and a ``cluster`` label column, with a
        fresh 0..n-1 index; ``pca`` is the fitted PCA object.
    """
    pc_labels = [f'PC{idx}' for idx in range(1, n_components + 1)]

    pca = PCA(n_components=n_components)
    scores = pca.fit_transform(df_filtered[variables])

    # Reset the identifier index so it lines up positionally with the scores.
    identificadores = df_filtered[['countryname', 'agno']].reset_index(drop=True)
    proyeccion = pd.DataFrame(scores, columns=pc_labels)
    df_pca = pd.concat([identificadores, proyeccion], axis=1)

    # Fixed random_state keeps cluster assignments repeatable across runs.
    modelo = KMeans(n_clusters=optimal_k, random_state=42)
    df_pca['cluster'] = modelo.fit_predict(df_pca[pc_labels])
    return df_pca, pca


##### Matriz de coincidencias

In [None]:
def construir_matriz_coincidencias(resultados_temporales):
    """Count how often each pair of members ends up in the same cluster.

    Parameters
    ----------
    resultados_temporales : dict
        Maps a period label to a dict that maps a cluster id to an iterable
        of member names.

    Returns
    -------
    pandas.DataFrame
        Symmetric square matrix indexed by the sorted member names that
        appear in at least one pair; cell ``[a, b]`` holds the number of
        (period, cluster) groups containing both ``a`` and ``b``.
    """
    # Sorting the members first makes every pair come out in one canonical order.
    conteo_pares = Counter(
        pareja
        for agrupacion in resultados_temporales.values()
        for miembros in agrupacion.values()
        for pareja in combinations(sorted(miembros), 2)
    )

    etiquetas = sorted({nombre for pareja in conteo_pares for nombre in pareja})
    matriz = pd.DataFrame(0, index=etiquetas, columns=etiquetas)

    # Mirror every count across the diagonal.
    for (a, b), veces in conteo_pares.items():
        matriz.at[a, b] = veces
        matriz.at[b, a] = veces

    return matriz


##### Visualización del grafo

In [None]:
def grafo_coincidencias(coincidence_matrix):
    """Render a coincidence matrix as an interactive PyVis network.

    Every cell with a value greater than 0 becomes an edge between its row
    label and its column label, with the cell value stored as the edge
    ``weight``. The network is written to ``grafo_coincidencias.html`` and
    shown through PyVis in notebook mode.

    Parameters
    ----------
    coincidence_matrix : pandas.DataFrame
        Square matrix of pair counts, indexed and labelled by node name.

    Returns
    -------
    None
    """
    # Build the graph
    G = nx.Graph()
    for i, row in coincidence_matrix.iterrows():
        # Series.iteritems() was removed in pandas 2.0; items() is the
        # equivalent that exists in every pandas version.
        for j, value in row.items():
            if value > 0:  # only keep connections with a positive weight
                # Store a native Python number: NumPy scalars cannot be
                # serialized by the standard json module.
                weight = value.item() if hasattr(value, 'item') else value
                G.add_edge(i, j, weight=weight)

    # Convert to an interactive PyVis graph
    net = Network(notebook=True, height="750px", width="100%")
    net.from_nx(G)
    net.show("grafo_coincidencias.html")


#### Ejecución para Múltiples Períodos

In [None]:
# Define the analysis periods as consecutive three-year windows.
# The original literal list used `...` as a placeholder, which is a real
# Ellipsis object and makes `for desde, hasta in periodos` raise TypeError.
# NOTE(review): the original list ended with (2019, 2022), which does not align
# with three-year windows starting in 1970; here the last window is simply
# truncated at ANIO_FINAL. Adjust if irregular windows were intended.
ANIO_INICIAL, ANIO_FINAL, ANIOS_POR_PERIODO = 1970, 2022, 3
periodos = [
    (inicio, min(inicio + ANIOS_POR_PERIODO - 1, ANIO_FINAL))
    for inicio in range(ANIO_INICIAL, ANIO_FINAL + 1, ANIOS_POR_PERIODO)
]

# ejecutar_pca_kmeans is called with its defaults (3 components, 3 clusters),
# so a period needs at least this many complete rows to be processed.
MIN_FILAS = 3

resultados_temporales = {}

for desde, hasta in periodos:
    df_periodo, variables = preparar_datos(df, df_aspectos, 'DROGAS_Y_CORRUPCIÓN', desde, hasta)
    df_filtered = filtrar_normalizar(df_periodo, variables)

    # Skip windows without enough data instead of aborting the whole run.
    if len(df_filtered) < MIN_FILAS:
        print(f"Período {desde}-{hasta} omitido: solo {len(df_filtered)} filas completas.")
        continue

    df_pca, pca = ejecutar_pca_kmeans(df_filtered, variables)

    # Store the distinct countries that fall in each cluster
    clusters = {
        etiqueta: df_pca.loc[df_pca['cluster'] == etiqueta, 'countryname'].unique()
        for etiqueta in df_pca['cluster'].unique()
    }
    resultados_temporales[f"{desde}-{hasta}"] = clusters

# Build and visualize the coincidence matrix
coincidence_matrix = construir_matriz_coincidencias(resultados_temporales)
grafo_coincidencias(coincidence_matrix)
