In [28]:
# === Carga de modelos y dependencias ===
from tensorflow.keras.models import load_model, Model
import joblib
import pandas as pd
import numpy as np
from types import SimpleNamespace
from datetime import datetime, timedelta

# === Load the exported models ===
# NOTE(review): joblib.load unpickles arbitrary objects; only point it at
# trusted files.
fusion_model = joblib.load("exported_models/fusion_model.pkl")
# scaler.pkl holds a two-item bundle: the fitted scaler and the feature names.
scaler, feature_names = joblib.load("exported_models/scaler.pkl")
rnn_model = load_model("exported_models/rnn_model.keras")

# === Load the processed DataFrame ===
df_contextual = pd.read_csv("df_contextual.csv", low_memory=False)
# Unparseable timestamps become NaT instead of raising.
df_contextual['datetime'] = pd.to_datetime(
    df_contextual['datetime'],
    errors='coerce',
)

# === Bundle everything the prediction function reads into one object ===
model = SimpleNamespace(
    rnn_model=rnn_model,
    fusion_model=fusion_model,
    scaler=scaler,
    temporal_window=30,  # Adjust if the model was trained with another value
)

# Class-id -> crime-type label lookup
crime_class_dict = {
    0: "Liquor Law Violations",
    1: "Impersonation",
    2: "All Other Offenses",
    3: "Burglary/Breaking & Entering",
    4: "Credit Card/ATM Fraud",
    5: "Identity Theft",
    6: "False Pretenses",
    7: "Rape",
    8: "Welfare Fraud",
    9: "Wire Fraud",
    10: "Theft of Motor Vehicle Parts",
    11: "Family Offenses, Nonviolent",
    12: "Embezzlement",
    13: "Murder",
    14: "Aggravated Assault",
    15: "Fondling",
    16: "Theft From Motor Vehicle",
    17: "Simple Assault",
    18: "Drug Violations",
    19: "Vandalism",
    20: "Forgery",
    21: "Motor Vehicle Theft",
    22: "Theft From Building",
    23: "Obscene Material",
    24: "Intimidation",
    25: "All Other Larceny",
    26: "Shoplifting",
    27: "Trespass",
    28: "DUI",
    29: "Arson",
    30: "Robbery",
    31: "Computer Invasion",
    32: "Weapon Violations",
    33: "Disorderly Conduct",
    34: "Statutory Rape",
    35: "Sodomy",
    36: "Sexual Assault Object",
    37: "Curfew/Loitering",
    38: "Stolen Property",
    39: "Coin Machine Theft",
    40: "Animal Cruelty",
    41: "Pocket-picking",
    42: "Drug Equipment",
    43: "Purse-snatching",
    44: "Extortion",
    45: "Gambling Equipment",
    46: "Gambling Promotion",
    47: "Kidnapping",
    48: "Human Trafficking - Servitude",
    49: "Bad Checks",
    50: "Prostitution",
    51: "Drunkenness",
    52: "Human Trafficking - Sex Acts",
    53: "Incest",
    54: "Promoting Prostitution",
    55: "Peeping Tom",
    56: "Bribery",
}

# === REVISED FUNCTION FOR THE NEW APPROACH ===
def predict_top_crimes_for_date_future(model, df_contextual, target_datetime, top_n=5):
    """Rank the most probable crime classes for a future datetime.

    For every spatial cluster that has at least ``model.temporal_window``
    rows, the last ``temporal_window`` rows (ordered by ``datetime``) are used
    to:

    1. obtain an embedding from the RNN's ``dense_embedding`` layer,
    2. build a context vector from the per-column means of those rows, with
       the temporal features of ``target_datetime`` written over it,
    3. scale the context vector, concatenate it with the embedding and call
       ``model.fusion_model.predict_proba``.

    Across all clusters, the single highest-probability row per class is
    kept, and the ``top_n`` best of those are returned.

    Relies on the module-level names ``feature_names``, ``crime_class_dict``
    and ``Model``.

    Args:
        model: Object exposing ``rnn_model``, ``fusion_model``, ``scaler`` and
            ``temporal_window``.
        df_contextual: DataFrame with at least ``spatial_cluster`` and
            ``datetime`` columns; clusters that are processed also need
            ``crime_count_7d``, ``crime_count_30d``, ``temporal_weight``,
            ``Latitude`` and ``Longitude``.
        target_datetime: ``datetime`` the prediction is made for.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with columns ``date``, ``spatial_cluster``, ``latitude``,
        ``longitude``, ``class_id``, ``crime_type`` and ``probability``,
        sorted by descending probability. An empty DataFrame with the same
        columns is returned when no cluster produced a prediction.
    """
    from math import sin, cos, pi

    result_columns = [
        "date", "spatial_cluster", "latitude", "longitude",
        "class_id", "crime_type", "probability",
    ]

    def compute_temporal_features(dt):
        """Return cyclic and plain calendar features for ``dt``."""
        hour = dt.hour
        day = dt.day
        month = dt.month

        return {
            "hour_sin": sin(2 * pi * hour / 24),
            "hour_cos": cos(2 * pi * hour / 24),
            "day_sin": sin(2 * pi * day / 31),
            "day_cos": cos(2 * pi * day / 31),
            "month_sin": sin(2 * pi * month / 12),
            "month_cos": cos(2 * pi * month / 12),
            "hour": hour,
            "day_of_week": dt.weekday(),  # Monday = 0
            "month": month,
            "day_of_year": dt.timetuple().tm_yday,
            "year": dt.year,
            "quarter": (month - 1) // 3 + 1,
        }

    # Loop-invariant values: computed once instead of once per cluster.
    temporal_features = ['crime_count_7d', 'crime_count_30d', 'temporal_weight']
    temporal_injection = compute_temporal_features(target_datetime)
    # Built lazily on the first cluster that needs it, then reused; a failure
    # while building it is still reported per cluster by the except below.
    embedding_model = None

    all_predictions = []

    for cluster_id in df_contextual['spatial_cluster'].unique():
        # NaT timestamps go first so that tail() picks rows with real dates;
        # by default they would sort last and count as the most recent rows.
        cluster_df = df_contextual[
            df_contextual['spatial_cluster'] == cluster_id
        ].sort_values('datetime', na_position='first')
        if len(cluster_df) < model.temporal_window:
            continue

        recent_seq = cluster_df.tail(model.temporal_window)
        try:
            # === RNN embedding ===
            # A leading axis of length 1 is added: a batch with one sequence.
            sequence = recent_seq[temporal_features].values[np.newaxis, :, :]
            if embedding_model is None:
                embedding_model = Model(
                    inputs=model.rnn_model.input,
                    outputs=model.rnn_model.get_layer("dense_embedding").output
                )
            embedding = embedding_model.predict(sequence, verbose=0)

            # === Contextual input ===
            # Start from zeros; features absent from the data stay at 0.0.
            context_vector = pd.DataFrame([[0.0] * len(feature_names)], columns=feature_names)
            for col in feature_names:
                if col in recent_seq.columns:
                    val = pd.to_numeric(recent_seq[col], errors='coerce').mean()
                    if pd.notna(val):
                        context_vector.at[0, col] = val

            # Overwrite calendar features with those of the target datetime.
            for key, val in temporal_injection.items():
                if key in context_vector.columns:
                    context_vector.at[0, key] = val

            # Scale and predict
            scaled_context = model.scaler.transform(context_vector)
            combined_input = np.hstack([scaled_context, embedding])
            probs = model.fusion_model.predict_proba(combined_input)[0]

            print(f"📊 Distribución de clases para cluster {cluster_id}:")
            for idx, p in enumerate(probs):
                if p > 0.01:
                    print(f"Clase {idx} ({crime_class_dict.get(idx, 'Unknown')}): {p:.4f}")

            # The centroid is the same for every class of this cluster.
            latitude = recent_seq["Latitude"].mean()
            longitude = recent_seq["Longitude"].mean()

            # NOTE(review): the position in `probs` is used as the class id;
            # this assumes it matches the ids in crime_class_dict. Confirm
            # against fusion_model.classes_.
            for class_id, prob in enumerate(probs):
                all_predictions.append({
                    "date": target_datetime,
                    "spatial_cluster": cluster_id,
                    "latitude": latitude,
                    "longitude": longitude,
                    "class_id": class_id,
                    "crime_type": crime_class_dict.get(class_id, f"Clase {class_id}"),
                    "probability": prob
                })

        except Exception as e:
            # Best effort: report the failing cluster and keep going.
            print(f"❌ Cluster {cluster_id}: {e}")
            continue

    # Without this guard an empty frame has no 'probability' column and the
    # sort below raises KeyError instead of returning an empty result.
    if not all_predictions:
        return pd.DataFrame(columns=result_columns)

    # Keep, for each class_id, the prediction with the highest probability
    result_df = pd.DataFrame(all_predictions, columns=result_columns)
    top_by_class = result_df.sort_values('probability', ascending=False).drop_duplicates('class_id')

    # Take the top N crimes by probability; one row per class ensures variety
    return top_by_class.sort_values(by='probability', ascending=False).head(top_n)




# === Run the search ===
target_datetime = datetime(year=2025, month=9, day=15, hour=20, minute=0)
top_crimes_future = predict_top_crimes_for_date_future(
    model,
    df_contextual,
    target_datetime,
    top_n=30,
)

# Report either the ranked table or a warning that nothing was produced.
if top_crimes_future.empty:
    print("⚠️ No se generaron predicciones válidas.")
else:
    print(f"✅ Crímenes más probables para {target_datetime.date()}:")
    display(top_crimes_future)


  saveable.load_own_variables(weights_store.get(inner_path))


📊 Distribución de clases para cluster 0:
Clase 0 (Liquor Law Violations): 0.0600
Clase 1 (Impersonation): 0.0700
Clase 2 (All Other Offenses): 0.2200
Clase 6 (False Pretenses): 0.0300
Clase 12 (Embezzlement): 0.1000
Clase 18 (Drug Violations): 0.0200
Clase 21 (Motor Vehicle Theft): 0.1200
Clase 26 (Shoplifting): 0.0300
Clase 30 (Robbery): 0.0500
Clase 33 (Disorderly Conduct): 0.0600
Clase 45 (Gambling Equipment): 0.0200
Clase 47 (Kidnapping): 0.0300
Clase 53 (Incest): 0.1200
📊 Distribución de clases para cluster 1:
Clase 0 (Liquor Law Violations): 0.0300
Clase 1 (Impersonation): 0.1000
Clase 2 (All Other Offenses): 0.4200
Clase 6 (False Pretenses): 0.0200
Clase 9 (Wire Fraud): 0.0200
Clase 12 (Embezzlement): 0.0400
Clase 21 (Motor Vehicle Theft): 0.0500
Clase 26 (Shoplifting): 0.0300
Clase 27 (Trespass): 0.0200
Clase 30 (Robbery): 0.0200
Clase 33 (Disorderly Conduct): 0.0200
Clase 45 (Gambling Equipment): 0.0600
Clase 47 (Kidnapping): 0.0300
Clase 53 (Incest): 0.0900
📊 Distribución de 

Unnamed: 0,date,spatial_cluster,latitude,longitude,class_id,crime_type,probability
59,2025-09-15 20:00:00,1,33.641223,-84.437708,2,All Other Offenses,0.42
244,2025-09-15 20:00:00,24,33.617524,-84.339181,16,Theft From Motor Vehicle,0.14
167,2025-09-15 20:00:00,2,33.654939,-84.423982,53,Incest,0.14
21,2025-09-15 20:00:00,0,33.739652,-84.414267,21,Motor Vehicle Theft,0.12
115,2025-09-15 20:00:00,2,33.654939,-84.423982,1,Impersonation,0.11
228,2025-09-15 20:00:00,24,33.617524,-84.339181,0,Liquor Law Violations,0.11
12,2025-09-15 20:00:00,0,33.739652,-84.414267,12,Embezzlement,0.1
177,2025-09-15 20:00:00,3,33.625953,-84.429356,6,False Pretenses,0.08
259,2025-09-15 20:00:00,24,33.617524,-84.339181,31,Computer Invasion,0.07
33,2025-09-15 20:00:00,0,33.739652,-84.414267,33,Disorderly Conduct,0.06


In [29]:
import folium
from folium.plugins import MarkerCluster
# Quick look at the coordinates and crime type of each predicted row.
print(top_crimes_future.loc[:, ['latitude', 'longitude', 'crime_type']])

# Class-id -> display-name lookup; the position in the sequence is the id.
# NOTE(review): nothing in this cell reads this mapping, and several labels
# differ from crime_class_dict (e.g. ids 20, 23, 31). Confirm which is wanted.
clase_id_to_nombre = dict(enumerate((
    "Liquor Law Violations",          # 0
    "Impersonation",                  # 1
    "All Other Offenses",             # 2
    "Burglary/Breaking & Entering",   # 3
    "Credit Card/ATM Fraud",          # 4
    "Identity Theft",                 # 5
    "False Pretenses",                # 6
    "Rape",                           # 7
    "Welfare Fraud",                  # 8
    "Wire Fraud",                     # 9
    "Theft of Vehicle Parts",         # 10
    "Family Offenses (Nonviolent)",   # 11
    "Embezzlement",                   # 12
    "Murder",                         # 13
    "Aggravated Assault",             # 14
    "Fondling",                       # 15
    "Theft From Vehicle",             # 16
    "Simple Assault",                 # 17
    "Drug/Narcotic Violations",       # 18
    "Vandalism",                      # 19
    "Counterfeiting",                 # 20
    "Motor Vehicle Theft",            # 21
    "Theft From Building",            # 22
    "Pornography",                    # 23
    "Intimidation",                   # 24
    "All Other Larceny",              # 25
    "Shoplifting",                    # 26
    "Trespassing",                    # 27
    "DUI",                            # 28
    "Arson",                          # 29
    "Robbery",                        # 30
    "Hacking",                        # 31
    "Weapon Violations",              # 32
    "Disorderly Conduct",             # 33
    "Statutory Rape",                 # 34
    "Sodomy",                         # 35
    "Sexual Assault with Object",     # 36
    "Curfew Violations",              # 37
    "Stolen Property",                # 38
    "Coin Machine Theft",             # 39
    "Animal Cruelty",                 # 40
    "Pocket-picking",                 # 41
    "Drug Equipment Violations",      # 42
    "Purse-snatching",                # 43
    "Extortion",                      # 44
    "Gambling Equipment",             # 45
    "Promoting Gambling",             # 46
    "Kidnapping",                     # 47
    "Human Trafficking (Servitude)",  # 48
    "Bad Checks",                     # 49
    "Prostitution",                   # 50
    "Drunkenness",                    # 51
    "Human Trafficking (Sex Acts)",   # 52
    "Incest",                         # 53
    "Promoting Prostitution",         # 54
    "Peeping Tom",                    # 55
    "Bribery",                        # 56
)))

# Center the map on the mean of the predicted coordinates
center_lat = top_crimes_future.loc[:, 'latitude'].mean()
center_lon = top_crimes_future.loc[:, 'longitude'].mean()
mapa = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=12,
)

# Marker cluster layer that will hold every prediction marker
marcadores = MarkerCluster()
marcadores.add_to(mapa)

# Color by probability
def color_por_prob(p):
    """Return the marker color for probability ``p``.

    The first lower bound that ``p`` reaches decides the color:
    ``>= 0.5`` red, ``>= 0.2`` orange, ``>= 0.05`` blue; anything else
    (including values that satisfy none of the comparisons) is green.
    """
    umbrales = ((0.5, 'red'), (0.2, 'orange'), (0.05, 'blue'))
    for limite, color in umbrales:
        if p >= limite:
            return color
    return 'green'

# Add one circle marker per predicted row
for _, row in top_crimes_future.iterrows():
    # HTML shown when the marker is clicked
    popup_html = (
        f"<b>Fecha:</b> {row['date']}<br>"
        f"<b>Cluster:</b> {row['spatial_cluster']}<br>"
        f"<b>Delito:</b> {row['crime_type']}<br>"
        f"<b>Probabilidad:</b> {row['probability']:.2%}"
    )
    marker = folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=8,
        color=color_por_prob(row['probability']),
        fill=True,
        fill_opacity=0.7,
        popup=folium.Popup(popup_html, max_width=250),
    )
    marker.add_to(marcadores)

# Show the map in Jupyter (last expression of the cell)
mapa

      latitude  longitude                    crime_type
59   33.641223 -84.437708            All Other Offenses
244  33.617524 -84.339181      Theft From Motor Vehicle
167  33.654939 -84.423982                        Incest
21   33.739652 -84.414267           Motor Vehicle Theft
115  33.654939 -84.423982                 Impersonation
228  33.617524 -84.339181         Liquor Law Violations
12   33.739652 -84.414267                  Embezzlement
177  33.625953 -84.429356               False Pretenses
259  33.617524 -84.339181             Computer Invasion
33   33.739652 -84.414267            Disorderly Conduct
102  33.641223 -84.437708            Gambling Equipment
248  33.617524 -84.339181                       Forgery
315  33.795067 -84.322147                       Robbery
262  33.617524 -84.339181                Statutory Rape
236  33.617524 -84.339181                 Welfare Fraud
238  33.617524 -84.339181  Theft of Motor Vehicle Parts
83   33.641223 -84.437708                   Shop