In [1]:
# Report the version of the interpreter that backs this kernel.
# A `!python --version` shell escape would instead run whichever `python`
# happens to be first on the shell PATH, which can differ from the kernel.
import platform

print(f"Python {platform.python_version()}")


Python 3.10.18


In [18]:
# === Carga de modelos y dependencias ===
from tensorflow.keras.models import load_model, Model
import joblib
import pandas as pd
import numpy as np
from types import SimpleNamespace
from datetime import datetime

# === Load the exported model artifacts ===
# NOTE(review): the .pkl files are pickles; only load artifacts produced by a
# trusted export step.
fusion_model = joblib.load("exported_models/fusion_model.pkl")
scaler, feature_names = joblib.load("exported_models/scaler.pkl")
rnn_model = load_model("exported_models/rnn_model.keras")

# === Load the processed DataFrame ===
# Unparseable timestamps become NaT (errors="coerce") instead of raising.
df_contextual = pd.read_csv("df_contextual.csv", low_memory=False)
df_contextual["datetime"] = pd.to_datetime(df_contextual["datetime"], errors="coerce")

# === Bundle everything the prediction function needs into one object ===
model = SimpleNamespace(
    rnn_model=rnn_model,
    fusion_model=fusion_model,
    scaler=scaler,
    temporal_window=30,  # adjust if needed
)

# === Prediction function ===
def _resolve_class_index(fusion_model, target_class_id):
    """Return the ``predict_proba`` column that corresponds to ``target_class_id``.

    When the classifier exposes ``classes_``, the label is looked up there.
    When it does not, the label itself is returned so the lookup stays
    positional. Returns ``None`` when ``classes_`` exists but does not contain
    the label.
    """
    classes = getattr(fusion_model, "classes_", None)
    if classes is None:
        return target_class_id
    labels = np.asarray(classes).tolist()
    try:
        return labels.index(target_class_id)
    except ValueError:
        return None


def predict_probability_by_crime_class(model, df_contextual, target_datetime, target_class_id):
    """Estimate, per spatial cluster, the probability of one crime class.

    For every value of ``spatial_cluster`` with at least
    ``model.temporal_window`` rows dated on or before ``target_datetime``, the
    most recent ``model.temporal_window`` rows are used to:

    1. build a sequence from the three temporal feature columns and pass it
       through the ``"dense_embedding"`` layer of ``model.rnn_model``;
    2. build a context row holding the mean of every column listed in the
       module-level ``feature_names`` (0.0 where a column is absent or has no
       numeric values) and scale it with ``model.scaler``;
    3. feed the horizontally stacked ``[scaled context, embedding]`` to
       ``model.fusion_model.predict_proba``.

    Args:
        model: Object exposing ``rnn_model``, ``fusion_model``, ``scaler`` and
            ``temporal_window``.
        df_contextual: DataFrame with at least ``datetime`` and
            ``spatial_cluster`` columns; ``Latitude``/``Longitude`` and the
            temporal feature columns are read per cluster.
        target_datetime: Only rows with ``datetime <= target_datetime`` are used.
        target_class_id: Class label whose probability is reported.

    Returns:
        DataFrame with one row per successfully processed cluster and columns
        ``spatial_cluster``, ``lat``, ``lon``, ``target_class``,
        ``probability``. It is empty (no columns) when nothing could be
        predicted.

    Notes:
        Depends on the module-level names ``feature_names`` and ``Model``.
        Errors raised while processing a cluster are printed and that cluster
        is skipped (best effort); they are not propagated.
    """
    print(f"🔮 Prediciendo probabilidad de clase {target_class_id} para el {target_datetime}...\n")
    results = []

    # predict_proba columns follow fusion_model.classes_, which is not
    # guaranteed to be 0..n-1, so map the label to its column explicitly.
    class_index = _resolve_class_index(getattr(model, "fusion_model", None), target_class_id)
    if class_index is None:
        print(f"⚠️ La clase {target_class_id} no está entre las clases conocidas por el modelo de fusión.")
        return pd.DataFrame(results)

    # Boolean indexing already returns a new frame, and it is never mutated.
    df = df_contextual[df_contextual['datetime'] <= target_datetime]
    clusters = df['spatial_cluster'].unique()

    temporal_features = ['crime_count_7d', 'crime_count_30d', 'temporal_weight']
    # Every per-cluster slice shares df's columns, so check only once.
    has_temporal_features = all(f in df.columns for f in temporal_features)

    # Built lazily inside the try block so a construction failure is still
    # reported per cluster, but at most one sub-model is ever created.
    embedding_model = None

    for cluster_id in clusters:
        cluster_df = df[df['spatial_cluster'] == cluster_id].sort_values('datetime')

        if len(cluster_df) < model.temporal_window:
            continue

        if not has_temporal_features:
            print(f"Cluster {cluster_id} ignorado: faltan columnas temporales necesarias.")
            continue

        recent_seq = cluster_df.tail(model.temporal_window)

        try:
            # === RNN embedding ===
            if embedding_model is None:
                embedding_model = Model(
                    inputs=model.rnn_model.input,
                    outputs=model.rnn_model.get_layer("dense_embedding").output
                )
            # Add a leading batch axis: (1, temporal_window, n_temporal_features).
            sequence = recent_seq[temporal_features].values[np.newaxis, :, :]
            embedding = embedding_model.predict(sequence, verbose=0)

            # === Context ===
            context_vector = pd.DataFrame([[0.0] * len(feature_names)], columns=feature_names)

            for col in feature_names:
                if col in recent_seq.columns:
                    value = pd.to_numeric(recent_seq[col], errors='coerce').mean()
                    if pd.notna(value):
                        context_vector.at[0, col] = value

            scaled_context = model.scaler.transform(context_vector)
            combined_input = np.hstack([scaled_context, embedding])
            probs = model.fusion_model.predict_proba(combined_input)[0]

            # === Coordinates ===
            results.append({
                'spatial_cluster': cluster_id,
                'lat': recent_seq['Latitude'].mean(),
                'lon': recent_seq['Longitude'].mean(),
                'target_class': target_class_id,
                'probability': probs[class_index]
            })

        except Exception as e:
            # Deliberate best effort: report the failure and move on.
            print(f"❌ Error en cluster {cluster_id}: {e}")
            continue

    return pd.DataFrame(results)

# === Smoke test ===
target_class_id = 51  # change to evaluate a different class
target_datetime = datetime(2024, 7, 15, 18, 0)

df_resultado = predict_probability_by_crime_class(
    model, df_contextual, target_datetime, target_class_id
)

# === Show the results ===
# An empty frame (or one without a probability column) means no cluster
# produced a prediction.
if df_resultado.empty or 'probability' not in df_resultado.columns:
    print("⚠️ No se pudieron generar predicciones válidas. Revisa los datos de entrada.")
else:
    print("✅ Predicciones generadas exitosamente.")
    # Ten clusters with the highest probability for the target class.
    display(df_resultado.sort_values(by='probability', ascending=False).iloc[:10])


  saveable.load_own_variables(weights_store.get(inner_path))


🔮 Prediciendo probabilidad de clase 51 para el 2024-07-15 18:00:00...

✅ Predicciones generadas exitosamente.


Unnamed: 0,spatial_cluster,lat,lon,target_class,probability
1,1,33.640315,-84.444664,51,0.02
0,0,33.795845,-84.384646,51,0.01
3,3,33.626349,-84.429233,51,0.01
4,4,33.794868,-84.321212,51,0.01
2,2,33.655284,-84.422173,51,0.0


In [17]:
import folium
from folium.plugins import MarkerCluster
# Human-readable offense name for each integer class id. Used below to label
# the map popups from the `target_class` column; ids missing from this table
# are shown as 'Desconocido'.
# NOTE(review): presumably these ids match the label encoding the fusion model
# was trained with — confirm against the training notebook.
clase_id_to_nombre = {
    0: "Liquor Law Violations",
    1: "Impersonation",
    2: "All Other Offenses",
    3: "Burglary/Breaking & Entering",
    4: "Credit Card/ATM Fraud",
    5: "Identity Theft",
    6: "False Pretenses",
    7: "Rape",
    8: "Welfare Fraud",
    9: "Wire Fraud",
    10: "Theft of Vehicle Parts",
    11: "Family Offenses (Nonviolent)",
    12: "Embezzlement",
    13: "Murder",
    14: "Aggravated Assault",
    15: "Fondling",
    16: "Theft From Vehicle",
    17: "Simple Assault",
    18: "Drug/Narcotic Violations",
    19: "Vandalism",
    20: "Counterfeiting",
    21: "Motor Vehicle Theft",
    22: "Theft From Building",
    23: "Pornography",
    24: "Intimidation",
    25: "All Other Larceny",
    26: "Shoplifting",
    27: "Trespassing",
    28: "DUI",
    29: "Arson",
    30: "Robbery",
    31: "Hacking",
    32: "Weapon Violations",
    33: "Disorderly Conduct",
    34: "Statutory Rape",
    35: "Sodomy",
    36: "Sexual Assault with Object",
    37: "Curfew Violations",
    38: "Stolen Property",
    39: "Coin Machine Theft",
    40: "Animal Cruelty",
    41: "Pocket-picking",
    42: "Drug Equipment Violations",
    43: "Purse-snatching",
    44: "Extortion",
    45: "Gambling Equipment",
    46: "Promoting Gambling",
    47: "Kidnapping",
    48: "Human Trafficking (Servitude)",
    49: "Bad Checks",
    50: "Prostitution",
    51: "Drunkenness",
    52: "Human Trafficking (Sex Acts)",
    53: "Incest",
    54: "Promoting Prostitution",
    55: "Peeping Tom",
    56: "Bribery"
}

# Centre the map on the mean coordinate of the predicted clusters
center_lat, center_lon = (df_resultado[axis].mean() for axis in ('lat', 'lon'))
mapa = folium.Map(zoom_start=12, location=[center_lat, center_lon])

# Marker-cluster layer that will hold one marker per spatial cluster
marcadores = MarkerCluster()
marcadores.add_to(mapa)

# Colour markers by probability
def color_por_prob(p):
    """Return the marker colour for probability ``p``.

    Bands (lower bound inclusive): >= 0.5 'red', >= 0.2 'orange',
    >= 0.05 'blue'; anything else, including values that compare false
    against every bound, is 'green'.
    """
    bands = ((0.5, 'red'), (0.2, 'orange'), (0.05, 'blue'))
    for lower_bound, colour in bands:
        if p >= lower_bound:
            return colour
    return 'green'

# Add one marker per cluster.
# itertuples() keeps each column's own dtype. iterrows() packs every row into
# a single Series, which upcasts the integer cluster/class ids to float when
# the frame also has float columns, so the popup would read "Cluster: 1.0".
for row in df_resultado.itertuples(index=False):
    folium.CircleMarker(
        location=[row.lat, row.lon],
        radius=8,
        color=color_por_prob(row.probability),
        fill=True,
        fill_opacity=0.7,
        popup=folium.Popup(
            f"<b>Cluster:</b> {row.spatial_cluster}<br>"
            f"<b>Delito:</b> {clase_id_to_nombre.get(row.target_class, 'Desconocido')}<br>"
            f"<b>Probabilidad:</b> {row.probability:.2%}",
            max_width=250
        )
    ).add_to(marcadores)

# Display the map (Jupyter Notebook)
mapa
