In [1]:
#Import Dependencies
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
# Cargar los datos
data = pd.read_csv('LACrimesData.csv', low_memory=False)

In [5]:
# Detectar columnas con valores NaN
cols_with_nan = data.columns[data.isna().any()].tolist()

# Seleccionar solo columnas numéricas para detectar valores infinitos
numeric_data = data.select_dtypes(include=[np.number])
cols_with_inf = numeric_data.columns[np.isinf(numeric_data).any()].tolist()

print("Columnas con valores NaN:", cols_with_nan)
print("Columnas con valores infinitos:", cols_with_inf)

# Columnas esenciales para el análisis
required_columns = ['DATE OCC', 'HOUR', 'DAY_OF_WEEK', 'MONTH', 'Crm Cd Desc']



Columnas con valores NaN: ['Date Rptd', 'DATE OCC', 'TIME OCC', 'AREA', 'AREA NAME', 'Rpt Dist No', 'Part 1-2', 'Crm Cd', 'Crm Cd Desc', 'Mocodes', 'Vict Age', 'Vict Sex', 'Vict Descent', 'Premis Cd', 'Premis Desc', 'Weapon Used Cd', 'Weapon Desc', 'Status', 'Status Desc', 'Crm Cd 1', 'Crm Cd 2', 'Crm Cd 3', 'Crm Cd 4', 'LOCATION', 'Cross Street', 'LAT', 'LON']
Columnas con valores infinitos: []


In [30]:
# Preprocesamiento de datos
data['DATE OCC'] = pd.to_datetime(data['DATE OCC'], format='%m/%d/%Y %H:%M:%S', errors='coerce')
data['HOUR'] = data['DATE OCC'].dt.hour
data['DAY_OF_WEEK'] = data['DATE OCC'].dt.dayofweek
data['MONTH'] = data['DATE OCC'].dt.month

# Seleccionar características y etiquetas
X = data[['HOUR', 'DAY_OF_WEEK', 'MONTH']].values
y = data['Crm Cd Desc'].astype(str)  # Convertir la columna a cadenas

# Codificar las etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ajustar las dimensiones para la entrada de LSTM
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Construir la red neuronal recurrente (LSTM)
model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], 1), activation='relu', return_sequences=True),
    LSTM(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Ajustar el modelo
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/20


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  super().__init__(**kwargs)


[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1ms/step - accuracy: 0.0032 - loss: nan - val_accuracy: 0.0034 - val_loss: nan
Epoch 2/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.0032 - loss: nan - val_accuracy: 0.0034 - val_loss: nan
Epoch 3/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.0031 - loss: nan - val_accuracy: 0.0034 - val_loss: nan
Epoch 4/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.0031 - loss: nan - val_accuracy: 0.0034 - val_loss: nan
Epoch 5/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.0032 - loss: nan - val_accuracy: 0.0034 - val_loss: nan
Epoch 6/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - accuracy: 0.0030 - loss: nan - val_accuracy: 0.0034 - val_loss: nan
Epoch 7/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0xfffeee1c6290>

In [38]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Cargar los datos
data = pd.read_csv('LACrimesData.csv', low_memory=False)

# Preprocesamiento de datos
if 'DATE OCC' in data.columns:
    data['DATE OCC'] = pd.to_datetime(data['DATE OCC'], errors='coerce')  # Convertir fechas
    data = data.dropna(subset=['DATE OCC'])  # Eliminar filas con fechas no válidas
    data = data.sort_values(by='DATE OCC')  # Ordenar cronológicamente

    # Crear características temporales (día, mes, año)
    data['DAY'] = data['DATE OCC'].dt.day
    data['MONTH'] = data['DATE OCC'].dt.month
    data['YEAR'] = data['DATE OCC'].dt.year

    # Seleccionar características y etiquetas
    X = data[['DAY', 'MONTH', 'YEAR']].values
    y = data['Crm Cd Desc']  # Usar la descripción del crimen como etiqueta

    # Verificar que X y y no están vacíos
    print("Número de muestras en X:", X.shape[0])
    print("Número de etiquetas en y:", len(y))

    if X.shape[0] > 0 and len(y) > 0:
        # Codificar las etiquetas
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y)

        # Dividir los datos en conjuntos de entrenamiento y prueba
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

        # Escalar características
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Ajustar las dimensiones para la entrada de CNN (reshape)
        X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
        X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

        # Construir la red neuronal convolucional (CNN)
        model = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

        # Compilar el modelo
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        # Ajustar el modelo
        model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))
    else:
        print("Error: Conjunto de datos vacío después del preprocesamiento.")
else:
    print("Error: Columna 'DATE OCC' no encontrada en los datos.")


  data['DATE OCC'] = pd.to_datetime(data['DATE OCC'], errors='coerce')  # Convertir fechas
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Número de muestras en X: 318977
Número de etiquetas en y: 318977
Epoch 1/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 487us/step - accuracy: 0.1009 - loss: 3.4538 - val_accuracy: 0.1146 - val_loss: 3.3759
Epoch 2/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 478us/step - accuracy: 0.1067 - loss: 3.3831 - val_accuracy: 0.1146 - val_loss: 3.3689
Epoch 3/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 474us/step - accuracy: 0.1070 - loss: 3.3793 - val_accuracy: 0.1146 - val_loss: 3.3705
Epoch 4/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 475us/step - accuracy: 0.1079 - loss: 3.3744 - val_accuracy: 0.1146 - val_loss: 3.3679
Epoch 5/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 473us/step - accuracy: 0.1068 - loss: 3.3721 - val_accuracy: 0.1160 - val_loss: 3.3666
Epoch 6/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 476us/step - accuracy: 0.1

In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Cargar los datos
data = pd.read_csv('LACrimesData.csv')

# Preprocesamiento de datos
data['DATE OCC'] = pd.to_datetime(data['DATE OCC'], errors='coerce')  # Convertir fechas
data = data.sort_values(by='DATE OCC')  # Ordenar cronológicamente

# Crear características temporales (día, mes, año)
data['HOUR'] = data['DATE OCC'].dt.hour
data['DAY_OF_WEEK'] = data['DATE OCC'].dt.dayofweek
data['MONTH'] = data['DATE OCC'].dt.month

# Seleccionar características y etiquetas
X = data[['HOUR', 'DAY_OF_WEEK', 'MONTH']].values
y = data['Crm Cd Desc']  # Usar la descripción del crimen como etiqueta

# Codificar las etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ajustar las dimensiones para la entrada de ARNN
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Construir la red neuronal auto-recurrente (ARNN)
model = Sequential([
    SimpleRNN(64, input_shape=(X_train.shape[1], 1), activation='relu', return_sequences=True),
    SimpleRNN(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Ajustar el modelo
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))


  data = pd.read_csv('LACrimesData.csv')
  data['DATE OCC'] = pd.to_datetime(data['DATE OCC'], errors='coerce')  # Convertir fechas


Epoch 1/20


  super().__init__(**kwargs)


[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 767us/step - accuracy: 0.1023 - loss: 3.4405 - val_accuracy: 0.1146 - val_loss: nan
Epoch 2/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 760us/step - accuracy: 0.1050 - loss: 3.3845 - val_accuracy: 0.1146 - val_loss: nan
Epoch 3/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 736us/step - accuracy: 0.1055 - loss: 3.3820 - val_accuracy: 0.1146 - val_loss: nan
Epoch 4/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 738us/step - accuracy: 0.1048 - loss: 3.3860 - val_accuracy: 0.1146 - val_loss: nan
Epoch 5/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 744us/step - accuracy: 0.1044 - loss: 3.3812 - val_accuracy: 0.1146 - val_loss: nan
Epoch 6/20
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 758us/step - accuracy: 0.1054 - loss: 3.3813 - val_accuracy: 0.1146 - val_loss: nan
Epoch 7/20
[1m7975/7975[0m [32

<keras.src.callbacks.history.History at 0xffff1bd32850>

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Cargar los datos
data = pd.read_csv('LACrimesData.csv')

# Preprocesamiento de datos
X_title = data['Crm Cd Desc'].fillna('').values  # Utilizar "Crm Cd Desc" como el título
X_location = data['LOCATION'].fillna('').values  # Utilizar "LOCATION" para la ubicación
y = data['Status Desc'].fillna('').values  # Utilizar "Status Desc" como la descripción secundaria

# Concatenar título y ubicación para el modelo de texto
X = [f"{title} {location}" for title, location in zip(X_title, X_location)]

# Codificar etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Tokenización de texto
tokenizer = Tokenizer(num_words=10000)  # Tamaño máximo del vocabulario
tokenizer.fit_on_texts(X)
X_tokenized = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(X_tokenized, maxlen=50)  # Máximo de 50 palabras por entrada

# División de datos en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

# Modelo de texto con RNN
model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=50),
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Ajustar el modelo
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


  data = pd.read_csv('LACrimesData.csv')


Epoch 1/10




[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 15ms/step - accuracy: 0.7763 - loss: 0.6256 - val_accuracy: 0.7793 - val_loss: 0.5844
Epoch 2/10
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 15ms/step - accuracy: 0.7856 - loss: 0.5736 - val_accuracy: 0.7813 - val_loss: 0.5797
Epoch 3/10
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 15ms/step - accuracy: 0.7908 - loss: 0.5588 - val_accuracy: 0.7818 - val_loss: 0.5799
Epoch 4/10
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 15ms/step - accuracy: 0.7951 - loss: 0.5462 - val_accuracy: 0.7818 - val_loss: 0.5813
Epoch 5/10
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 15ms/step - accuracy: 0.7972 - loss: 0.5401 - val_accuracy: 0.7816 - val_loss: 0.5847
Epoch 6/10
[1m7975/7975[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m483s[0m 61ms/step - accuracy: 0.8008 - loss: 0.5289 - val_accuracy: 0.7807 - val_loss: 0.5871
Epoch 7/1

<keras.src.callbacks.history.History at 0xfffef0266010>