In [None]:
import pandas as pd
import numpy as np

# Build a reproducible synthetic crime log with one row per calendar day.
# Seeding NumPy's legacy global RNG pins every random draw made below.
np.random.seed(42)
n_samples = 1000

# The columns are listed in the order their random draws must happen;
# reordering them would change the generated values.
# NOTE: `next_day_crime` is drawn independently of every other column, so the
# features carry no information about the label.
data = dict(
    barangay=np.random.choice(['Barangay 1', 'Barangay 2', 'Barangay 3'], size=n_samples),
    incident_type=np.random.choice(['Theft', 'Assault', 'Vandalism'], size=n_samples),
    date_committed=pd.date_range('2022-01-01', periods=n_samples, freq='D'),
    time_committed=np.random.randint(low=0, high=24, size=n_samples),  # hour of day, 0-23
    next_day_crime=np.random.choice([0, 1], size=n_samples),
)

df = pd.DataFrame(data)


In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Encode the two categorical columns as integer codes.
# One encoder per column is kept so codes can be mapped back to their labels
# with `inverse_transform`.
# NOTE(review): the string columns are overwritten in place, so re-running this
# cell re-fits the encoders on the integer codes; re-run the data cell first.
le_barangay = LabelEncoder()
le_incident = LabelEncoder()
df['barangay'] = le_barangay.fit_transform(df['barangay'])
df['incident_type'] = le_incident.fit_transform(df['incident_type'])

# Calendar features derived from the incident date
df['day_of_week'] = df['date_committed'].dt.dayofweek  # Monday=0 ... Sunday=6
df['month'] = df['date_committed'].dt.month

# Prepare input features and labels in chronological order so the split below
# is a true "train on the past, test on the future" hold-out.
# The stable sort is a no-op when the frame is already ordered by date.
df_by_date = df.sort_values('date_committed', kind='stable')
features = df_by_date[['barangay', 'incident_type', 'time_committed', 'day_of_week', 'month']]
labels = df_by_date['next_day_crime']

# shuffle=False keeps the last 20% of days as the test set. A shuffled split
# would train on days that come after the days being evaluated, which is not
# how a next-day forecast is used.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, shuffle=False
)

# Sanity check: the split must partition the data without dropping rows
assert len(X_train) + len(X_test) == len(features)


In [None]:
import tensorflow as tf

# Reshape the 2-D feature table into the 3-D (samples, timesteps, features)
# layout consumed by the LSTM layer, with a single timestep per sample.
# Taking the feature count from shape[-1] keeps this cell re-runnable: the
# result is the same whether the array is still 2-D or has already been reshaped.
X_train = np.array(X_train).reshape((X_train.shape[0], 1, X_train.shape[-1]))
X_test = np.array(X_test).reshape((X_test.shape[0], 1, X_test.shape[-1]))

# Seed TensorFlow's global RNG so the model's random initialisation does not
# vary between runs of this cell.
tf.random.set_seed(42)

# Build LSTM model.
# The input shape is declared with an explicit Input object rather than an
# `input_shape=` argument on the first layer, which Keras warns against for
# Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=X_train.shape[1:]),        # (timesteps, features)
    tf.keras.layers.LSTM(50),
    tf.keras.layers.Dense(1, activation='sigmoid')  # one probability per sample
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


  super().__init__(**kwargs)


In [None]:
# Train the model.
# - Validation data is carved from the tail of the training set
#   (validation_split) so the test set stays untouched until the final
#   evaluation below.
# - Labels are converted to a NumPy array because validation_split is
#   documented for NumPy arrays / tensors.
# - EarlyStopping halts training once val_loss has not improved for 10 epochs
#   and restores the best weights; 100 is only an upper bound on epochs.
# - verbose=2 prints one line per epoch instead of a progress bar.
history = model.fit(
    X_train,
    np.asarray(y_train),
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        )
    ],
    verbose=2,
)

# Evaluate the model once on the held-out test set
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')


Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5183 - loss: 0.6913 - val_accuracy: 0.5000 - val_loss: 0.6948
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5131 - loss: 0.6909 - val_accuracy: 0.5050 - val_loss: 0.6953
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5269 - loss: 0.6900 - val_accuracy: 0.5100 - val_loss: 0.6950
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5135 - loss: 0.6890 - val_accuracy: 0.4800 - val_loss: 0.6958
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5498 - loss: 0.6873 - val_accuracy: 0.5050 - val_loss: 0.6960
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5329 - loss: 0.6904 - val_accuracy: 0.5050 - val_loss: 0.6955
Epoch 7/100
[1m25/25[0m [32m━━━

In [None]:
# Score the test set. The model ends in a single sigmoid unit, so the output
# is already a probability, one per row.
predictions = model.predict(X_test)

# Collapse the predictions to a 1-D array so each value lines up with one label
predicted_probabilities = predictions.flatten()

# Side-by-side view of the true labels and the predicted probabilities
results = pd.DataFrame(
    data={'Actual': y_test, 'Predicted Probability': predicted_probabilities}
)
print(results.head(10))

# Turn probabilities into hard 0/1 classes: at or above the threshold -> 1
threshold = 0.5
results['Predicted Class'] = results['Predicted Probability'].ge(threshold).astype(int)

# Per-class precision / recall / F1 on the test set
from sklearn.metrics import classification_report

print(
    classification_report(
        y_true=results['Actual'],
        y_pred=results['Predicted Class'],
    )
)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step
     Actual  Predicted Probability
521       0               0.481363
737       0               0.444919
740       0               0.457086
660       0               0.489979
411       0               0.490337
678       0               0.479848
626       1               0.488582
513       0               0.488192
859       1               0.483590
136       0               0.501042
              precision    recall  f1-score   support

           0       0.48      0.72      0.57        99
           1       0.46      0.24      0.31       101

    accuracy                           0.47       200
   macro avg       0.47      0.48      0.44       200
weighted avg       0.47      0.47      0.44       200

