# Time Series Models for Crime Prediction

This notebook implements:
1. ARIMA model
2. Prophet model
3. LSTM model
4. Model comparison
5. Saving the trained models

In [None]:
import os
import warnings

# Silence library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

print("Libraries loaded!")

In [None]:
# Load time series data
ts_data = pd.read_csv('../data/processed/overall_monthly_timeseries.csv')
ts_data['Date'] = pd.to_datetime(ts_data['Date'])
# sort_values keeps each row's original label, so after sorting the index can
# be out of order. Resetting it makes label and position agree again, which
# gives every later slice of ts_data a clean 0..n-1 index.
ts_data = ts_data.sort_values('Date').reset_index(drop=True)

print(f"Data shape: {ts_data.shape}")
print(f"Date range: {ts_data['Date'].min()} to {ts_data['Date'].max()}")
print(ts_data.head())

In [None]:
# Chronological hold-out (80-20): the earliest 80% of rows are used for
# fitting and the remaining rows are kept back for evaluation.
train_size = int(0.8 * len(ts_data))
train_data, test_data = ts_data.iloc[:train_size], ts_data.iloc[train_size:]

print(f"Training data: {len(train_data)} months")
print(f"Testing data: {len(test_data)} months")

## 1. ARIMA Model

In [None]:
# Fit an ARIMA(1, 1, 1) on the training portion of the incident counts
arima_model = ARIMA(train_data['Incidents_Reported'], order=(1, 1, 1))
arima_fitted = arima_model.fit()

# Forecast one step for every row of the hold-out set
n_test_steps = len(test_data)
arima_pred = arima_fitted.forecast(steps=n_test_steps)

# Score the forecast against the held-out observations
arima_actual = test_data['Incidents_Reported']
arima_mae = mean_absolute_error(arima_actual, arima_pred)
arima_mse = mean_squared_error(arima_actual, arima_pred)
arima_rmse = np.sqrt(arima_mse)
arima_r2 = r2_score(arima_actual, arima_pred)

print("ARIMA Model Performance:")
print(f"MAE: {arima_mae:.2f}")
print(f"RMSE: {arima_rmse:.2f}")
print(f"R²: {arima_r2:.4f}")

## 2. Prophet Model

In [None]:
# Prepare data for Prophet: the timestamp column is passed as 'ds' and the
# target as 'y'. rename() returns a new frame, so train_data is untouched.
prophet_train = train_data[['Date', 'Incidents_Reported']].rename(
    columns={'Date': 'ds', 'Incidents_Reported': 'y'}
)

# Train Prophet model (yearly seasonality only; the series is monthly)
prophet_model = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)
prophet_model.fit(prophet_train)

# Make predictions on the *actual* held-out dates. Regenerating dates with a
# hard-coded month-start frequency and then pairing them with test_data by
# position silently misaligns forecasts and actuals whenever the CSV dates
# are not month starts or contain gaps.
future = test_data[['Date']].rename(columns={'Date': 'ds'}).reset_index(drop=True)
prophet_forecast = prophet_model.predict(future)
prophet_pred = prophet_forecast['yhat'].values

# Metrics
prophet_actual = test_data['Incidents_Reported']
prophet_mae = mean_absolute_error(prophet_actual, prophet_pred)
prophet_rmse = np.sqrt(mean_squared_error(prophet_actual, prophet_pred))
prophet_r2 = r2_score(prophet_actual, prophet_pred)

print("Prophet Model Performance:")
print(f"MAE: {prophet_mae:.2f}")
print(f"RMSE: {prophet_rmse:.2f}")
print(f"R²: {prophet_r2:.4f}")

## 3. LSTM Model

In [None]:
# Scale data
# Fit the min/max on the training rows only, then apply that fixed mapping to
# the whole series. Fitting on ts_data would let the held-out months set the
# scaling range, leaking test information into training. Values outside the
# training range simply map outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(train_data[['Incidents_Reported']])
scaled_data = scaler.transform(ts_data[['Incidents_Reported']])

# Create sequences
def create_sequences(data, lookback=12):
    """Build sliding input windows and next-step targets from column 0 of ``data``.

    Parameters
    ----------
    data : 2-D array, indexed as ``data[row, 0]``.
    lookback : int, number of consecutive rows in each input window.

    Returns
    -------
    (X, y) : tuple of numpy arrays. ``X[k]`` holds rows ``k .. k+lookback-1``
        and ``y[k]`` holds row ``k+lookback``. Both are empty 1-D arrays when
        ``data`` has no more than ``lookback`` rows.
    """
    target_rows = range(lookback, len(data))
    windows = [data[end - lookback:end, 0] for end in target_rows]
    targets = [data[end, 0] for end in target_rows]
    return np.array(windows), np.array(targets)

lookback = 12
X, y = create_sequences(scaled_data, lookback)

# Split
# Sample k of X/y targets row k + lookback of the series, so the first
# len(train_data) - lookback samples are exactly those whose target falls in
# train_data. Splitting there scores the LSTM on the same months as the
# ARIMA and Prophet models (the rows of test_data) instead of on a different
# "last 20% of the sequences" window.
if len(train_data) <= lookback:
    raise ValueError(
        f"Need more than {lookback} training rows to build LSTM sequences, "
        f"got {len(train_data)}"
    )
# NOTE: this rebinds the notebook-level `train_size` from a row count to a
# sequence count; any later cell that reads `train_size` sees this value.
train_size = len(train_data) - lookback
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Reshape for LSTM: (samples, timesteps, features) with a single feature
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

In [None]:
# Build LSTM model
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so initialisation, dropout and batch shuffling (and therefore
# the reported metrics and the "best model" pick) repeat on Restart & Run All
# with the same software stack.
tf.keras.utils.set_random_seed(42)

# Two stacked 50-unit LSTM layers with dropout, followed by a single-value
# regression head.
lstm_model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(lookback, 1)),
    Dropout(0.2),
    LSTM(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse')
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
lstm_model.summary()

In [None]:
# Fit the network silently; 10% of the training sequences are set aside for
# validation so both loss curves can be plotted below.
history = lstm_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.1,
    verbose=0,
)

# Training vs. validation loss per epoch
hist_fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.plot(history.history['loss'], label='Training Loss')
hist_ax.plot(history.history['val_loss'], label='Validation Loss')
hist_ax.set_title('LSTM Model Training History')
hist_ax.set_xlabel('Epoch')
hist_ax.set_ylabel('Loss')
hist_ax.legend()
hist_ax.grid(alpha=0.3)
plt.show()

In [None]:
# Predict on the held-out windows, then map predictions and targets back to
# the original incident-count scale.
# Caveat: every test window is built from observed (scaled) values, so these
# are one-step-ahead predictions.
lstm_pred_scaled = lstm_model.predict(X_test, verbose=0)
lstm_pred_column = scaler.inverse_transform(lstm_pred_scaled)
lstm_pred = lstm_pred_column.flatten()
y_test_column = y_test.reshape(-1, 1)
y_test_original = scaler.inverse_transform(y_test_column).flatten()

# Error metrics in original units
lstm_mae = mean_absolute_error(y_test_original, lstm_pred)
lstm_mse = mean_squared_error(y_test_original, lstm_pred)
lstm_rmse = np.sqrt(lstm_mse)
lstm_r2 = r2_score(y_test_original, lstm_pred)

print("LSTM Model Performance:")
print(f"MAE: {lstm_mae:.2f}")
print(f"RMSE: {lstm_rmse:.2f}")
print(f"R²: {lstm_r2:.4f}")

## 4. Model Comparison

In [None]:
# Comparison table: one row per model, one column per metric
metric_rows = [
    ('ARIMA', arima_mae, arima_rmse, arima_r2),
    ('Prophet', prophet_mae, prophet_rmse, prophet_r2),
    ('LSTM', lstm_mae, lstm_rmse, lstm_r2),
]
comparison = pd.DataFrame(metric_rows, columns=['Model', 'MAE', 'RMSE', 'R²'])

print("\nModel Comparison:")
print(comparison.to_string(index=False))

# The winner is the row with the highest R²
best_row = comparison['R²'].idxmax()
best_model = comparison.loc[best_row, 'Model']
print(f"\n✓ Best performing model: {best_model}")

In [None]:
# Visualization comparison
# The LSTM targets are the final len(y_test_original) rows of the series, so
# take their dates from the tail of ts_data. This avoids depending on whatever
# value `train_size` currently holds in the kernel; an explicit start index
# (rather than a negative slice) also stays correct for an empty hold-out.
lstm_start = len(ts_data) - len(y_test_original)
test_dates_lstm = ts_data['Date'].values[lstm_start:]

test_dates = test_data['Date'].values
test_actual = test_data['Incidents_Reported'].values

# (title prefix, x values, actual series, predicted series, R²) per panel
panels = [
    ('ARIMA', test_dates, test_actual, arima_pred, arima_r2),
    ('Prophet', test_dates, test_actual, prophet_pred, prophet_r2),
    ('LSTM', test_dates_lstm, y_test_original, lstm_pred, lstm_r2),
]

fig, axes = plt.subplots(len(panels), 1, figsize=(14, 12))

for ax, (model_name, dates, actual, predicted, r2) in zip(axes, panels):
    ax.plot(dates, actual, label='Actual', marker='o', linewidth=2)
    ax.plot(dates, predicted, label='Predicted', marker='s', linewidth=2, alpha=0.7)
    ax.set_title(f'{model_name} - R²: {r2:.4f}', fontweight='bold')
    ax.set_ylabel('Incidents Reported')
    ax.legend()
    ax.grid(alpha=0.3)

axes[-1].set_xlabel('Date')

plt.tight_layout()
plt.show()

## 5. Save Models

In [None]:
# Save models
# Create the output directory first: each save call below fails if
# '../models' does not exist yet (e.g. on a fresh checkout).
os.makedirs('../models', exist_ok=True)

arima_fitted.save('../models/arima_model.pkl')
joblib.dump(prophet_model, '../models/prophet_model.pkl')
lstm_model.save('../models/lstm_model.h5')
# The scaler is needed at inference time to map LSTM outputs back to counts.
joblib.dump(scaler, '../models/scaler.pkl')

print("✓ All models saved successfully!")