In [5]:
import tensorflow as tf
from tensorflow.keras import Input, layers, Model
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
import numpy as np
import geopandas as gpd
import shapely
import matplotlib.pyplot as plt

### Creating Mask

In [6]:
# Build a binary grid mask: 1 where a grid cell touches the NYC geometry,
# 0 where it does not. The result is exposed as the tensor `mask`.

# Load NYC borough shapefile
print("Loading NYC shapefile...")
path = '../data/shapefile/nyc_boroughs.shp'
nyc = gpd.read_file(path)
# Transform to WGS84 (EPSG:4326) to match crime data coordinates
nyc = nyc.to_crs("EPSG:4326")
# Merge every feature into one geometry so a single intersection test per
# grid cell is enough.
nyc = nyc.dissolve()
nyc_geom = nyc.geometry.values[0]

# Only the grid dimensions (axes 1 and 2 of the training array) are needed
# here, so memory-map the file instead of reading the whole array into RAM.
n_y_cells, n_x_cells = np.load('../data/train_data.npy', mmap_mode='r').shape[1:3]

# Create mask using shapefile bounds (like Chicago version)
print("Creating mask from NYC boundaries...")
xmin, ymin, xmax, ymax = nyc.total_bounds
x_cell_size = (xmax - xmin) / n_x_cells
y_cell_size = (ymax - ymin) / n_y_cells

# Lower-left corner coordinate of every cell column / row. The ranges are
# generated one step past the upper bound; `zip` below truncates them to the
# grid size.
x_arange = np.arange(xmin, xmax + x_cell_size, x_cell_size)
y_arange = np.arange(ymin, ymax + y_cell_size, y_cell_size)

# Start from "everything inside" and zero out the cells that miss the geometry.
# Rows are filled bottom-up: the last row is paired with ymin, so row 0 ends up
# on the ymax side of the bounding box.
mask_grid = np.ones((n_y_cells, n_x_cells))
for row, y0 in zip(reversed(range(n_y_cells)), y_arange):
    for col, x0 in zip(range(n_x_cells), x_arange):
        cell = shapely.geometry.box(x0, y0, x0 + x_cell_size, y0 + y_cell_size)
        if not nyc_geom.intersects(cell):
            mask_grid[row, col] = 0

print(f"Mask created: {np.sum(mask_grid)} cells inside NYC out of {n_y_cells * n_x_cells} total")

# Convert mask for TensorFlow: add a trailing channel axis, giving shape
# (n_y_cells, n_x_cells, 1), and cast to the Keras default float type (the
# dtype the legacy `backend.constant` helper used when none was given).
mask = tf.constant(mask_grid[..., np.newaxis], dtype=tf.keras.backend.floatx())

Loading NYC shapefile...
Creating mask from NYC boundaries...
Mask created: 1886.0 cells inside NYC out of 3350 total


### Training the Homo-ConvLSTM (crime data only)

In [7]:
# Sliding-window configuration: each sample is `lookback` consecutive frames
# and the target is the frame that follows them.
lookback = 7
batch_size = 4

# Load the train/test grids and append a trailing channel axis.
# The arrays are kept as NumPy arrays (np.expand_dims rather than
# tf.expand_dims): TimeseriesGenerator slices its input once per batch and is
# documented as not operating on tensors.
train_X_crimes_only = np.expand_dims(np.load('../data/train_data.npy'), -1)
test_X_crimes_only = np.expand_dims(np.load('../data/test_data.npy'), -1)

# Both generators use the same windowing settings; the series serves as both
# input and target, and temporal order is preserved (no shuffling).
generator_kwargs = dict(length=lookback, batch_size=batch_size, shuffle=False)

train_gen = TimeseriesGenerator(
    train_X_crimes_only,
    train_X_crimes_only,
    **generator_kwargs
)
test_gen = TimeseriesGenerator(
    test_X_crimes_only,
    test_X_crimes_only,
    **generator_kwargs
)

def masked_MSE_loss(y_true, y_pred):
    """Mean squared error between the targets and the masked predictions.

    The prediction is multiplied element-wise by the module-level ``mask``
    tensor before the error is computed; ``y_true`` is used as given. The
    squared error is averaged over all elements, including positions where
    the mask is zero.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Model output tensor; must be broadcast-compatible with
            ``mask``.

    Returns:
        Scalar tensor holding the mean of the squared differences.
    """
    # Suppress whatever the network predicts at masked-out positions.
    masked_prediction = tf.multiply(y_pred, mask)
    # Explicit square + mean rather than the deprecated mean_squared_error helper.
    squared_error = tf.math.square(y_true - masked_prediction)
    return tf.math.reduce_mean(squared_error)

# Model input: a window of `lookback` frames, each with the per-frame shape of
# the training array (every axis after the first).
frame_shape = train_X_crimes_only.shape[1:]
inputs = Input(shape=(lookback, *frame_shape))

# Hyper-parameters shared by all four ConvLSTM layers.
convlstm_kwargs = dict(
    filters=128,
    kernel_size=(3, 3),
    padding='same',
    activation='tanh',
)

# First branch: ConvLSTM (full sequence) -> BatchNorm -> ConvLSTM (last state)
convlstm1 = layers.ConvLSTM2D(return_sequences=True, **convlstm_kwargs)(inputs)
batchnorm1 = layers.BatchNormalization()(convlstm1)
convlstm2 = layers.ConvLSTM2D(return_sequences=False, **convlstm_kwargs)(batchnorm1)

# Second branch: same architecture, fed from the same inputs
convlstm3 = layers.ConvLSTM2D(return_sequences=True, **convlstm_kwargs)(inputs)
batchnorm2 = layers.BatchNormalization()(convlstm3)
convlstm4 = layers.ConvLSTM2D(return_sequences=False, **convlstm_kwargs)(batchnorm2)

# Merge the two branches and project to a single output channel with a
# 1x1 linear convolution.
concatenation = layers.concatenate([convlstm2, convlstm4])
outputs = layers.Conv2D(
    filters=1,
    kernel_size=1,
    padding="same",
    activation='linear',
)(concatenation)

# Assemble and compile the model with the masked loss; MAE is tracked as a metric.
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam', loss=masked_MSE_loss, metrics=['mae'])

# Single training pass over the training generator.
model.fit(train_gen, epochs=1)

# Predict on the test windows, then zero the predictions at masked-out cells.
# The multiplication is done in place on the array returned by `predict`.
test_pred = model.predict(test_gen)
test_pred *= mask

  self._warn_if_super_not_called()


[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3809s[0m 10s/step - loss: 0.7413 - mae: 0.3997
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m292s[0m 3s/step


In [8]:
# Persist the masked test-set predictions to disk.
np.save(file='../data/homo_convlstm.npy', arr=test_pred)