
# SPATIO-TEMPORAL CRIME PREDICTION USING DYNAMIC MODE DECOMPOSITION (DMD) AND CONVOLUTIONAL NEURAL NETWORK LONG-SHORT TERM MEMORY (CNN-LSTM) 
## Adrian Joseph Albino, Julian Ernest Camello
### This notebook documents the implementation of our thesis paper

![Conceptual Framework](conceptual_framework.png)

# Dataset
[SF Incident Report](https://www.example.com)


In [1]:
# Load Data 
import pandas as pd
file_path = 'Police_Department_Incident_Reports__2018_to_Present.csv'
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Incident Datetime,Incident Date,Incident Time,Incident Year,Incident Day of Week,Report Datetime,Row ID,Incident ID,Incident Number,CAD Number,...,Longitude,Point,Neighborhoods,ESNCAG - Boundary File,Central Market/Tenderloin Boundary Polygon - Updated,Civic Center Harm Reduction Project Boundary,HSOC Zones as of 2018-06-05,Invest In Neighborhoods (IIN) Areas,Current Supervisor Districts,Current Police Districts
0,2023/03/13 11:41:00 PM,2023/03/13,23:41,2023,Monday,2023/03/13 11:41:00 PM,125373607041,1253736,230167874,,...,,,,,,,,,,
1,2023/03/01 05:02:00 AM,2023/03/01,05:02,2023,Wednesday,2023/03/11 03:40:00 PM,125379506374,1253795,236046151,,...,,,,,,,,,,
2,2023/03/13 01:16:00 PM,2023/03/13,13:16,2023,Monday,2023/03/13 01:17:00 PM,125357107041,1253571,220343896,,...,,,,,,,,,,
3,2023/03/13 10:59:00 AM,2023/03/13,10:59,2023,Monday,2023/03/13 11:00:00 AM,125355107041,1253551,230174885,,...,,,,,,,,,,
4,2023/03/14 06:44:00 PM,2023/03/14,18:44,2023,Tuesday,2023/03/14 06:45:00 PM,125402407041,1254024,230176728,,...,,,,,,,,,,


# Preprocessing

## Filtering & Removing Other Columns

In [2]:
columns_to_keep = ['Incident Date', 'Latitude', 'Longitude']
df.dropna(subset=columns_to_keep, inplace=True)
df = df[columns_to_keep]
df.head()


Unnamed: 0,Incident Date,Latitude,Longitude
9,2023/03/11,37.772895,-122.454285
11,2022/06/27,37.787359,-122.408227
13,2023/03/16,37.76229,-122.401324
33,2023/03/21,37.787038,-122.418271
61,2021/08/22,37.793977,-122.429804


## Aggregating By Date

In [16]:
df['Incident Date'] = pd.to_datetime(df['Incident Date'])
crime_by_date = df.groupby(pd.Grouper(key='Incident Date', freq='D'))
crime_by_date.head()

Unnamed: 0,Incident Date,Latitude,Longitude
17982,2018-01-01,37.756167,-122.406636
78404,2018-01-01,37.773467,-122.391434
98729,2018-01-01,37.767257,-122.404750
122472,2018-01-01,37.721659,-122.467615
130417,2018-01-01,37.721367,-122.473668
...,...,...,...
94436,2023-11-24,37.787872,-122.411699
94437,2023-11-24,37.759830,-122.425920
94442,2023-11-24,37.748231,-122.414919
94444,2023-11-24,37.742343,-122.491768


In [14]:
specific_date = '2018-01-01'  
specific_date_data = aggregated_df.get_group(specific_date)
print(specific_date_data)

       Incident Date   Latitude   Longitude
17982     2018-01-01  37.756167 -122.406636
78404     2018-01-01  37.773467 -122.391434
98729     2018-01-01  37.767257 -122.404750
122472    2018-01-01  37.721659 -122.467615
130417    2018-01-01  37.721367 -122.473668
...              ...        ...         ...
752296    2018-01-01  37.778719 -122.414741
758037    2018-01-01  37.803727 -122.428400
764293    2018-01-01  37.729910 -122.397177
773595    2018-01-01  37.721659 -122.467615
793311    2018-01-01  37.783933 -122.412595

[521 rows x 3 columns]


## Aggregation By Cluster

In [68]:
import geopandas as gpd
sf_geojson = "SanFrancisco.Neighborhoods.json"
sf_neighborhoods = gpd.read_file(sf_geojson, driver='GeoJSON')

DAYS = 100

def count_crimes_by_neighborhood(date, crimes_group):
    crimes_group = gpd.GeoDataFrame(crimes_group, geometry=gpd.points_from_xy(crimes_group['Longitude'], crimes_group['Latitude']))
    crimes_group.crs = 'EPSG:4326'
    crimes_with_neighborhoods = gpd.sjoin(crimes_group, sf_neighborhoods, how="left", predicate="within")
    counts = crimes_with_neighborhoods.groupby('neighborhood').size()
    # Set count to 0 for neighborhoods with no crimes
    counts.fillna(0, inplace=True)
    return pd.DataFrame({'date': date, 'neighborhood': counts.index, 'crime_count': counts.values})

crime_by_cluster = []

counter = 0
for date, crimes_group in crime_by_date:
     # LIMITS THE NUMBER OF DAYS TO PROCESS
    if (counter == DAYS):
        break #
    counter += 1
    
    counts_df = count_crimes_by_neighborhood(date, crimes_group)
    crime_by_cluster.append(counts_df['crime_count'].tolist())


print(crime_by_cluster[:5])

[[16, 37, 14, 20, 11, 30, 13, 17, 24, 13, 38, 35, 30, 10, 28, 2, 13, 41, 38, 9, 9, 56, 5], [10, 26, 9, 20, 5, 6, 16, 4, 25, 10, 45, 14, 7, 6, 17, 7, 4, 44, 23, 11, 5, 38, 4], [13, 15, 9, 11, 11, 20, 15, 1, 27, 10, 23, 31, 25, 9, 12, 7, 50, 34, 18, 6, 36, 3], [14, 22, 10, 21, 17, 19, 12, 2, 18, 2, 51, 40, 11, 10, 13, 1, 10, 58, 28, 21, 6, 54, 3], [3, 30, 24, 9, 12, 8, 10, 3, 13, 9, 46, 32, 9, 15, 11, 6, 9, 50, 21, 15, 12, 53, 2]]


## Time Series Splitting

In [60]:
from math import ceil

TRAIN_PERCENT = 0.8
TEST_PERCENT = 0.2

total_length = len(crime_by_cluster)
train_data_length = ceil(total_length * TRAIN_PERCENT)
test_data_length = total_length - train_data_length

# Split the data into training and testing sets
train_data = crime_by_cluster[:train_data_length]
test_data = crime_by_cluster[train_data_length:]

print("Train data length:", len(train_data))
print("Test data length:", len(test_data))

Train data length: 80
Test data length: 20


## Dynamic Mode Decomposition

In [67]:
from pydmd import DMD
from pydmd.plotter import plot_summary

for i in range(len(train_data)):
    print(train_data[i], len(train_data[i]))

# dmd = DMD(svd_rank=12)
# dmd.fit(train_data)

# plot_summary(dmd)

[16, 37, 14, 20, 11, 30, 13, 17, 24, 13, 38, 35, 30, 10, 28, 2, 13, 41, 38, 9, 9, 56, 5] 23
[10, 26, 9, 20, 5, 6, 16, 4, 25, 10, 45, 14, 7, 6, 17, 7, 4, 44, 23, 11, 5, 38, 4] 23
[13, 15, 9, 11, 11, 20, 15, 1, 27, 10, 23, 31, 25, 9, 12, 7, 50, 34, 18, 6, 36, 3] 22
[14, 22, 10, 21, 17, 19, 12, 2, 18, 2, 51, 40, 11, 10, 13, 1, 10, 58, 28, 21, 6, 54, 3] 23
[3, 30, 24, 9, 12, 8, 10, 3, 13, 9, 46, 32, 9, 15, 11, 6, 9, 50, 21, 15, 12, 53, 2] 23
[9, 15, 13, 14, 7, 12, 5, 6, 14, 13, 38, 36, 21, 15, 16, 3, 7, 66, 32, 4, 1, 60, 5] 23
[10, 14, 18, 13, 16, 18, 8, 8, 21, 3, 38, 22, 23, 12, 15, 1, 5, 36, 14, 15, 3, 43, 6] 23
[4, 20, 8, 9, 11, 16, 15, 3, 18, 8, 34, 22, 16, 7, 13, 7, 10, 32, 13, 14, 4, 32, 5] 23
[11, 25, 12, 15, 7, 27, 16, 9, 11, 9, 31, 32, 16, 12, 22, 6, 11, 48, 7, 13, 2, 31, 6] 23
[16, 26, 11, 6, 4, 25, 15, 8, 5, 3, 36, 34, 12, 14, 8, 1, 7, 63, 23, 13, 9, 51, 3] 23
[9, 24, 16, 22, 9, 15, 13, 4, 6, 13, 44, 21, 13, 6, 16, 1, 9, 56, 28, 9, 14, 28, 6] 23
[17, 11, 4, 17, 9, 22, 14, 9, 7, 