In [22]:
import pandas as pd
from prophet import Prophet
import joblib
from datetime import datetime
import sqlite3
import numpy as np
import os



In [23]:
# Load every row of the incidents table whose incident_datetime is >= '2009-01-01'.
query = "SELECT * FROM incidents WHERE incident_datetime >= '2009-01-01'"
conn = sqlite3.connect('../db/incidents.db')
try:
    df = pd.read_sql_query(query, conn)
finally:
    # Always release the connection, even when the query raises
    # (e.g. missing table), so the database handle is never leaked.
    conn.close()

In [24]:
# Parse the incident_datetime column into pandas datetime values
df = df.assign(incident_datetime=pd.to_datetime(df['incident_datetime']))
# No additional preprocessing is applied here: df_filtered is the same frame as df.
# Insert any filtering/cleaning steps at this point if they are needed.
df_filtered = df

In [25]:
# Derive the calendar date of each incident (also reused by the citywide aggregation)
df_filtered['date'] = df_filtered['incident_datetime'].dt.date
# Count incidents per (date, neighborhood) pair and expose them under the
# column names Prophet expects: 'ds' for the date and 'y' for the value.
# NOTE(review): a (date, neighborhood) pair with no incidents yields no row at all,
# so such days are absent rather than zero -- confirm this is intended.
crime_data = (
    df_filtered
    .groupby(['date', 'neighborhood'])
    .size()
    .reset_index(name='y')
    .rename(columns={'date': 'ds'})
)

In [26]:
def fit_prophet_model(data):
    """Create a Prophet model with default settings and fit it to `data`.

    Parameters
    ----------
    data : DataFrame handed straight to ``Prophet.fit``
        (the callers in this notebook pass frames with 'ds' and 'y' columns).

    Returns
    -------
    The fitted Prophet model.
    """
    # Prophet.fit returns the fitted model object itself, so it can be returned directly.
    return Prophet().fit(data)

In [27]:
def make_forecast(model, periods=365):
    """Build the model's future dataframe and predict on it.

    Parameters
    ----------
    model : object exposing ``make_future_dataframe(periods=...)`` and ``predict(...)``
        (in this notebook, a fitted Prophet model).
    periods : int, default 365
        Number of periods forwarded to ``make_future_dataframe``.

    Returns
    -------
    The result of ``model.predict`` on the generated future dataframe.
    """
    return model.predict(model.make_future_dataframe(periods=periods))

In [28]:
# Distinct neighborhood values present in the incident data
neighborhoods = df_filtered['neighborhood'].unique()
# Per-neighborhood containers, keyed by neighborhood: fitted models and their forecasts
models, forecasts = {}, {}

In [29]:
# Fit a model for each neighborhood and store both the model and its forecast.
# Prophet raises a ValueError when the history has fewer than 2 non-NaN rows,
# so neighborhoods with no rows or a single observed day are skipped instead of
# aborting the whole loop.
MIN_ROWS_FOR_PROPHET = 2
for neighborhood in neighborhoods:
    neighborhood_data = crime_data[crime_data['neighborhood'] == neighborhood]
    if len(neighborhood_data) < MIN_ROWS_FOR_PROPHET:
        continue
    model = fit_prophet_model(neighborhood_data)
    forecast = make_forecast(model)
    models[neighborhood] = model
    forecasts[neighborhood] = forecast

18:21:54 - cmdstanpy - INFO - Chain [1] start processing
18:21:55 - cmdstanpy - INFO - Chain [1] done processing
18:21:55 - cmdstanpy - INFO - Chain [1] start processing
18:21:56 - cmdstanpy - INFO - Chain [1] done processing
18:21:56 - cmdstanpy - INFO - Chain [1] start processing
18:21:56 - cmdstanpy - INFO - Chain [1] done processing
18:21:57 - cmdstanpy - INFO - Chain [1] start processing
18:21:57 - cmdstanpy - INFO - Chain [1] done processing
18:21:58 - cmdstanpy - INFO - Chain [1] start processing
18:21:58 - cmdstanpy - INFO - Chain [1] done processing
18:21:58 - cmdstanpy - INFO - Chain [1] start processing
18:21:59 - cmdstanpy - INFO - Chain [1] done processing
18:21:59 - cmdstanpy - INFO - Chain [1] start processing
18:21:59 - cmdstanpy - INFO - Chain [1] done processing
18:22:00 - cmdstanpy - INFO - Chain [1] start processing
18:22:00 - cmdstanpy - INFO - Chain [1] done processing
18:22:01 - cmdstanpy - INFO - Chain [1] start processing
18:22:01 - cmdstanpy - INFO - Chain [1]

In [30]:
# Citywide (all of Buffalo) model: count incidents per date across every
# neighborhood, using the Prophet column names 'ds' (date) and 'y' (count).
buffalo_data = (
    df_filtered
    .groupby('date')
    .size()
    .reset_index(name='y')
    .rename(columns={'date': 'ds'})
)
buffalo_model = fit_prophet_model(buffalo_data)
buffalo_forecast = make_forecast(buffalo_model)

18:22:21 - cmdstanpy - INFO - Chain [1] start processing
18:22:21 - cmdstanpy - INFO - Chain [1] done processing


In [31]:
# Resolve the output folder '../data/forecast' relative to the current working directory
current_dir = os.getcwd()
forecast_directory = os.path.join(current_dir, '..', 'data', 'forecast')
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
os.makedirs(forecast_directory, exist_ok=True)

In [32]:
# Write the citywide forecast to CSV inside the forecast folder (row index omitted)
buffalo_csv_name = 'buffalo_crime_forecast.csv'
buffalo_forecast_path = os.path.join(forecast_directory, buffalo_csv_name)
buffalo_forecast.to_csv(path_or_buf=buffalo_forecast_path, index=False)

In [33]:
# Save the forecast of each neighborhood as 'forecast_<neighborhood>.csv'.
# Only the forecasts are written to disk in this cell; the fitted models are
# not persisted, so the status message reports forecasts only.
for neighborhood, forecast in forecasts.items():
    forecast_path = os.path.join(forecast_directory, f'forecast_{neighborhood}.csv')
    forecast.to_csv(forecast_path, index=False)
print("Forecasts saved successfully.")

Models and forecasts saved successfully.
