### [City of Baltimore Crime Data](https://data.world/data-society/city-of-baltimore-crime-data)

In [16]:
import pandas as pd
df = pd.read_csv('https://query.data.world/s/qvspv6ym4ukn6irryolom4ijbrmfug?dws=00000', parse_dates=True)

In [17]:
df.head()

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Location 1,Total Incidents
0,11/12/2016,02:35:00,3B,300 SAINT PAUL PL,ROBBERY - STREET,O,,111.0,CENTRAL,Downtown,"(39.2924100000, -76.6140800000)",1
1,11/12/2016,02:56:00,3CF,800 S BROADWAY,ROBBERY - COMMERCIAL,I,FIREARM,213.0,SOUTHEASTERN,Fells Point,"(39.2824200000, -76.5928800000)",1
2,11/12/2016,03:00:00,6D,1500 PENTWOOD RD,LARCENY FROM AUTO,O,,413.0,NORTHEASTERN,Stonewood-Pentwood-Winston,"(39.3480500000, -76.5883400000)",1
3,11/12/2016,03:00:00,6D,6600 MILTON LN,LARCENY FROM AUTO,O,,424.0,NORTHEASTERN,Westfield,"(39.3626300000, -76.5516100000)",1
4,11/12/2016,03:00:00,6E,300 W BALTIMORE ST,LARCENY,O,,111.0,CENTRAL,Downtown,"(39.2893800000, -76.6197100000)",1


In [18]:
crime_date_total_incidents = df[["CrimeDate", "Total Incidents"]]

# Set 'CrimeDate' as the index
crime_date_total_incidents.set_index('CrimeDate', inplace=True)

In [19]:
crime_date_total_incidents.head()

Unnamed: 0_level_0,Total Incidents
CrimeDate,Unnamed: 1_level_1
11/12/2016,1
11/12/2016,1
11/12/2016,1
11/12/2016,1
11/12/2016,1


In [20]:
grouped = crime_date_total_incidents.groupby('CrimeDate').sum()

In [23]:
grouped.index = pd.to_datetime(grouped.index, format='%m/%d/%Y')

# THIS PART IS IMPORTANT
grouped = grouped.sort_index()

In [24]:
import plotly.offline as pyo
import plotly.graph_objects as go

# Create the plot using Plotly Graph Objects
fig = go.Figure(data=go.Scatter(x=grouped.index, y=grouped['Total Incidents'], mode='lines'))

# Update the layout of the plot
fig.update_layout(title='Total Incidents Over Time', xaxis_title='Date', yaxis_title='Total Incidents')

# Save the plot as an HTML file
pyo.plot(fig, filename='total_incidents_plot.html')

'total_incidents_plot.html'

In [25]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Fit the Holt-Winters model to the data
model = ExponentialSmoothing(grouped['Total Incidents'], trend='add', seasonal='add')
fitted_model = model.fit()

# Generate the forecasts
forecast_values = fitted_model.predict(start=grouped.index.min(), end=grouped.index.max())


No frequency information was provided, so inferred frequency D will be used.



In [26]:
forecast_values

2011-01-01    123.790104
2011-01-02    126.081426
2011-01-03    131.612923
2011-01-04    127.462036
2011-01-05    114.536773
                 ...    
2016-11-08    145.570446
2016-11-09    137.585762
2016-11-10    132.398731
2016-11-11    135.029052
2016-11-12    120.046344
Freq: D, Length: 2143, dtype: float64

In [31]:
import plotly.graph_objects as go

# Create the plot using Plotly Graph Objects
fig = go.Figure()
fig.add_trace(go.Scatter(x=grouped.index, y=grouped['Total Incidents'], mode='lines', name='Actual'))
fig.add_trace(go.Scatter(x=forecast_values.index, y=forecast_values, mode='lines', name='Forecast'))

# Update the layout of the plot
fig.update_layout(title='Total Incidents Over Time - Holt-Winters Forecast',
                  xaxis_title='Date', yaxis_title='Total Incidents')

# Save the plot as an HTML file
fig.write_html('total_incidents_plot_Holt_Winters.html')

In [33]:
from statsmodels.tools.eval_measures import rmse, meanabs
# Calculate accuracy metrics
mse_value = meanabs(grouped['Total Incidents'], forecast_values)
mae_value = rmse(grouped['Total Incidents'], forecast_values)
mape_value = (abs((grouped['Total Incidents'] - forecast_values) / grouped['Total Incidents'])).mean() * 100
pe_value = 100 * (forecast_values.mean() - grouped['Total Incidents'].mean()) / grouped['Total Incidents'].mean()

print("Mean Absolute Error (MAE):", mae_value)
print("Root Mean Squared Error (RMSE):", mse_value)
print("Mean Absolute Percentage Error (MAPE):", mape_value)
print("Percentage Error (PE):", pe_value)

Mean Absolute Error (MAE): 18.322951908693604
Root Mean Squared Error (RMSE): 13.418435603251908
Mean Absolute Percentage Error (MAPE): 10.574153729327799
Percentage Error (PE): 0.04266908437202284


In [36]:
# Generate the forecasts for the next month
forecast_values = fitted_model.predict(start=grouped.index.max() + pd.DateOffset(days=1),
                                       end=grouped.index.max() + pd.DateOffset(months=1))

# Create the plot using Plotly Graph Objects
fig = go.Figure()
fig.add_trace(go.Scatter(x=grouped.index, y=grouped['Total Incidents'], mode='lines', name='Historical Data'))
fig.add_trace(go.Scatter(x=forecast_values.index, y=forecast_values, mode='lines', name='Forecast'))

# Update the layout of the plot
fig.update_layout(title='Total Incidents Over Time - Holt-Winters 1 months Forecast',
                  xaxis_title='Date', yaxis_title='Total Incidents')

# Save the plot as an HTML file
fig.write_html('total_incidents_plot.html')

# Save the plot as an offline HTML file
pyo.plot(fig, filename='total_incidents_plot_Holt_Winters_1_months_Forecast.html')


'total_incidents_plot_Holt_Winters_1_months_Forecast.html'

We can observe a decreasing trend in the 1 month horizon