<a href="https://colab.research.google.com/github/SARTHAKCHAKS/FBI_TIME_SERIES_FORECASTING/blob/main/FBI_TIME_SERIES_FORECASTING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==============================================================================
# 1. SETUP AND DATA UPLOADING
# ==============================================================================

# Install the necessary library (Prophet)
!pip install prophet openpyxl

import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
from google.colab import files
import io

# Names the rest of the notebook expects to find among the uploaded files.
REQUIRED_FILES = ('Train.csv', 'Test.csv')

print("Please upload the required files: 'Train.csv' and 'Test.csv'")
# files.upload() opens the Colab upload widget; the result is indexed by
# file name and each value is the raw file content.
uploaded = files.upload()

# Report *every* missing file before stopping, so the user can fix all of
# them in one go instead of hitting a bare KeyError further down.
missing_files = [name for name in REQUIRED_FILES if name not in uploaded]
for name in missing_files:
    print(f"\nERROR: '{name}' was not found. Please ensure you upload the file with the exact name.")
if missing_files:
    raise FileNotFoundError(
        f"Missing required upload(s): {missing_files}. "
        f"Files received: {sorted(uploaded)}"
    )

# Parse the uploaded bytes into DataFrames.
try:
    train_data = pd.read_csv(io.BytesIO(uploaded['Train.csv']))
    test_template = pd.read_csv(io.BytesIO(uploaded['Test.csv']))
except Exception as e:
    # Top-level boundary: surface the parsing problem, then stop the cell.
    print(f"\nAn error occurred during file loading: {e}")
    raise
else:
    print("\nData files loaded successfully.")

# Display the initial structure of the loaded dataframes
print("\n--- Train Data Head ---")
print(train_data.head())
print("\n--- Test Template Head ---")
print(test_template.head())

# ==============================================================================
# 2. TRAINING DATA PREPROCESSING (AGGREGATION)
# ==============================================================================

print("\nStarting data preprocessing...")

# Collapse each record's YEAR/MONTH into a single month-start timestamp
# (day fixed to the 1st) so records can be bucketed per month.
year_text = train_data['YEAR'].astype(str)
month_text = train_data['MONTH'].astype(str)
train_data['DATE'] = pd.to_datetime(year_text + '-' + month_text + '-01')

# One row per (month, crime type) holding the number of records in that
# bucket; this count is the quantity being forecast.
records_per_bucket = train_data.groupby(['DATE', 'TYPE']).size()

# Prophet expects the timestamp column to be called 'ds' and the target 'y'.
ts_data = (
    records_per_bucket
    .reset_index(name='y')
    .rename(columns={'DATE': 'ds'})
)

# Quick look at the aggregated series
print("\n--- Aggregated Time Series Data Head ---")
print(ts_data.head())
print(f"Total number of time series: {ts_data['TYPE'].nunique()}")

# ==============================================================================
# 3. MODEL TRAINING AND FORECASTING
# ==============================================================================

# Fit one Prophet model per crime type and predict the months requested in
# the test template. Each iteration appends a frame with the columns
# YEAR, MONTH, TYPE, Incident_Counts to `all_forecasts`.
all_forecasts = []
unique_types = ts_data['TYPE'].unique()

for crime_type in unique_types:
    print(f"\nTraining and forecasting for: {crime_type}...")

    # a) Training series for this crime type (Prophet only needs ds / y)
    df_type = ts_data.loc[ts_data['TYPE'] == crime_type, ['ds', 'y']]

    # b) Rows of the test template that ask for this crime type
    future_test = test_template[test_template['TYPE'] == crime_type].copy()
    if future_test.empty:
        # Nothing to predict; Prophet.predict would reject an empty frame.
        print(f"No test rows for '{crime_type}'; skipping.")
        continue
    if len(df_type) < 2:
        # Prophet cannot be fitted on fewer than two observations.
        print(f"Not enough history for '{crime_type}'; skipping.")
        continue
    future_test['ds'] = pd.to_datetime(
        future_test['YEAR'].astype(str) + '-' + future_test['MONTH'].astype(str) + '-01'
    )

    # c) Initialize and fit the Prophet model.
    # The data is monthly, so daily/weekly seasonality are disabled and only
    # yearly seasonality is modelled.
    model = Prophet(
        yearly_seasonality=True,
        daily_seasonality=False,
        weekly_seasonality=False,
        interval_width=0.95  # 95% uncertainty interval
    )
    model.fit(df_type)

    # d) Forecast each requested month once
    future_dates = (
        future_test[['ds']]
        .drop_duplicates()
        .sort_values('ds')
        .reset_index(drop=True)
    )
    forecast = model.predict(future_dates)

    # e) Attach the point forecast (yhat) to the template rows BY DATE.
    # `forecast` carries its own 0..n-1 index while `future_test` keeps the
    # row labels of `test_template`; assigning the column directly would
    # align on those unrelated labels and yield NaN / misplaced values.
    yhat_by_date = forecast.set_index('ds')['yhat']
    predicted = future_test['ds'].map(yhat_by_date)

    forecast_df = future_test[['YEAR', 'MONTH', 'TYPE']].copy()
    # Counts are whole and non-negative: round, then floor negatives at 0.
    forecast_df['Incident_Counts'] = predicted.round().clip(lower=0).astype(int)

    # Append to the results list
    all_forecasts.append(forecast_df)

    # For a visual check of a fit, call model.plot(forecast) here and show
    # the figure with plt.show().


# ==============================================================================
# 4. FINAL OUTPUT AND SUBMISSION FILE GENERATION
# ==============================================================================

print("\n--- Generating final submission file ---")

# pd.concat raises an opaque error on an empty list; fail with a clear one.
if not all_forecasts:
    raise ValueError("No forecasts were produced, so there is nothing to submit.")

# Combine all individual crime type forecasts into a single DataFrame
submission_df = pd.concat(all_forecasts, ignore_index=True)

key_columns = ['YEAR', 'MONTH', 'TYPE']

# Keep one prediction per key so the left merge below cannot multiply rows
# when the test template repeats a (YEAR, MONTH, TYPE) combination.
submission_df = submission_df.drop_duplicates(subset=key_columns)

# Merge the predicted counts back into the original test template structure,
# preserving the row order and row count of Test.csv.
final_submission = pd.merge(
    test_template[key_columns],
    submission_df[key_columns + ['Incident_Counts']],
    on=key_columns,
    how='left',
    validate='many_to_one'
)

# Template rows without a matching forecast (e.g. a crime type that never
# appears in the training data) come back as NaN. Report them and fall back
# to 0 so the submission holds integer counts only.
unmatched = final_submission['Incident_Counts'].isna()
if unmatched.any():
    print(f"WARNING: {int(unmatched.sum())} row(s) had no forecast and were filled with 0.")
final_submission['Incident_Counts'] = (
    final_submission['Incident_Counts'].fillna(0).astype(int)
)

# Final check of the submission file structure
print("\n--- Final Submission File Head ---")
print(final_submission.head())
print(f"Total rows in submission: {len(final_submission)}")

# Save the final DataFrame to a CSV file for submission
final_submission.to_csv('FBI_Time_Series_Forecast_Submission.csv', index=False)

print("\n--- SUCCESS ---")
print("The forecasting is complete. The results have been saved to:")
print("FBI_Time_Series_Forecast_Submission.csv")
print("You can download the file from the Colab file explorer or by running the following command:")

# Trigger a browser download of the saved file
files.download('FBI_Time_Series_Forecast_Submission.csv')

Please upload the required files: 'Train.csv' and 'Test.csv'


Saving Train.csv to Train.csv
Saving Test.csv to Test.csv

Data files loaded successfully.

--- Train Data Head ---
          TYPE     HUNDRED_BLOCK NEIGHBOURHOOD         X           Y  \
0  Other Theft  9XX TERMINAL AVE    Strathcona  493906.5  5457452.47   
1  Other Theft  9XX TERMINAL AVE    Strathcona  493906.5  5457452.47   
2  Other Theft  9XX TERMINAL AVE    Strathcona  493906.5  5457452.47   
3  Other Theft  9XX TERMINAL AVE    Strathcona  493906.5  5457452.47   
4  Other Theft  9XX TERMINAL AVE    Strathcona  493906.5  5457452.47   

    Latitude   Longitude  HOUR  MINUTE  YEAR  MONTH  DAY        Date  
0  49.269802 -123.083763  16.0    15.0  1999      5   12  05/12/1999  
1  49.269802 -123.083763  15.0    20.0  1999      5    7  05/07/1999  
2  49.269802 -123.083763  16.0    40.0  1999      4   23  04/23/1999  
3  49.269802 -123.083763  11.0    15.0  1999      4   20  04/20/1999  
4  49.269802 -123.083763  17.0    45.0  1999      4   12  04/12/1999  

--- Test Template Head -

DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/gg6x6t7b.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/wxdrjgi4.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=6790', 'data', 'file=/tmp/tmpxpfup3ip/gg6x6t7b.json', 'init=/tmp/tmpxpfup3ip/wxdrjgi4.json', 'output', 'file=/tmp/tmpxpfup3ip/prophet_model5hcmuk_4/prophet_model-20251008175325.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:53:25 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:53:25 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/bxc76d8k.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/kzrbnhms.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/l


Training and forecasting for: Break and Enter Residential/Other...

Training and forecasting for: Mischief...


DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/dm25sxd2.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/w8ha99ub.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=58106', 'data', 'file=/tmp/tmpxpfup3ip/dm25sxd2.json', 'init=/tmp/tmpxpfup3ip/w8ha99ub.json', 'output', 'file=/tmp/tmpxpfup3ip/prophet_modelytw993e7/prophet_model-20251008175325.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:53:25 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:53:25 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/9794h6f4.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/80hny7pf.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/


Training and forecasting for: Offence Against a Person...

Training and forecasting for: Other Theft...


DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/1_19yuuh.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/lbnq0azs.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=23113', 'data', 'file=/tmp/tmpxpfup3ip/1_19yuuh.json', 'init=/tmp/tmpxpfup3ip/lbnq0azs.json', 'output', 'file=/tmp/tmpxpfup3ip/prophet_model5ez2gt1t/prophet_model-20251008175325.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:53:25 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:53:25 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/2w5u6v99.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/qr2bbify.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/


Training and forecasting for: Theft from Vehicle...

Training and forecasting for: Theft of Bicycle...


DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/hxf6779p.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/48o1gzr2.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=47644', 'data', 'file=/tmp/tmpxpfup3ip/hxf6779p.json', 'init=/tmp/tmpxpfup3ip/48o1gzr2.json', 'output', 'file=/tmp/tmpxpfup3ip/prophet_modelsdahiv6b/prophet_model-20251008175326.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
17:53:26 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
17:53:26 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/_sdim5sb.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpxpfup3ip/wjuuvoii.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/


Training and forecasting for: Theft of Vehicle...

Training and forecasting for: Vehicle Collision or Pedestrian Struck (with Injury)...


17:53:26 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing



--- Generating final submission file ---

--- Final Submission File Head ---
   YEAR  MONTH                                               TYPE  \
0  2013      6  Vehicle Collision or Pedestrian Struck (with I...   
1  2013      6                                   Theft of Vehicle   
2  2013      6                                   Theft of Bicycle   
3  2013      6                                 Theft from Vehicle   
4  2013      6                                        Other Theft   

   Incident_Counts  
0            129.0  
1             87.0  
2            209.0  
3            909.0  
4            410.0  
Total rows in submission: 162

--- SUCCESS ---
The forecasting is complete. The results have been saved to:
FBI_Time_Series_Forecast_Submission.csv
You can download the file from the Colab file explorer or by running the following command:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>