<a href="https://colab.research.google.com/github/AnishBehera05/Data_Analysis_911_call_data_Report/blob/main/Predictive_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Load the raw 911 call records into a DataFrame.
# NOTE(review): the path lives under /content/drive, and no Drive mount is
# visible in this notebook -- confirm Drive is mounted before running.
DATA_PATH = '/content/drive/MyDrive/911.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Helper that derives a call's 'Reason' from its 'title' value
def extract_reason(title):
    """Return the text of ``title`` that precedes the first ':'.

    Non-string values (for example a missing title) yield 'Unknown'.
    A string that contains no ':' is returned unchanged.
    """
    if not isinstance(title, str):
        return 'Unknown'
    prefix, _sep, _rest = title.partition(':')
    return prefix

# Derive the 'Reason' column by running every title through extract_reason
df['Reason'] = df['title'].map(extract_reason)

In [None]:
# Encode the textual 'Reason' categories as integer class labels.
# The fitted encoder is kept in `le` so the integer codes can be mapped back
# to their original labels later on.
le = LabelEncoder().fit(df['Reason'])
df['ReasonEncoded'] = le.transform(df['Reason'])

In [None]:
# Parse the 'timeStamp' column into pandas datetime values
df['timeStamp'] = df['timeStamp'].pipe(pd.to_datetime)

In [None]:
# Extracting Hour, Month, and Day of Week from the 'timeStamp' column.
# The vectorized .dt accessor computes each component for the whole column at
# once instead of calling a Python lambda per row. It requires 'timeStamp' to
# already hold datetime values.
df['Hour'] = df['timeStamp'].dt.hour
df['Month'] = df['timeStamp'].dt.month
df['Day of Week'] = df['timeStamp'].dt.dayofweek  # Monday=0 ... Sunday=6

In [None]:
# Select the model inputs (location plus time-of-call columns) and the
# integer-encoded reason as the prediction target
feature_columns = ['lat', 'lng', 'Hour', 'Month']
X = df[feature_columns]
y = df['ReasonEncoded']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Handling NaN values in BOTH the training and the testing set.
# Dropping incomplete rows only from the test split would still let NaN
# feature rows reach the classifier's fit step, so the same cleaning is applied
# to each split. dropna() returns a new frame (no inplace mutation of a frame
# derived from the split), which also makes this cell safe to re-run.
X_train = X_train.dropna()
y_train = y_train.loc[X_train.index]  # keep labels aligned with kept rows
X_test = X_test.dropna()
y_test = y_test.loc[X_test.index]  # keep labels aligned with kept rows

In [None]:
# Build a 100-tree random forest (seeded for reproducibility) and fit it on
# the training split
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
rf_classifier.fit(X_train, y_train)

In [None]:
# Predict the encoded reason for every row of the testing set; the result is
# in the same integer encoding as the training target
y_pred = rf_classifier.predict(X_test)

In [None]:
# Map the integer codes (predictions first, then ground truth) back to their
# original reason labels using the fitted encoder
predicted_reasons, actual_reasons = (
    le.inverse_transform(codes) for codes in (y_pred, y_test)
)

In [None]:
# Tally how many times each reason was predicted
predicted_counts = {}
for label in predicted_reasons:
    predicted_counts[label] = predicted_counts.get(label, 0) + 1

In [None]:
# Tally how many times each reason actually occurred
actual_counts = {}
for label in actual_reasons:
    actual_counts[label] = actual_counts.get(label, 0) + 1

In [None]:
# Creating a grouped bar plot of predicted vs actual counts per reason.
# The labels are sorted so the bar order is identical on every run; iterating
# a set of strings directly gives an arbitrary order that can change between
# kernel sessions.
labels = sorted(set(predicted_reasons) | set(actual_reasons))
predicted_vals = [predicted_counts.get(label, 0) for label in labels]
actual_vals = [actual_counts.get(label, 0) for label in labels]
x = np.arange(len(labels))
width = 0.35  # width of one bar; the two bars of a group sit side by side
fig, ax = plt.subplots(figsize=(10, 6))
# Offset each series by half a bar width so the pair is centred on its tick
ax.bar(x - width/2, predicted_vals, width, label='Predicted', color='skyblue')
ax.bar(x + width/2, actual_vals, width, label='Actual', color='salmon')
ax.set_xlabel('Reason for 911 Call')
ax.set_ylabel('Count')
ax.set_title('Predicted vs Actual Reasons for 911 Calls')
ax.set_xticks(x)
ax.set_xticklabels(labels, rotation=45, ha='right')
ax.legend()
plt.tight_layout()
plt.show()