<a href="https://colab.research.google.com/github/SaquibKhan-DS/311-Customer-Service-Optimization/blob/main/notebooks/03_response_time_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 03_response_time_analysis.ipynb

# -----------------------------
# Notebook 03: Response Time Analysis
# -----------------------------
# This notebook calculates response times for complaints,
# analyzes average times by complaint type, handles anomalies,
# and applies transformations for statistical analysis.
# -----------------------------

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# -----------------------------
# Step 1: Load dataset
# -----------------------------
# NOTE: this is a Kaggle-specific absolute path; adjust it when running the
# notebook in another environment.
df = pd.read_csv('/kaggle/input/311-service-requests-nyc/311_Service_Requests_from_2010_to_Present.csv')

# Convert date columns to datetime; values that cannot be parsed become NaT
df['created_dt'] = pd.to_datetime(df['Created Date'], errors='coerce')
df['closed_dt'] = pd.to_datetime(df['Closed Date'], errors='coerce')

# Keep only rows with a valid closed timestamp. The parsed column is tested
# (not the raw 'Closed Date' text) so that rows coerced to NaT above are
# dropped as well. .copy() makes df an independent frame, so adding columns
# to it later does not trigger SettingWithCopyWarning.
df = df[df['closed_dt'].notna()].copy()

In [None]:
# -----------------------------
# Step 2: Calculate elapsed time
# -----------------------------
# Elapsed time as a timedelta, then expressed as float seconds
df['elapsed_time'] = df['closed_dt'] - df['created_dt']
df['elapsed_time_sec'] = df['elapsed_time'] / np.timedelta64(1, 's')

# Elapsed time in minutes for easier interpretation
df['elapsed_time_min'] = df['elapsed_time'] / np.timedelta64(1, 'm')

# Summary statistics. This must be the last expression of the cell: a bare
# expression in the middle of a cell is evaluated but never displayed.
df['elapsed_time_sec'].describe()

In [None]:
# -----------------------------
# Step 3: Average response time by complaint type
# -----------------------------
# Mean elapsed minutes per complaint type; a NaN mean is shown as 0
mean_minutes = df.groupby('Complaint Type')['elapsed_time_min'].mean()
df_mrt = mean_minutes.fillna(0).to_frame()

# Bar chart of the per-type averages, labelled through the returned Axes
ax = df_mrt.plot(kind='bar', figsize=(20, 10))
ax.set_title('Average Response Time VS Complaint Type')
ax.set_xlabel('Complaint Type')
ax.set_ylabel('Average Response Time in Minutes')
plt.show()

In [None]:
# -----------------------------
# Step 4: Handle extreme anomaly
# -----------------------------
# Example: "Animal in a Park" has only one entry with unusually high time.
# display() is used because a bare expression that is not the last line of a
# cell is never rendered (display is provided by the IPython/Jupyter kernel).
display(df[df['Complaint Type'] == 'Animal in a Park'])

# 283132 is presumably the row label of that single entry in this dataset
# snapshot -- verify against the row displayed above.
# Reassigning with errors='ignore' (instead of inplace=True) keeps the cell
# idempotent: re-running it no longer raises KeyError once the row is gone.
df = df.drop(index=283132, errors='ignore')

# Recalculate after removing anomaly
df_mrt = df.groupby('Complaint Type')['elapsed_time_min'].mean().fillna(0).to_frame()
df_mrt.plot(kind='bar', figsize=(20, 10))
plt.title('Average Response Time by Complaint Type')
plt.xlabel('Complaint Type')
plt.ylabel('Average Response Time in Minutes')
plt.show()

In [None]:
# -----------------------------
# Step 5: Histograms of response times by complaint type
# -----------------------------
# One groupby pass instead of re-filtering the whole frame per complaint type.
# sort=False keeps the order in which types first appear in the data; groupby
# also skips a missing (NaN) complaint type, which previously produced an
# empty plot because `== NaN` never matches.
for complaint_type, seconds in df.groupby('Complaint Type', sort=False)['elapsed_time_sec']:
    # Only the 0-5000 s window is binned; longer durations are not shown
    seconds.hist(range=(0, 5000))
    plt.title(f'Response Time Distribution: {complaint_type}')
    # x is the measured duration, y is the number of requests in each bin
    plt.xlabel('Response Time in Seconds')
    plt.ylabel('Frequency')
    plt.show()

In [None]:
# -----------------------------
# Step 6: Log transformation for normalizing distributions
# -----------------------------
# Maps each complaint type to the natural log of its response times (seconds)
df_ct = {}
for complaint_type, seconds in df.groupby('Complaint Type', sort=False)['elapsed_time_sec']:
    # log is only defined for positive values: a zero duration gives -inf and
    # a negative one gives NaN, and hist() raises ValueError on a non-finite
    # range. Keeping only positive durations also drops missing values.
    positive_seconds = seconds[seconds > 0]
    df_ct[complaint_type] = np.log(positive_seconds)

# Plot histograms of log-transformed times
for complaint_type, log_seconds in df_ct.items():
    if log_seconds.empty:
        # No positive durations for this type, so there is nothing to plot
        continue
    log_seconds.hist()
    plt.title(f'Log-Transformed Response Time: {complaint_type}')
    plt.xlabel('log(Response Time in Seconds)')
    plt.ylabel('Frequency')
    plt.show()

In [None]:
# -----------------------------
# Step 7: Average response time by city
# -----------------------------
# Mean response time in seconds per city, computed in one groupby pass rather
# than re-scanning the frame once per city.
# dropna=False keeps requests with a missing City as their own group; the
# previous `df['City'] == c` comparison never matched NaN and reported nan.
city_mean_sec = (
    df.groupby('City', dropna=False)['elapsed_time_sec']
    .mean()
    .sort_values(ascending=False)  # slowest cities first
)

# Last expression of the cell, so the notebook renders the full table
city_mean_sec