<a href="https://colab.research.google.com/github/SaquibKhan-DS/311-Customer-Service-Optimization/blob/main/notebooks/04_business_insights_recommendations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 04_business_insights_recommendations.ipynb

# -----------------------------
# Notebook 04: Business Insights & Recommendations
# -----------------------------
# This notebook applies statistical tests, explores correlations,
# and provides actionable insights from the 311 service request data.
# -----------------------------

# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
# -----------------------------
# Step 1: Load dataset
# -----------------------------
# Reads the raw 311 export and derives, for every request with usable
# timestamps, the time it took to close (as a Timedelta and in seconds).
df = pd.read_csv('/kaggle/input/311-service-requests-nyc/311_Service_Requests_from_2010_to_Present.csv')

# Parse date columns; values that cannot be parsed become NaT instead of raising
df['created_dt'] = pd.to_datetime(df['Created Date'], errors='coerce')
df['closed_dt'] = pd.to_datetime(df['Closed Date'], errors='coerce')

# Keep only rows where BOTH timestamps parsed successfully.
# Filtering on the parsed columns (rather than the raw 'Closed Date' strings)
# also removes rows whose date text was present but unparseable, so no NaN
# durations leak into the statistical tests further down.
# .copy() makes the result an independent frame, so the column assignments
# below do not raise SettingWithCopyWarning.
has_both_dates = df['created_dt'].notna() & df['closed_dt'].notna()
df = df[has_both_dates].copy()

# Calculate elapsed time (Timedelta) and the same duration in seconds (float)
df['elapsed_time'] = df['closed_dt'] - df['created_dt']
df['elapsed_time_sec'] = df['elapsed_time'] / np.timedelta64(1, 's')

In [None]:
# -----------------------------
# Step 2: Prepare dataset for correlation analysis
# -----------------------------
# Work on an explicit copy: df_loc is modified column by column below, and
# writing into a slice of df would raise SettingWithCopyWarning.
df_loc = df[['Complaint Type', 'Location', 'City', 'Borough']].copy()

# Convert object columns to numerical codes for correlation.
# select_dtypes yields the same object-typed column names as
# describe(include="O") without computing summary statistics.
cat_columns = df_loc.select_dtypes(include='object').columns
for col in cat_columns:
    codes = df_loc[col].astype("category").cat.codes
    # cat.codes marks missing values with -1; turn those back into NaN so a
    # missing value is not treated as a real category by corr().
    df_loc[col] = codes.where(codes >= 0)

# Pearson correlation (NaN pairs are skipped by DataFrame.corr).
# NOTE: the codes are arbitrary labels for nominal categories, so these
# coefficients are only a rough indicator; the chi-square test in Step 3 is
# the appropriate check for association between categorical columns.
cor = df_loc.corr(method='pearson')
print("Correlation Matrix:")
print(cor)

In [None]:
# -----------------------------
# Step 3: Chi-square test of independence
# -----------------------------
# Contingency table of complaint type vs city.
# The table must contain observed counts only: margins (the "All" totals
# row/column) are NOT included, because chi2_contingency would treat them as
# an extra category, inflating the degrees of freedom and distorting the
# statistic and the expected-frequency table.
df_ct = pd.crosstab(df['Complaint Type'], df['City'])

# chi2: test statistic, p: p-value, dof: (rows-1)*(cols-1),
# ex: expected counts under the independence hypothesis
chi2, p, dof, ex = stats.chi2_contingency(df_ct)
print(f"Chi-square value: {chi2}")
print(f"P-value: {p}")
print(f"Degrees of freedom: {dof}")
print("Expected frequencies table:")
print(ex)

# Insight: If p-value < 0.05, Complaint Type and City are dependent
if p < 0.05:
    print("Result: Complaint Type and City are NOT independent.")
else:
    print("Result: Complaint Type and City are independent.")

In [None]:
# -----------------------------
# Step 4: Kruskal-Wallis test for comparing response times
# -----------------------------
# Build one array of elapsed_time_sec per complaint type.
# A single groupby pass replaces a boolean filter per complaint type, and it
# skips rows whose complaint type is missing. NaN durations are dropped and
# empty groups are discarded so they cannot turn the test result into NaN.
response_time_groups = []
for _, durations in df.groupby('Complaint Type')['elapsed_time_sec']:
    valid_durations = durations.dropna().to_numpy()
    if valid_durations.size > 0:
        response_time_groups.append(valid_durations)

# Perform test (requires at least two groups; scipy raises ValueError otherwise)
kruskal_result = stats.kruskal(*response_time_groups)
print("Kruskal-Wallis Test Result:")
print(kruskal_result)

# Insight: Kruskal-Wallis is rank-based, so it compares the response-time
# distributions of the groups rather than their arithmetic means.
# If p-value < 0.05, response times differ significantly across complaint types.
if kruskal_result.pvalue < 0.05:
    print("Result: Response times differ significantly across complaint types.")
else:
    print("Result: No significant difference in response times across complaint types.")

In [None]:
# -----------------------------
# Step 5: Summary & Recommendations
# -----------------------------
# The statements below are static text; revise them to match the actual
# outcomes of the statistical tests above before sharing the notebook.
insights = (
    "1. Significant dependency exists between Complaint Type and City.",
    "2. Average response times vary significantly by complaint type (confirmed by Kruskal-Wallis).",
    "3. Certain complaint categories take disproportionately long to resolve, indicating process bottlenecks.",
    "4. Resource allocation can be optimized by focusing on top complaint types in high-volume cities.",
)

recommendations = (
    "• Prioritize high-volume complaint types in cities with the slowest average response times.",
    "• Investigate process improvements for complaint types with the largest delays.",
    "• Deploy targeted awareness campaigns in boroughs with recurring complaints to reduce load.",
)

# Print each section as a heading followed by its entries, one per line
report_sections = (
    ("\n--- Business Insights ---", insights),
    ("\n--- Recommendations ---", recommendations),
)
for heading, entries in report_sections:
    print(heading)
    for entry in entries:
        print(entry)