In [None]:
# The imports
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
!pip install altair
import altair as alt
alt.renderers.enable('mimetype')
!pip install ipywidgets



In [None]:
# Read the 2024 311 service-request export into a DataFrame.
request_df = pd.read_csv("data/311_request_2024.csv")

# Preview the first ten records as the cell's rich output.
request_df.head(10)

# **Matplotlib chart**


In [None]:
# Parse the request timestamps so they can be bucketed by calendar month
# (a no-op when the column is already datetime64).
request_df['requested_datetime'] = pd.to_datetime(request_df['requested_datetime'])

# The five most frequently requested service types, most common first.
top_services = request_df['service_name'].value_counts().head(5).index

# Monthly request counts for those services, in long form: one row per
# (month-end timestamp, service_name) pair with the number of requests in
# a `count` column.
# The month-end frequency is passed as an offset object instead of the string
# alias 'M': pandas 2.2 deprecated that alias in favour of 'ME', while the
# offset object is accepted by both older and newer pandas releases and
# produces the same month-end bins.
grouped = (
    request_df[request_df['service_name'].isin(top_services)]
    .groupby([pd.Grouper(key='requested_datetime', freq=pd.offsets.MonthEnd()), 'service_name'])
    .size()
    .reset_index(name='count')
)


In [None]:
# Line chart: one line per top service, monthly request counts over time.
fig, ax = plt.subplots(figsize=(12, 6))

# Iterate in `top_services` order so colours and legend entries follow rank.
for service in top_services:
    service_rows = grouped.loc[grouped['service_name'].eq(service)]
    ax.plot(
        service_rows['requested_datetime'],
        service_rows['count'],
        marker='o',
        linewidth=1,
        label=service,
    )

ax.set_title("Monthly Requests by Top 5 Service Types (2024)", fontsize=16)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Number of Requests", fontsize=12)
ax.legend(title="Service Type")
ax.grid(True, linestyle='--', alpha=0.6)
plt.show()

**Conclusion**: This line chart shows the monthly trends of the top 5 request types in 2024.  
1. **Information Request** overwhelmingly dominates, with 25,000–35,000 requests per month, peaking in November and dropping sharply in December.  
2. **Seasonal differences**: Maintenance Complaint and Rubbish/Recyclable Collection are slightly higher during summer (May–August), likely linked to seasonal factors.  
3. **Overall trend**: Except for Information Request, the other request types remain relatively stable with modest fluctuations.  

Because time is continuous and request counts are numeric, a Matplotlib line plot clearly reveals both the month-to-month fluctuations and the overall trends.  

# **Seaborn chart**

In [None]:
# Coerce the request timestamp to datetime64 (no-op when already parsed).
requested_at = pd.to_datetime(request_df['requested_datetime'])
request_df['requested_datetime'] = requested_at

# Derive the weekday name and the hour of day (0-23) of every request.
request_df['day_of_week'] = requested_at.dt.day_name()
request_df['hour'] = requested_at.dt.hour

# Cross-tabulate weekday (rows) against hour (columns); each cell holds the
# number of rows with a non-null service_name for that combination.
pivot = pd.pivot_table(
    request_df,
    values='service_name',
    index='day_of_week',
    columns='hour',
    aggfunc='count',
)

# Combinations with no requests come back as NaN; show them as 0 instead.
pivot_filled = pivot.fillna(0)

In [None]:

# Heatmap of request counts by weekday and hour of day.
# A logarithmic colour scale keeps the low-volume night hours distinguishable
# next to the much busier daytime hours.
from matplotlib.colors import LogNorm

days_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Put the weekdays in calendar order, then force integer counts.
# fillna()/reindex() leave float columns behind whenever a day/hour gap had to
# be filled, and the integer annotation format fmt="d" below raises
# "ValueError: Unknown format code 'd'" on float values.
pivot_filled = pivot_filled.reindex(days_order).fillna(0).astype(int)

# Plotting the heatmap
plt.figure(figsize=(16, 6))
sns.heatmap(
    pivot_filled,
    cmap="rocket",
    linewidths=0.5,
    annot=True,   # print the count inside every cell
    fmt="d",      # integer formatting; requires an integer-typed table
    annot_kws={"color": "black", "size": 8},
    cbar_kws={'label': 'Number of Requests'},
    # Log scale so the small number of night-time requests stays visible.
    # NOTE(review): cells whose count is 0 lie below vmin=1 and outside the
    # log domain, so they may be drawn without a colour — confirm this is the
    # intended look.
    norm=LogNorm(vmin=1, vmax=pivot_filled.max().max())
)

plt.title("311 Requests Heatmap by Day of Week and Hour (2024)", fontsize=16)
plt.xlabel("Hour of Day", fontsize=12)
plt.ylabel("Day of Week", fontsize=12)
plt.show()



**Conclusion**:
1. Requests peak during the daytime, especially between 12:00 and 18:00, while midnight and early-morning hours see very few requests.  
2. Weekday request volumes are much higher than weekend volumes, showing that requests are concentrated during office hours.  

Seaborn’s `heatmap` is ideal for visualizing two-dimensional data. The color intensity highlights the request volume, making cross-time patterns more intuitive than line or bar charts.  

# **Altair1**

In [None]:
# Lift Altair's default cap on the number of rows a chart may embed, so the
# charts below can be built from DataFrames larger than that cap.
# NOTE(review): every row passed to alt.Chart is then serialized into the
# chart spec — expect large notebook output for big frames.
alt.data_transformers.disable_max_rows()

In [None]:
# Ensure both timestamps are datetime64 (no-op when already parsed).
request_df["requested_datetime"] = pd.to_datetime(request_df["requested_datetime"])
request_df["closed_datetime"] = pd.to_datetime(request_df["closed_datetime"])

# Hours elapsed between request and closure; NaN when closed_datetime is missing.
request_df["response_time_hours"] = (
    (request_df["closed_datetime"] - request_df["requested_datetime"]).dt.total_seconds() / 3600
)

# Take the six most common service types, then drop 'Information Request'
# and keep at most five of the remainder.
top_services = (
    request_df["service_name"]
    .value_counts()
    .head(6)
    .index
    .tolist()
)
top_services = [s for s in top_services if s != "Information Request"][:5]

# Filter in pandas and hand Altair only the two columns the chart encodes.
# Passing the whole of request_df would serialize every row and every column
# into the chart spec. The `< 1000` comparison removes extreme outliers and,
# because NaN compares False, also drops requests without a response time.
hist_df = request_df.loc[
    request_df["service_name"].isin(top_services)
    & (request_df["response_time_hours"] < 1000),
    ["service_name", "response_time_hours"],
]

# Interval selection along x: bars inside the brushed range keep their
# service colour, the rest turn light gray.
brush = alt.selection_interval(encodings=['x'])

# Faceted histogram of response time, one panel per service type.
chart1 = (
    alt.Chart(hist_df)
    .mark_bar()
    .encode(
        x=alt.X("response_time_hours:Q",
                bin=alt.Bin(maxbins=40),
                title="Response Time (hours, binned)"),
        y=alt.Y("count()", title="Count of Requests"),
        color=alt.condition(brush, "service_name:N", alt.value("lightgray")),
        tooltip=["service_name", "count()"]
    )
    .add_params(brush)
    .properties(width=200, height=200)
    .facet(column="service_name:N")
)

chart1


# **Altair2**

In [None]:


# The ten most common service types (by request count).
top_services = (
    request_df["service_name"]
    .value_counts()
    .head(10)
    .index
    .tolist()
)

# Filter in pandas and pass only the two encoded columns to Altair, instead of
# serializing the full request_df into the chart spec. `< 1000` removes
# extreme outliers and, because NaN compares False, also drops rows without a
# response time. Relies on `response_time_hours` having been added to
# request_df by an earlier cell.
mean_df = request_df.loc[
    request_df["service_name"].isin(top_services)
    & (request_df["response_time_hours"] < 1000),
    ["service_name", "response_time_hours"],
]

# Interval selection along x: brushed bars keep their colour, others turn gray.
brush = alt.selection_interval(encodings=['x'])

# Bar chart of mean response time per service type, tallest bar first.
chart2 = (
    alt.Chart(mean_df)
    .mark_bar()
    .encode(
        x=alt.X("service_name:N", sort='-y', title="Service Type"),
        y=alt.Y("mean(response_time_hours):Q", title="Average Response Time (hours)"),
        color=alt.condition(brush, "service_name:N", alt.value("lightgray")),
        tooltip=["service_name", "mean(response_time_hours)"]
    )
    .add_params(brush)
    .properties(width=400, height=300)
)

chart2


# **Altair3**

In [None]:

# The ten ZIP codes with the most requests; missing ZIP codes are excluded.
top_zips = (
    request_df
    .loc[request_df["zipcode"].notna(), "zipcode"]
    .value_counts()
    .head(10)
    .index
    .tolist()
)

# Keep only rows in those ZIP codes and only the column the chart needs,
# rather than serializing the full request_df into the chart spec. Missing ZIP
# codes can never match `top_zips`, so no separate null filter is required.
# NOTE(review): if `zipcode` was read as a number, leading zeros are lost in
# the axis labels — confirm the column's dtype.
zip_df = request_df.loc[request_df["zipcode"].isin(top_zips), ["zipcode"]]

# Interval selection along y: brushed bars stay blue, others turn gray.
brush = alt.selection_interval(encodings=['y'])

# Horizontal bar chart: ZIP code vs. number of requests, longest bar on top.
chart3 = (
    alt.Chart(zip_df)
    .mark_bar()
    .encode(
        y=alt.Y("zipcode:N", sort='-x', title="ZIP Code"),
        x=alt.X("count():Q", title="Count of Requests"),
        color=alt.condition(brush, alt.value("#4C78A8"), alt.value("lightgray")),
        tooltip=["zipcode:N", "count():Q"]
    )
    .add_params(brush)
    .properties(width=520, height=alt.Step(24))
)

chart3


# **Dashboard**

In [None]:


# Build the dashboard data in a new frame instead of rebinding `request_df`:
# overwriting the shared frame with a closed-only subset would silently change
# the result of every earlier cell on re-run, and assigning a column on the
# dropna() result can trigger pandas' SettingWithCopyWarning.
closed_df = (
    request_df
    .assign(
        # Ensure datetime format (no-op when already parsed).
        requested_datetime=lambda df: pd.to_datetime(df['requested_datetime']),
        closed_datetime=lambda df: pd.to_datetime(df['closed_datetime']),
    )
    # Remove requests that are not closed yet.
    .dropna(subset=['closed_datetime'])
    # Response time in hours.
    .assign(
        response_hours=lambda df: (
            df['closed_datetime'] - df['requested_datetime']
        ).dt.total_seconds() / 3600
    )
)

# Filter out extreme values (>= 1000 hours, roughly 41 days).
filtered_df = closed_df[closed_df['response_hours'] < 1000].copy()

# Only keep the ten most common service types among the remaining rows.
top_types = (
    filtered_df['service_name']
    .value_counts()
    .nlargest(10)
    .index
)
filtered_df = filtered_df[filtered_df['service_name'].isin(top_types)]

# Both charts read only these three columns; passing a trimmed frame keeps the
# unused columns out of the serialized chart spec.
dashboard_df = filtered_df[['service_name', 'requested_datetime', 'response_hours']]

# Interval selection along y on the bar chart; it drives the scatter below.
brush = alt.selection_interval(encodings=['y'])

# Bar chart (Top 10 service types); brushed bars keep their colour.
bar = alt.Chart(dashboard_df).mark_bar().encode(
    y=alt.Y('service_name:N', sort='-x', title='Top 10 Request Types'),
    x=alt.X('count():Q', title='Count of Requests'),
    color=alt.condition(brush, 'service_name:N', alt.value('lightgray'))
).add_params(
    brush
).properties(
    width=400,
    height=250,
    title='Top 10 Request Types (brush to filter)'
)

# Scatter plot (response time vs. requested date), restricted to the service
# types selected by the brush.
scatter = alt.Chart(dashboard_df).mark_point(size=1, opacity=0.3).encode(
    x=alt.X('requested_datetime:T', title='Requested Date'),
    y=alt.Y('response_hours:Q', title='Response Time (hours)'),
    color='service_name:N',
    tooltip=['service_name', 'requested_datetime', 'response_hours']
).transform_filter(
    brush
).properties(
    width=400,
    height=300,
    title='Response Time of Brushed Request Types (<1000 hours)'
)

# Stack the bar chart above the scatter plot.
chart4 = bar & scatter
chart4
