In [2]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd

# Load only the column this cell uses. The recorded run of this cell ended in
# a MemoryError, and reading every column of the CSV is the likely cause.
# NOTE(review): `df` now holds just Start_Time; widen `usecols` if other cells
# reuse this frame.
df = pd.read_csv("US_Accidents_March23.csv", usecols=["Start_Time"])

# read_csv leaves timestamps as strings, and the .dt accessor only works on
# datetime64 data. Values that cannot be parsed become NaT and are dropped so
# they are not counted in any bucket.
# NOTE(review): if the column mixes timestamp formats, pandas >= 2.0 may need
# format="mixed" (or "ISO8601") here — confirm against the data.
df["Start_Time"] = pd.to_datetime(df["Start_Time"], errors="coerce")
start_time = df["Start_Time"].dropna()

# day_name()/month_name() return English names by default; listing them in
# calendar order lets the bars read Monday..Sunday / January..December instead
# of the alphabetical order that sort_index() would produce.
DAY_ORDER = [
    "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
]
MONTH_ORDER = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

incident_counts_by_time = start_time.dt.hour.value_counts().sort_index()
incident_counts_by_day = (
    start_time.dt.day_name().value_counts().reindex(DAY_ORDER, fill_value=0)
)
incident_counts_by_week = start_time.dt.isocalendar().week.value_counts().sort_index()
incident_counts_by_month = (
    start_time.dt.month_name().value_counts().reindex(MONTH_ORDER, fill_value=0)
)
incident_counts_by_year = start_time.dt.year.value_counts().sort_index()

# 3x2 grid of bar charts: five temporal breakdowns plus one unused slot.
fig, axs = plt.subplots(3, 2, figsize=(15, 15))

# One entry per panel: (axes, x values, bar heights, bar colour, title, x label).
panels = [
    (axs[0, 0], incident_counts_by_time.index, incident_counts_by_time.values,
     'skyblue', 'Number of Incidents by Hour of the Day', 'Hour of the Day'),
    (axs[0, 1], incident_counts_by_day.index, incident_counts_by_day.values,
     'lightgreen', 'Number of Incidents by Day of the Week', 'Day of the Week'),
    # The week index is cast to plain int before being handed to matplotlib.
    (axs[1, 0], incident_counts_by_week.index.astype(int), incident_counts_by_week.values,
     'salmon', 'Number of Incidents by Week of the Year', 'Week of the Year'),
    (axs[1, 1], incident_counts_by_month.index, incident_counts_by_month.values,
     'lightcoral', 'Number of Incidents by Month', 'Month'),
    (axs[2, 0], incident_counts_by_year.index, incident_counts_by_year.values,
     'lightblue', 'Number of Incidents by Year', 'Year'),
]

for panel_ax, x_values, bar_heights, bar_color, panel_title, x_label in panels:
    panel_ax.bar(x_values, bar_heights, color=bar_color)
    panel_ax.set_title(panel_title, fontsize=14)
    panel_ax.set_xlabel(x_label, fontsize=12)
    panel_ax.set_ylabel('Number of Incidents', fontsize=12)

# Only the month panel gets vertical x tick labels.
axs[1, 1].tick_params(axis='x', labelrotation=90)

# The sixth grid slot has no chart; hide its frame and ticks.
axs[2, 1].axis('off')


def _with_thousands_separator(value, _position):
    """Format a tick value as a whole number with comma separators."""
    return f'{value:,.0f}'


# Each axis gets its own formatter instance.
for ax in axs.flat:
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(_with_thousands_separator))

plt.tight_layout()
plt.show()

MemoryError: Unable to allocate 128. KiB for an array with shape (16384,) and data type int64

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import io
import base64
from datetime import datetime
from matplotlib.ticker import MaxNLocator

# --- Load & preprocess data ---
# Read only the columns the plots in this cell use. The recorded run of this
# cell ended in a MemoryError, and loading every CSV column is the likely cause.
# NOTE(review): widen `usecols` if other cells need more columns from `df`.
df = pd.read_csv('US_Accidents_March23.csv', usecols=['State', 'Severity', 'Start_Time'])

# read_csv leaves Start_Time as strings; the Timestamp comparison and the .dt
# accessor below both need datetime64. Unparseable values become NaT and are
# removed by the dropna that follows.
# NOTE(review): if the column mixes timestamp formats, pandas >= 2.0 may need
# format='mixed' (or 'ISO8601') here — confirm against the data.
df['Start_Time'] = pd.to_datetime(df['Start_Time'], errors='coerce')
df = df.dropna(subset=['State', 'Severity', 'Start_Time'])

# Anchor the five-year window to the newest record instead of the wall clock:
# a window measured from "now" changes on every run and eventually excludes
# every row of a fixed CSV snapshot.
window_start = df['Start_Time'].max() - pd.DateOffset(years=5)
df = df[df['Start_Time'] >= window_start]

# The five states with the highest mean Severity, highest first.
top_states = df.groupby('State')['Severity'].mean().sort_values(ascending=False).head(5).index

# .copy() makes df_top an independent frame, so adding columns below does not
# trigger SettingWithCopyWarning or write through to a view of df.
df_top = df[df['State'].isin(top_states)].copy()
df_top['YearMonth'] = df_top['Start_Time'].dt.to_period('M').astype(str)
df_top['Year'] = df_top['Start_Time'].dt.year
df_top['Month'] = df_top['Start_Time'].dt.month

# --- Utility to save matplotlib figure to base64 ---
def fig_to_base64(fig):
    """Render a figure to PNG and return the bytes as a base64 text string.

    Args:
        fig: Object exposing ``savefig(buffer, format=..., bbox_inches=...)``;
            it is also passed to ``plt.close``.

    Returns:
        str: Base64 encoding of the PNG bytes written by ``savefig``.

    Raises:
        Whatever ``fig.savefig`` raises; the figure is still closed first.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", bbox_inches='tight')
    finally:
        # Close on failure as well, so a rendering error does not leave the
        # figure registered with pyplot.
        plt.close(fig)
    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(buf.getvalue()).decode('utf-8')

# --- Plot 1: Bar Plot of Average Severity ---
# One grouped pass yields every state's mean severity; reindexing by
# top_states keeps the bars in ranking order. This replaces re-filtering the
# whole frame once per state.
state_mean_severity = df_top.groupby('State')['Severity'].mean().reindex(top_states)
bar_fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x=top_states, y=state_mean_severity.values, palette='Reds', ax=ax)
ax.set_title('Top 5 States by Average Congestion Severity')
# Label both axes explicitly so the exported image stands on its own.
ax.set_xlabel('State')
ax.set_ylabel('Average Severity')
bar_img = fig_to_base64(bar_fig)

# --- Plot 2: Line Plot - Monthly Severity Trend ---
# Mean severity per (YearMonth, State), then pivot the State level into
# columns so each state is drawn as its own line.
monthly_state_mean = df_top.groupby(['YearMonth', 'State'])['Severity'].mean()
trend = monthly_state_mean.unstack()

line_fig, ax = plt.subplots(figsize=(14, 6))
trend.plot(ax=ax, marker='o')
ax.set(
    title='Monthly Average Congestion Severity (Last 5 Years)',
    xlabel='Month-Year',
    ylabel='Average Severity',
)
line_img = fig_to_base64(line_fig)

# --- Plot 3: Heatmaps per State ---
# One Year x Month grid of mean severity per state, in top_states order;
# each rendered figure is stored as a base64 PNG string.
heatmap_imgs = []
for state in top_states:
    state_rows = df_top.loc[df_top['State'] == state]
    heatmap_data = pd.pivot_table(
        state_rows,
        values='Severity',
        index='Year',
        columns='Month',
        aggfunc='mean',
    )
    heatmap_fig, ax = plt.subplots(figsize=(8, 4))
    sns.heatmap(
        heatmap_data,
        ax=ax,
        cmap='Reds',
        annot=True,
        fmt='.1f',
        cbar_kws=dict(label='Avg Severity'),
    )
    ax.set_title(f'Heatmap of Severity in {state}')
    encoded_heatmap = fig_to_base64(heatmap_fig)
    heatmap_imgs.append(encoded_heatmap)

# --- Plot 4: Box Plot per State-Year ---
# Severity distribution for every year, with one box per state inside each year.
box_fig, ax = plt.subplots(figsize=(12, 6))
boxplot_mapping = dict(x='Year', y='Severity', hue='State')
sns.boxplot(data=df_top, ax=ax, **boxplot_mapping)
# Restrict the y axis to whole-number tick positions.
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
ax.set_title('Yearly Congestion Severity Distribution by State')
box_img = fig_to_base64(box_fig)

# --- Plot 5: Facet Line Chart ---
# Collapse the raw rows to one mean per state and calendar month before
# plotting. Handing every row to a line plot makes seaborn aggregate per
# distinct timestamp, which is slow on a large frame and gives a jagged line.
month_start = df_top['Start_Time'].dt.to_period('M').dt.to_timestamp()
facet_df = (
    df_top['Severity']
    .groupby([df_top['State'], month_start])
    .mean()
    .reset_index()
)
# estimator=None draws the pre-aggregated points as they are, so no
# confidence-interval argument (the deprecated ci=None) is needed.
# col_order keeps the facets in the same ranking order as top_states.
facet = sns.relplot(
    data=facet_df,
    x='Start_Time',
    y='Severity',
    kind='line',
    col='State',
    col_order=list(top_states),
    col_wrap=3,
    height=4,
    estimator=None,
)
facet.fig.suptitle('Congestion Trends in Top States Over Time', y=1.02)
facet_img = fig_to_base64(facet.fig)

# --- Plot 6: Animated Plotly Chart ---
# Long-format table of mean severity per (YearMonth, State) for plotly.
# Note: no animation argument is passed to px.line here, so despite the
# title this produces a regular interactive line chart.
animated_df = df_top.groupby(['YearMonth', 'State'], as_index=False)['Severity'].mean()
fig_plotly = px.line(
    animated_df,
    x='YearMonth',
    y='Severity',
    color='State',
    title='Animated Congestion Trend',
)
# Emit an embeddable HTML fragment that loads plotly.js from the CDN.
plotly_html = fig_plotly.to_html(include_plotlyjs='cdn', full_html=False)

MemoryError: Unable to allocate 128. KiB for an array with shape (16384,) and data type int64