## 2. Temporal Patterns

Analyze accident patterns across different time scales (hour of day, day of week, month, and year), followed by a daily time series.


In [None]:
# Temporal overview: a 2x2 grid of accident counts by hour of day,
# day of week, month and year.
fig, axes = plt.subplots(2, 2, figsize=(20, 15))
fig.suptitle('Temporal Patterns in Accidents', fontsize=16)

# Hourly distribution.
# discrete=True draws one bar centred on each integer hour; a fixed bins=24
# stretches 24 bins over the observed min..max range, so the bin edges do not
# line up with the hour values.
sns.histplot(data=df, x='Hour', discrete=True, ax=axes[0,0])
axes[0,0].set_title('Accidents by Hour of Day')
axes[0,0].set_xlabel('Hour')

# Daily distribution.
# NOTE(review): assumes DayOfWeek is encoded 0=Mon .. 6=Sun — confirm against
# the cell that derives it.
day_names = dict(enumerate(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']))
daily_accidents = df.groupby('DayOfWeek').size()
sns.barplot(x=daily_accidents.index, y=daily_accidents.values, ax=axes[0,1])
axes[0,1].set_title('Accidents by Day of Week')
# Derive each label from the value actually plotted at that position, so a
# weekday missing from the data cannot shift (or mismatch the count of) the
# labels. Unknown values fall back to being shown as-is.
axes[0,1].set_xticks(range(len(daily_accidents)))
axes[0,1].set_xticklabels([day_names.get(day, day) for day in daily_accidents.index])

# Monthly distribution.
# NOTE(review): assumes Month is encoded 1=Jan .. 12=Dec — confirm.
month_names = dict(enumerate(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    start=1,
))
monthly_accidents = df.groupby('Month').size()
sns.barplot(x=monthly_accidents.index, y=monthly_accidents.values, ax=axes[1,0])
axes[1,0].set_title('Accidents by Month')
axes[1,0].set_xticks(range(len(monthly_accidents)))
axes[1,0].set_xticklabels([month_names.get(month, month) for month in monthly_accidents.index])

# Yearly trend
yearly_accidents = df.groupby('Year').size()
sns.barplot(x=yearly_accidents.index, y=yearly_accidents.values, ax=axes[1,1])
axes[1,1].set_title('Accidents by Year')

plt.tight_layout()
plt.show()

# Time series analysis: number of accidents per calendar day.
# Start_Time is truncated to midnight before grouping; grouping on the raw
# value would produce one group per distinct timestamp instead of one per day.
accident_dates = pd.to_datetime(df['Start_Time']).dt.normalize()
daily_counts = (
    df.groupby(accident_dates)
    .size()
    .rename('Count')
    .rename_axis('Date')
    .to_frame()
)

# Pass the axes explicitly: DataFrame.plot() without ax= opens its own figure,
# which would leave a separately created plt.figure(...) empty and its figsize
# unused.
fig_daily, ax_daily = plt.subplots(figsize=(15, 6))
daily_counts.plot(ax=ax_daily)
ax_daily.set_title('Daily Accident Counts Over Time')
ax_daily.set_xlabel('Date')
ax_daily.set_ylabel('Number of Accidents')
plt.show()

## 3. Geographical Distribution

Analyze the spatial distribution of accidents using maps and location-based visualizations.


In [None]:
# Density heatmap of accident start locations on a US-centred base map.
import folium
from folium import plugins

# Sample the data if it's too large (for performance); the fixed seed keeps
# the sample reproducible between runs.
sample_size = min(10000, len(df))
df_sample = df.sample(n=sample_size, random_state=42)

# Create base map
base_map = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Add heatmap layer.
# Pull the two coordinate columns out in one vectorised step rather than
# iterating row by row, and drop rows with a missing coordinate because the
# heatmap layer rejects NaN locations.
heat_data = (
    df_sample[['Start_Lat', 'Start_Lng']]
    .dropna()
    .values
    .tolist()
)
plugins.HeatMap(heat_data).add_to(base_map)

# Display map
display(base_map)

# Rank states by accident count and chart the ten highest.
state_counts = df['State'].value_counts()
top_states = state_counts.head(10)

plt.figure(figsize=(15, 6))
sns.barplot(x=top_states.index, y=top_states.values)
plt.xlabel('State')
plt.ylabel('Number of Accidents')
plt.xticks(rotation=45)
plt.title('Top 10 States by Number of Accidents')
plt.show()

# Severity map: one small circle per sampled accident, coloured by severity.
severity_map = folium.Map(location=[37.0902, -95.7129], zoom_start=4)

# Explicit lookup instead of list indexing by (Severity - 1): a list index
# silently wraps for 0 (index -1 -> 'red') and raises for values above 4.
# Anything outside 1-4, including a missing value, is drawn in gray.
severity_colors = {1: 'green', 2: 'yellow', 3: 'orange', 4: 'red'}

# Markers need both coordinates, so rows missing either one are skipped.
plottable = df_sample.dropna(subset=['Start_Lat', 'Start_Lng'])

# Iterate the three needed columns in lockstep; this avoids building a full
# Series per row as iterrows() does.
for lat, lng, severity in zip(plottable['Start_Lat'],
                              plottable['Start_Lng'],
                              plottable['Severity']):
    folium.CircleMarker(
        location=[lat, lng],
        radius=3,
        color=severity_colors.get(severity, 'gray'),
        fill=True
    ).add_to(severity_map)

display(severity_map)

## 4. Weather Analysis

Analyze the relationship between weather conditions and accidents.

# Bar chart of the ten weather conditions recorded most often.
weather_counts = df['Weather_Condition'].value_counts().head(10)

plt.figure(figsize=(15, 6))
sns.barplot(
    x=weather_counts.index,
    y=weather_counts.values,
)
plt.xticks(rotation=45, ha='right')
plt.title('Top 10 Weather Conditions in Accidents')
plt.tight_layout()
plt.show()

# Weather vs Severity: stacked bars of severity counts for the ten most
# frequent weather conditions.
weather_severity = pd.crosstab(df['Weather_Condition'], df['Severity'])

# The crosstab index is sorted by label, so .head(10) would pick the ten
# alphabetically-first conditions. Select the ten most frequent instead;
# reindex (rather than .loc) tolerates a condition that has no row in the
# crosstab.
top_conditions = df['Weather_Condition'].value_counts().head(10).index
top_weather_severity = weather_severity.reindex(top_conditions, fill_value=0)

# Pass the axes explicitly: DataFrame.plot() without ax= opens its own figure,
# leaving a separately created plt.figure(...) empty and its figsize unused.
fig_ws, ax_ws = plt.subplots(figsize=(15, 6))
top_weather_severity.plot(kind='bar', stacked=True, ax=ax_ws)
plt.title('Weather Conditions vs Severity')
plt.xlabel('Weather Condition')
plt.ylabel('Number of Accidents')
plt.legend(title='Severity')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Weather conditions by time of day: hourly accident counts for the five
# conditions with the largest overall totals.
weather_hour = pd.crosstab(df['Hour'], df['Weather_Condition'])
top_hourly_conditions = weather_hour.sum().nlargest(5).index

# Pass the axes explicitly: DataFrame.plot() without ax= opens its own figure,
# leaving a separately created plt.figure(...) empty and its figsize unused.
fig_wh, ax_wh = plt.subplots(figsize=(15, 6))
weather_hour[top_hourly_conditions].plot(kind='line', marker='o', ax=ax_wh)
plt.title('Top 5 Weather Conditions by Hour of Day')
plt.xlabel('Hour')
plt.ylabel('Number of Accidents')
# Anchor the legend just outside the right edge of the axes.
plt.legend(title='Weather Condition', bbox_to_anchor=(1.05, 1))
plt.tight_layout()
plt.show()

## 5. Severity Analysis

Analyze factors affecting accident severity and their relationships.

# Severity distribution over time: one line per severity level showing the
# number of accidents in each hour of the day.
severity_hour = pd.crosstab(df['Hour'], df['Severity'])

# Pass the axes explicitly: DataFrame.plot() without ax= opens its own figure,
# leaving a separately created plt.figure(...) empty and its figsize unused.
fig_sh, ax_sh = plt.subplots(figsize=(15, 6))
severity_hour.plot(kind='line', marker='o', ax=ax_sh)
plt.title('Accident Severity by Hour of Day')
plt.xlabel('Hour')
plt.ylabel('Number of Accidents')
plt.legend(title='Severity')
plt.show()

# Pairwise correlation between severity and the numeric
# time/weather columns, rendered as an annotated heatmap.
numeric_cols = [
    'Severity',
    'Duration',
    'Hour',
    'Temperature(F)',
    'Humidity(%)',
    'Pressure(in)',
    'Visibility(mi)',
    'Wind_Speed(mph)',
]
correlation = df.loc[:, numeric_cols].corr()

plt.figure(figsize=(12, 8))
# center=0 pins the midpoint of the diverging colormap to zero correlation.
sns.heatmap(
    correlation,
    annot=True,
    cmap='coolwarm',
    center=0,
)
plt.title('Correlation Matrix of Numeric Variables')
plt.tight_layout()
plt.show()

# Count accidents per severity level, with one bar per
# Sunrise_Sunset value inside each severity group.
plt.figure(figsize=(10, 6))
sns.countplot(
    x='Severity',
    hue='Sunrise_Sunset',
    data=df,
)
plt.xlabel('Severity')
plt.ylabel('Number of Accidents')
plt.title('Accident Severity by Time of Day')
plt.legend(title='Time of Day')
plt.show()

# Summary dashboard: a 2x2 plotly figure combining the overall severity
# distribution with severity broken down by weather, hour and state.
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Severity Distribution', 'Severity by Weather',
                   'Severity by Time of Day', 'Severity by State')
)

# Severity levels drawn in every breakdown panel. Each crosstab below is
# reindexed to these columns so a level that never occurs in the data yields
# a column of zeros instead of a KeyError on lookup.
severity_levels = list(range(1, 5))

# Add severity distribution
severity_counts = df['Severity'].value_counts().sort_index()
fig.add_trace(
    go.Bar(x=severity_counts.index, y=severity_counts.values, name='Overall'),
    row=1, col=1
)

# Add severity by weather.
# Keep the five conditions with the most accidents; .head(5) on the crosstab
# would take the five alphabetically-first labels instead.
weather_sev = pd.crosstab(df['Weather_Condition'], df['Severity'])
weather_sev = weather_sev.reindex(columns=severity_levels, fill_value=0)
weather_sev = weather_sev.loc[weather_sev.sum(axis=1).nlargest(5).index]
for severity in severity_levels:
    fig.add_trace(
        go.Bar(x=weather_sev.index, y=weather_sev[severity], name=f'Severity {severity}'),
        row=1, col=2
    )

# Add severity by hour
hourly_sev = pd.crosstab(df['Hour'], df['Severity'])
hourly_sev = hourly_sev.reindex(columns=severity_levels, fill_value=0)
for severity in severity_levels:
    fig.add_trace(
        go.Scatter(x=hourly_sev.index, y=hourly_sev[severity],
                  name=f'Severity {severity}', mode='lines'),
        row=2, col=1
    )

# Add severity by state (five states with the most accidents, selected the
# same way as the weather panel).
state_sev = pd.crosstab(df['State'], df['Severity'])
state_sev = state_sev.reindex(columns=severity_levels, fill_value=0)
state_sev = state_sev.loc[state_sev.sum(axis=1).nlargest(5).index]
for severity in severity_levels:
    fig.add_trace(
        go.Bar(x=state_sev.index, y=state_sev[severity], name=f'Severity {severity}'),
        row=2, col=2
    )

fig.update_layout(height=800, title_text="Accident Severity Analysis Dashboard")
fig.show()