# Vehicle Access & Transit Proximity Analysis

## Questions to Answer:
1. Vehicle ownership by neighborhood
2. Transit access by zip code
3. Commute time by city or county

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Read the cleaned transportation dataset (the path is resolved relative to
# the current working directory) and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../data/cleaned/clean_transportation.csv')
print(df.head(n=5))

In [None]:
# Calculate commute time statistics by city/county.
# Named aggregation binds every output column to its source column and
# statistic, so the labels cannot drift out of step with the aggregation spec
# the way a positional rename of the flattened columns can.
commute_by_region = (
    df.groupby('city')
    .agg(
        avg_commute=('commute_time', 'mean'),
        median_commute=('commute_time', 'median'),
        std_commute=('commute_time', 'std'),
        min_commute=('commute_time', 'min'),
        max_commute=('commute_time', 'max'),
        # 'count' tallies the non-null commute_time rows in each group; it is
        # the figure reported as "population" and used for bubble size below.
        population=('commute_time', 'count'),
        avg_income=('income', 'mean'),
        vehicle_ownership_rate=('vehicle_access', 'mean'),
    )
    .round(2)
    # Longest average commute first
    .sort_values('avg_commute', ascending=False)
)
print(commute_by_region)

# Visualize: Average commute time by city
plt.figure(figsize=(12, 6))
commute_by_region['avg_commute'].plot(kind='barh', color='coral')
plt.xlabel('Average Commute Time (minutes)')
plt.ylabel('City/County')
plt.title('Average Commute Time by Geographic Region')
plt.tight_layout()
plt.savefig('../visuals/charts/commute_time_by_city.png', dpi=300)
plt.show()

# Visualize: Commute time vs income by region
plt.figure(figsize=(12, 6))
plt.scatter(
    commute_by_region['avg_income'],
    commute_by_region['avg_commute'],
    s=commute_by_region['population'] * 2,  # marker area scales with group size
    alpha=0.6,
)
# Label every bubble with its region name. Walking the index and the two
# coordinate columns in lock-step avoids a pair of .loc lookups per region.
for region, income, commute in zip(
    commute_by_region.index,
    commute_by_region['avg_income'],
    commute_by_region['avg_commute'],
):
    plt.annotate(region, (income, commute))
plt.xlabel('Average Income ($)')
plt.ylabel('Average Commute Time (minutes)')
plt.title('Income vs Commute Time by Region\n(bubble size = population)')
plt.tight_layout()
plt.savefig('../visuals/charts/income_vs_commute_by_region.png', dpi=300)
plt.show()

## Analysis 3: Commute Time by City or County

In [None]:
# Summarise transit access per zip code: distance-to-transit statistics plus
# the mean, sum and non-null count of the transit_access column, rounded to
# two decimals and ordered from the largest average distance to the smallest.
transit_by_zip = (
    df.groupby('zip_code')
    .agg(
        avg_distance=('distance_to_transit', 'mean'),
        median_distance=('distance_to_transit', 'median'),
        min_distance=('distance_to_transit', 'min'),
        max_distance=('distance_to_transit', 'max'),
        access_rate=('transit_access', 'mean'),
        total_access=('transit_access', 'sum'),
        population=('transit_access', 'count'),
    )
    .round(2)
    .sort_values('avg_distance', ascending=False)
)
print(transit_by_zip)

# Chart 1: average distance to transit, one horizontal bar per zip code
plt.figure(figsize=(12, 8))
transit_by_zip['avg_distance'].plot.barh(color='green')
plt.title('Transit Access by Zip Code')
plt.xlabel('Average Distance to Nearest Transit Stop')
plt.ylabel('Zip Code')
plt.tight_layout()
plt.savefig('../visuals/maps/transit_access_by_zip.png', dpi=300)
plt.show()

# Chart 2: transit access rate, one horizontal bar per zip code
plt.figure(figsize=(12, 8))
transit_by_zip['access_rate'].plot.barh(color='purple')
plt.title('Population with Good Transit Access by Zip Code')
plt.xlabel('Transit Access Rate')
plt.ylabel('Zip Code')
plt.tight_layout()
plt.savefig('../visuals/maps/transit_access_rate_by_zip.png', dpi=300)
plt.show()

## Analysis 2: Transit Access by Zip Code

In [None]:
# Summarise vehicle_access per neighborhood: its mean (reported as the
# ownership rate), its sum, and the number of non-null rows, rounded to three
# decimals and ordered from the highest ownership rate to the lowest.
vehicle_ownership = (
    df.groupby('neighborhood')
    .agg(
        ownership_rate=('vehicle_access', 'mean'),
        total_with_vehicle=('vehicle_access', 'sum'),
        population=('vehicle_access', 'count'),
    )
    .round(3)
    .sort_values('ownership_rate', ascending=False)
)
print(vehicle_ownership)

# Horizontal bar chart of the ownership rate, one bar per neighborhood
plt.figure(figsize=(12, 6))
vehicle_ownership['ownership_rate'].plot.barh(color='steelblue')
plt.title('Vehicle Ownership by Neighborhood')
plt.xlabel('Vehicle Ownership Rate')
plt.ylabel('Neighborhood')
# Dashed reference line at the mean of vehicle_access over every row of df.
# NOTE(review): the legend calls this the "City Average"; confirm that df
# covers a single city, otherwise the line is a dataset-wide average.
plt.axvline(
    x=df['vehicle_access'].mean(),
    color='red',
    linestyle='--',
    linewidth=2,
    label='City Average',
)
plt.legend()
plt.tight_layout()
plt.savefig('../visuals/charts/vehicle_ownership_by_neighborhood.png', dpi=300)
plt.show()

## Analysis 1: Vehicle Ownership by Neighborhood