# README

- **Author**: Ying-Dian Lin
- **Created At**: 2025-10-15
- **Last Modified At**: 2025-11-04

---

## What does this file do?

- This file reads the preprocessed NYC daytime speed averages by borough and visualizes the trends over time.
- It creates a line plot comparing daily average speeds for each borough.

---

## What does this file take?

- **Input File**:  
  - /data/raw/NYC_daytime_avg_by_borough.csv  
  - Description: Contains the average speed per day for each borough.  

---

## What does this file output?

- **Output Image**:  
  - data/temp/img/NYC_borough_speed_trends.png  
  - Description: A line chart of the daily average speed trend for each borough.  


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# === 1. Setup directory ===
# Work relative to this script's own folder so the bare file names used below
# resolve next to it. `__file__` is not defined when the code is executed as a
# notebook cell, so fall back to the current working directory in that case
# instead of failing with a NameError.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()
os.chdir(base_dir)

# === 2. Read the combined daily average file ===
# NOTE(review): the README lists the input under /data/raw/ and the output
# under data/temp/img/, but this code reads and writes in the working
# directory -- confirm which location is intended.
file = "NYC_daytime_avg_by_borough.csv"
# encoding_errors="ignore" drops bytes that cannot be decoded rather than
# aborting the read.
df = pd.read_csv(file, encoding_errors="ignore")

# Convert DATE to datetime for proper time plotting. Unparseable dates become
# NaT (errors='coerce') and those rows are dropped before sorting by date.
df['DATE'] = pd.to_datetime(df['DATE'], errors='coerce')
df = df.dropna(subset=['DATE']).sort_values('DATE')

# === 3. Define boroughs (ensure columns exist) ===
# The candidate list also carries alternate spellings ("Manhaton",
# "Staten_Island"); only names that are actual columns of the file are kept.
boroughs = ["Manhattan", "Staten Island", "Brooklyn", "Bronx", "Queens", "Manhaton", "Staten_Island"]
boroughs = [b for b in boroughs if b in df.columns]

# === 4. Plot ===
plt.figure(figsize=(12, 6))

for b in boroughs:
    # Skip columns that contain no values at all so they do not add an
    # empty entry to the legend.
    if df[b].notna().any():
        plt.plot(df['DATE'], df[b], label=b, linewidth=2)

# === 5. Format plot ===
plt.title("Daily Average Speed by Borough", fontsize=16)
plt.xlabel("Date", fontsize=12)
plt.ylabel("Average Speed (mph)", fontsize=12)
plt.legend(title="Borough", fontsize=10)
plt.grid(True, linestyle="--", alpha=0.6)
plt.tight_layout()

# === 6. Save and/or show ===
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig("NYC_borough_speed_trends.png", dpi=300)
plt.show()

# The original message contained a mis-decoded check-mark (UTF-8 bytes read as
# cp1252); the intended character is restored here.
print("✅ Plot saved as 'NYC_borough_speed_trends.png' and displayed.")
print(df.head())
