In [None]:
import pandas as pd
import numpy as np
import re
import requests
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from pathlib import Path
import nbformat
import json
import pathlib

In [None]:
# Reads each uploaded notebook and records a lightweight preview of its cells
notebook_paths = [
    "/mnt/data/Capstone 3.ipynb",
    "/mnt/data/Capstone1.ipynb",
    "/mnt/data/Capstone2.ipynb"
]


def _preview_cells(notebook):
    """Return (cell_type, text) pairs for a notebook's markdown and code cells.

    Markdown sources are kept whole; code sources are cut to their first
    1000 characters. Cells of any other type are left out.
    """
    preview = []
    for nb_cell in notebook.cells:
        if nb_cell.cell_type == "markdown":
            preview.append(("markdown", nb_cell.source))
        elif nb_cell.cell_type == "code":
            preview.append(("code", nb_cell.source[:1000]))
    return preview


notebooks_content = {}

for nb_path in notebook_paths:
    try:
        with open(nb_path, "r", encoding="utf-8") as handle:
            parsed = nbformat.read(handle, as_version=4)
        notebooks_content[nb_path] = _preview_cells(parsed)
    except Exception as err:
        # best-effort: a notebook that cannot be read is recorded as its error text
        notebooks_content[nb_path] = str(err)

notebooks_content.keys()

In [None]:
# Downloads per-year AADT totals for Davidson County (2013-2023) from the ArcGIS
# feature service and writes a small CSV for Capstone3. The sum per AADT_YEAR is
# requested from the service itself via outStatistics, so only one row per year
# is expected back.
BASE = "https://services2.arcgis.com/nf3p7v7Zy4fTOh6M/ArcGIS/rest/services/Traffic_Points/FeatureServer/0/query"

params = {
    "where": "COUNTY='DAVIDSON' AND AADT_YEAR BETWEEN 2013 AND 2023",
    "outFields": "AADT_YEAR",
    "groupByFieldsForStatistics": "AADT_YEAR",
    "outStatistics": json.dumps([{
        "statisticType": "sum",
        "onStatisticField": "AADT",
        "outStatisticFieldName": "aadt_total"
    }]),
    "orderByFields": "AADT_YEAR",
    "returnGeometry": "false",
    "f": "json",
}

r = requests.get(BASE, params=params, timeout=60)
r.raise_for_status()
data = r.json()

# ArcGIS REST can report a failed query as HTTP 200 with an "error" object in
# the JSON body, so raise_for_status() alone does not surface it.
if "error" in data:
    raise RuntimeError(f"ArcGIS query failed: {data['error']}")

rows = [f["attributes"] for f in data.get("features", [])]
if not rows:
    # Without this guard an empty result dies later with an opaque KeyError: 'year'.
    raise ValueError("no rows returned. check the where/outStatistics parameters.")

df = pd.DataFrame(rows).rename(columns={"AADT_YEAR": "year"}).sort_values("year")

out = Path("outputs")
out.mkdir(parents=True, exist_ok=True)
csv_path = out / "aadt_davidson_year_totals.csv"
df.to_csv(csv_path, index=False)
print("saved:", csv_path)
df

In [None]:
# Downloads the raw per-point AADT rows for Davidson County (2013-2023), paging
# through the ArcGIS feature service, then writes the detailed points CSV and the
# per-year totals CSV.
BASE = "https://services2.arcgis.com/nf3p7v7Zy4fTOh6M/ArcGIS/rest/services/Traffic_Points/FeatureServer/0/query"

OUT_DIR = Path("data_raw/tdot_history")
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUTS = Path("outputs")
OUTS.mkdir(parents=True, exist_ok=True)

params = {
    "where": "COUNTY='DAVIDSON' AND AADT_YEAR BETWEEN 2013 AND 2023",
    "outFields": "LOCAL_ID,COUNTY,AADT_YEAR,AADT",
    "orderByFields": "OBJECTID",  # stable ordering so pages do not overlap
    "returnGeometry": "false",
    "f": "json",
    "resultRecordCount": 2000,
    "resultOffset": 0,
}

all_rows = []
while True:
    r = requests.get(BASE, params=params, timeout=60)
    r.raise_for_status()
    data = r.json()

    # ArcGIS REST can report a failed query as HTTP 200 with an "error" object in
    # the JSON body, so raise_for_status() alone does not surface it.
    if "error" in data:
        raise RuntimeError(
            f"ArcGIS query failed at offset {params['resultOffset']}: {data['error']}"
        )

    feats = data.get("features", [])
    rows = [f.get("attributes") or f.get("properties") or {} for f in feats]
    all_rows.extend(rows)
    print(f"fetched {len(rows)} rows at offset {params['resultOffset']}")

    # The service may cap a page below the requested resultRecordCount. In that
    # case a short page is NOT the last page, so also honour the
    # exceededTransferLimit flag instead of relying on the page length alone.
    more_pages = bool(data.get("exceededTransferLimit")) or len(rows) >= params["resultRecordCount"]
    if not rows or not more_pages:
        break
    # Advance by what was actually received so no rows are skipped on capped pages.
    params["resultOffset"] += len(rows)

points = pd.DataFrame(all_rows)
if points.empty:
    raise ValueError("no rows returned. check the where/outFields parameters.")

# guarantees expected columns and types
if "AADT_YEAR" in points.columns and "year" not in points.columns:
    points = points.rename(columns={"AADT_YEAR": "year"})

if "year" not in points.columns:
    raise ValueError(f"'year' column missing. columns: {list(points.columns)[:20]}")
if "AADT" not in points.columns:
    raise ValueError(f"'AADT' column missing. columns: {list(points.columns)[:20]}")

# non-numeric values become NaN and are dropped together with missing ones
points["year"] = pd.to_numeric(points["year"], errors="coerce")
points["AADT"] = pd.to_numeric(points["AADT"], errors="coerce")
points = points.dropna(subset=["year","AADT"]).sort_values("year")

# writes detailed points and annual totals
points_csv = OUT_DIR / "tdot_aadt_points_davidson_2013_2023.csv"
points.to_csv(points_csv, index=False)
print("saved points:", points_csv, "| rows:", len(points))

year_totals = (points.groupby("year", as_index=False)["AADT"]
                      .sum()
                      .rename(columns={"AADT": "aadt_total"})
                      .sort_values("year"))
totals_csv = OUTS / "aadt_davidson_year_totals.csv"
year_totals.to_csv(totals_csv, index=False)
print("saved year totals:", totals_csv)
year_totals

In [None]:
# Verifies the year-totals CSV written by a downloader cell, normalizes it to the
# (year, aadt_total) schema, reports which years of 2013-2023 are present, and
# writes the normalized table back to the same file.
OUTS = Path("outputs")
RAW  = Path("data_raw/tdot_history")

totals_csv = OUTS / "aadt_davidson_year_totals.csv"
points_csv = RAW  / "tdot_aadt_points_davidson_2013_2023.csv"  # defined here but not read in this cell

assert totals_csv.exists(), f"Missing {totals_csv}. Re-run a downloader cell."

df_tot = pd.read_csv(totals_csv)

# normalizes column names if the service returned unexpected headers
if "AADT_YEAR" in df_tot.columns and "year" not in df_tot.columns:
    df_tot = df_tot.rename(columns={"AADT_YEAR": "year"})
if "year" not in df_tot.columns:
    # fail with the columns actually found instead of a bare KeyError further down
    raise ValueError(f"'year' column missing. columns: {list(df_tot.columns)[:20]}")
if "aadt_total" not in df_tot.columns and "AADT" in df_tot.columns:
    # file holds un-aggregated AADT rows: collapse them to one total per year
    df_tot = (df_tot.groupby("year", as_index=False)["AADT"]
                     .sum()
                     .rename(columns={"AADT": "aadt_total"}))
if "aadt_total" not in df_tot.columns:
    raise ValueError(f"'aadt_total' column missing. columns: {list(df_tot.columns)[:20]}")

# coerces types and sorts; rows whose year or total is not numeric are dropped
df_tot["year"] = pd.to_numeric(df_tot["year"], errors="coerce")
df_tot["aadt_total"] = pd.to_numeric(df_tot["aadt_total"], errors="coerce")
df_tot = (df_tot.dropna(subset=["year","aadt_total"])
                .astype({"year": int})
                .sort_values("year")
                .reset_index(drop=True))

display(df_tot)

# coverage check for ACS alignment
need = list(range(2013, 2024))
have = df_tot["year"].tolist()
have_years = set(have)  # set lookup for the membership test below
print("Years present:", have)
print("Years missing (need 2013–2023):", [y for y in need if y not in have_years])

# writes back the normalized totals to ensure Capstone3 consumes the right schema
df_tot.to_csv(totals_csv, index=False)
print("Normalized + saved:", totals_csv)