## Project Explanation

This project aims to visualize the relationship between subsidized housing locations and the DART public transit network in Dallas. The goal is to understand how accessible subsidized housing is via public transportation, which can inform urban planning and policy decisions.

The map displays:
- **DART Train Network:** Represented by lines and circles indicating stops. The grey shaded area shows a buffer around the train stops, representing the estimated service area.
- **Subsidized Housing Projects:** Shown as circles: green when the project falls inside the train service area, red when it falls outside. The size of each circle is scaled to represent the population of the housing project.

By visualizing these layers together, we can identify areas with high concentrations of subsidized housing that are well-served by public transit, as well as areas that may have limited access.

In [None]:
!pip install folium
!pip install geopandas
!pip install pandas
!pip install folium.plugins
!pip install branca
!pip install shapely
!pip install leafmap

[31mERROR: Could not find a version that satisfies the requirement folium.plugins (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for folium.plugins[0m[31m
Collecting leafmap
  Downloading leafmap-0.55.0-py2.py3-none-any.whl.metadata (17 kB)
Collecting duckdb>=1.4.1 (from leafmap)
  Downloading duckdb-1.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (14 kB)
Collecting geojson (from leafmap)
  Downloading geojson-3.2.0-py3-none-any.whl.metadata (16 kB)
Collecting ipyvuetify (from leafmap)
  Downloading ipyvuetify-1.11.3-py2.py3-none-any.whl.metadata (7.5 kB)
Collecting maplibre (from leafmap)
  Downloading maplibre-0.3.5-py3-none-any.whl.metadata (4.0 kB)
Collecting pystac-client (from leafmap)
  Downloading pystac_client-0.9.0-py3-none-any.whl.metadata (3.1 kB)
Collecting whiteboxgui (from leafmap)
  Downloading whiteboxgui-2.3.0-py2.py3-none-any.whl.metadata (5.7 kB)
Collecting ipyvue<2,>=1.7 (from ipyvuetify->leafmap)
  Downl

In [None]:
from textwrap import fill
import folium
import pandas as pd
import math
DALLAS_LAT = 32.7767
DALLAS_LONG = -96.7970
def distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points via haversine.

    All four arguments are coordinates in decimal degrees.  The result is in
    the unit of the sphere radius used below (6,371,000, i.e. metres for the
    mean Earth radius).
    """
    earth_radius = 6371000
    phi1, lam1, phi2, lam2 = (math.radians(deg) for deg in (lat1, lon1, lat2, lon2))
    delta_phi = phi2 - phi1
    delta_lam = lam2 - lam1
    # Haversine of the central angle between the two points.
    haversine = math.sin(delta_phi/2)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(delta_lam/2)**2
    central_angle = 2 * math.asin(math.sqrt(haversine))
    return earth_radius * central_angle
def offset_train_stop(alpha, degrees, route):
    """Return the (lat, lng) offset to apply to a repeated train stop.

    Offsetting is currently disabled: the function always returns ``(0, 0)``,
    so stops that share coordinates are drawn on top of each other.  The
    parameters are kept so existing callers keep working.  The value that
    used to be computed here (but was never returned) was a latitude-only
    shift of ``alpha * degrees * (-1) ** (route % 2)``; return that as the
    first element to re-enable per-route offsets.
    """
    return 0, 0
def train_pathfind(points):
  """Order stops into a path with a greedy nearest-neighbour walk.

  The walk starts at the point farthest from (DALLAS_LAT, DALLAS_LONG) and
  repeatedly moves to the closest point not yet used, as measured by
  ``distance``.

  Args:
    points: list of (lat, lng) pairs.  The list itself is not modified.

  Returns:
    A new list holding the same points in visiting order, or an empty list
    when ``points`` is empty.
  """
  if not points:
    # max() raises ValueError on an empty sequence; an empty route has no path.
    return []
  start = max(points, key=lambda p: distance(p[0], p[1], DALLAS_LAT, DALLAS_LONG))
  unvisited = list(points)
  unvisited.remove(start)
  sorted_points = [start]
  while unvisited:
    current = sorted_points[-1]
    closest_point = min(unvisited, key=lambda p: distance(p[0], p[1], current[0], current[1]))
    unvisited.remove(closest_point)
    sorted_points.append(closest_point)
  return sorted_points
#DATASET = '/content/drive/MyDrive/Datasets/train_stops - train_stops.csv'
DATASET = '/content/drive/MyDrive/Datasets/train_stops.csv'
df_stops = pd.read_csv(DATASET)
# Centre the base map on the shared Dallas constants instead of repeating the literals.
m = folium.Map(location=[DALLAS_LAT, DALLAS_LONG], zoom_start=10, tiles='CartoDB positron')
# Only route ids listed in color_map are drawn; all of them currently use black.
# The per-route palette is kept for reference:
#color_map = {26777:'blue', 26778:'green', 26779: 'orange', 26780: 'red'}
color_map = {26777:'black', 26778:'black', 26779: 'black', 26780: 'black', 26782: 'black'}
# Coerce coordinates to numbers and drop rows whose lat/lng could not be parsed.
df_stops['lat'] = pd.to_numeric(df_stops['lat'], errors='coerce')
df_stops['lng'] = pd.to_numeric(df_stops['lng'], errors='coerce')
df_stops.dropna(subset=['lat', 'lng'], inplace=True)
# visited maps an exact (lat, lng) pair to how many times it has been drawn so far.
visited = {}
# NOTE(review): 26777 looks like the lowest route id of interest - confirm against the dataset.
for route in df_stops.loc[df_stops['route_id']>= 26777, 'route_id'].unique():
  if route not in color_map:
    continue
  route_color = color_map[route]
  points = []
  for lat, lng in df_stops.loc[df_stops['route_id'] == route, ['lat', 'lng']].values:
    visit = visited.get((lat, lng), 0)
    visited[(lat, lng)] = visit + 1
    if visit:
      # This coordinate was already drawn (earlier row or another route): ask for an offset.
      offset_lat, offset_lng = offset_train_stop(visit%2 + visit, 0.0005, route)
    else:
      offset_lat, offset_lng = 0, 0
    stop = (lat + offset_lat, lng + offset_lng)
    folium.Circle(
        location=[float(stop[0]), float(stop[1])],
        radius=100,
        color=route_color,
        fill=True,
        fill_color=route_color,
        fill_opacity=0.6,
    ).add_to(m)
    points.append(stop)
  # Connect the route's stops in nearest-neighbour order.
  sorted_points = train_pathfind(points)
  folium.PolyLine(sorted_points, color=route_color, weight=2.5, opacity=1).add_to(m)

In [None]:
import geopandas as gpd
from shapely.geometry import Point
from shapely.ops import unary_union

def create_network_shape(points, buffer_radius=3200):
    """Build one polygon covering everything within ``buffer_radius`` of the stops.

    Args:
        points: iterable of (lat, lon) pairs in WGS 84 degrees (EPSG:4326).
        buffer_radius: buffer distance around each stop, in metres.

    Returns:
        A single-row GeoDataFrame in EPSG:4326 whose geometry is the union of
        all stop buffers (an empty geometry when ``points`` is empty).
    """
    points = list(points)
    if not points:
        # Nothing to buffer: a UTM zone cannot be estimated from zero points,
        # so return the union of zero buffers directly.
        return gpd.GeoDataFrame(geometry=[unary_union([])], crs="EPSG:4326")

    geometry = [Point(lon, lat) for lat, lon in points]
    gdf = gpd.GeoDataFrame(geometry=geometry, crs="EPSG:4326")
    # Buffer in the local UTM zone rather than Web Mercator (EPSG:3857).
    # Web Mercator stretches lengths by roughly 1/cos(latitude), so a buffer of
    # N projected units there covers fewer than N metres on the ground away
    # from the equator.
    metric_crs = gdf.estimate_utm_crs()
    buffers = gdf.to_crs(metric_crs).buffer(buffer_radius)
    union_geometry = unary_union(buffers)
    union_geometry_4326 = gpd.GeoSeries([union_geometry], crs=metric_crs).to_crs("EPSG:4326").iloc[0]
    network_shape = gpd.GeoDataFrame(geometry=[union_geometry_4326], crs="EPSG:4326")
    return network_shape

# Extract every train stop coordinate, skipping rows with a missing lat/lng.
valid_coords = df_stops[['lat', 'lng']].dropna()
all_points = list(zip(valid_coords['lat'], valid_coords['lng']))

# Remove duplicates to get unique stops only
unique_points = list(set(all_points))
print(f"Found {len(unique_points)} unique train stops")
# Create the network shape and draw it on the map
if unique_points:
    network_gdf = create_network_shape(unique_points, buffer_radius=2400)
    bounds = network_gdf.bounds.iloc[0]
    # Measure the area in the local UTM zone: areas computed in Web Mercator
    # (EPSG:3857) are inflated by roughly 1/cos(latitude)^2.
    area_sq_km = network_gdf.to_crs(network_gdf.estimate_utm_crs()).area.iloc[0] / 1e6  # Convert to sq km
    print(f"\nNetwork coverage area: {area_sq_km:.2f} sq km")
    print(f"Bounding box: {bounds['miny']:.4f}, {bounds['minx']:.4f} to {bounds['maxy']:.4f}, {bounds['maxx']:.4f}")
    folium.GeoJson(
        network_gdf.to_json(),
        # Keys are Leaflet path options, which are case-sensitive: the stroke
        # opacity option is 'opacity' (a capitalised 'Opacity' is ignored).
        style_function=lambda x: {
            'fillColor': 'gray',
            'color': 'gray',
            'weight': 2,
            'fillOpacity': 0.2,
            'opacity': 0.2
        },
        name="DART Network Coverage",
        tooltip="DART Service Area"
    ).add_to(m)
    folium.LayerControl().add_to(m)
else:
    print("No valid train stop points found!")


Found 77 unique train stops

Network coverage area: 812.95 sq km
Bounding box: 32.6359, -97.3498 to 33.0515, -96.5418


## Map Visualization

The map below shows the DART train network and subsidized housing projects in Dallas. You can interact with the map by zooming and panning. The DART network coverage area is displayed as a semi-transparent grey layer. The circles represent subsidized housing projects (green when inside the coverage area, red when outside), with larger circles indicating higher populations. You can hover over a circle to see the estimated population of that project.

In [None]:
#Adding in subsidized housing projects in dallas
import leafmap.foliumap as leafmap
import branca
def population_radius(number):
  """Return the circle radius for a population given as text.

  ``number`` is a string that may contain thousands separators (for example
  "1,234").  The parsed integer is returned, with 100 as the minimum.
  """
  population = int(number.replace(',', ''))
  return max(population, 100)
def parse_point_string(point_string):
  """Parse a WKT point such as ``"POINT (-96.797 32.7767)"``.

  The text holds longitude first, then latitude; the result is returned in
  ``(lat, lng)`` order as floats.  Only the text between the parentheses is
  used, so ``"POINT(-96.8 32.7)"`` (no space) and a lower-case ``"point"``
  prefix are accepted as well as the spaced upper-case form.

  Raises:
    ValueError: if the coordinate text is not exactly two numbers.
  """
  # Keep what follows the last '(' and precedes the first ')' after it.
  coords_str = point_string.rpartition('(')[2].partition(')')[0]
  lng_str, lat_str = coords_str.split()
  return float(lat_str), float(lng_str)

projects_df = pd.read_csv('/content/drive/MyDrive/Datasets/housing_projects.csv')
# Parse every WKT point once: (lat, lng) is reused for drawing, and the
# shapely Point takes x=lng, y=lat.
project_coords = [parse_point_string(wkt) for wkt in projects_df['the_geom']]
projects_gdf = gpd.GeoDataFrame(
    projects_df,
    geometry=[Point(lng, lat) for lat, lng in project_coords],
    crs="EPSG:4326"
)
# Perform a spatial join to find projects within the network_gdf
projects_within_network = gpd.sjoin(projects_gdf, network_gdf, how="inner", predicate="intersects")
# Row labels of the projects that intersect the coverage polygon.
inside_index = set(projects_within_network.index)
inside_population = 0
total_population = 0
for (index, row), (lat, lng) in zip(projects_gdf.iterrows(), project_coords):
  project_population = row['people_tot']
  population = int(project_population.replace(',',''))
  if population < 0:
    # Negative counts are skipped - presumably a missing-data marker; verify against the dataset.
    continue

  total_population += population

  # Green when the project lies inside the coverage area, red otherwise.
  if index in inside_index:
    color = 'green'
    inside_population += population
  else:
    color = 'red'

  # The radius is clamped by population_radius, but the tooltip reports the
  # real population rather than the clamped radius.
  folium.Circle(
      location=[lat, lng],
      radius=population_radius(project_population),
      color=color,
      fill=True,
      fill_color=color,
      fill_opacity=0.6,
      tooltip=f'Project Population: {population}',
      name="Subsidized Housing Projects"
  ).add_to(m)

print(f'population within_range: {inside_population}')
print(f'population outside range: {total_population - inside_population}')
print(f'total population: {total_population}')
# HTML for a fixed-position legend box, wrapped in the Jinja macro named
# "html" so that it can be used as the template of a branca MacroElement.
# The swatch colours mirror the colours used for the map layers in this notebook.
legend_html = """
{% macro html(this, kwargs) %}
<div style="position: fixed;
            bottom: 50px; left: 50px; width: 250px; max-height: 200px;
            border: 2px solid #555; z-index:9999; font-size:14px;
            background-color: white; padding: 10px; border-radius: 5px; box-shadow: 0 4px 8px rgba(0,0,0,0.2);">

  <h4 style="margin-top: 0; margin-bottom: 10px; text-align: center; border-bottom: 1px solid #ccc; padding-bottom: 5px;">Map Legend</h4>

  <div style="margin-bottom: 8px;">
    <span style="display: inline-block; width: 12px; height: 12px; border-radius: 50%; background-color: black; margin-right: 5px; border: 1px solid #333;"></span>
    <span style="vertical-align: top;">Train Stations</span>
  </div>

  <div style="margin-bottom: 8px;">
    <span style="display: inline-block; width: 12px; height: 12px; background-color: lightgray; border: 1px solid #555; margin-right: 5px;"></span>
    <span style="vertical-align: top;">Train Service Area</span>
  </div>

  <div style="margin-bottom: 8px;">
    <span style="display: inline-block; width: 12px; height: 12px; border-radius: 50%; background-color: green; margin-right: 5px; border: 1px solid #333;"></span>
    <span style="vertical-align: top;">Projects Inside Service Area</span>
  </div>

  <div style="margin-bottom: 8px;">
    <span style="display: inline-block; width: 12px; height: 12px; border-radius: 50%; background-color: red; margin-right: 5px; border: 1px solid #333;"></span>
    <span style="vertical-align: top;">Projects Outside Service Area</span>
  </div>

  <div style="margin-bottom: 0;">
    <span style="display: inline-block; width: 12px; height: 12px; border-radius: 50%; background-color: white; margin-right: 5px;"></span>
    <span style="vertical-align: top;">Circle Size &rarr; Population</span>
  </div>
</div>
{% endmacro %}
"""
# Attach the legend to the map's root element using the template above.
legend = branca.element.MacroElement()
legend._template = branca.element.Template(legend_html)
m.get_root().add_child(legend)

# A bare expression on the last line of the cell makes the notebook display the map.
m

population within_range: 4407
population outside range: 9610
total population: 14017


In [None]:
# Keep the path in one variable so the confirmation message always names the
# file that was actually written.
output_path = 'dallas_housing_dart_map.html'
m.save(output_path)
print(f"Map saved as {output_path}")

Map saved as dallas_housing_dart_map_final.html


# EXTRA
## Original code for the heatmap, based on train data scraped between 8 AM and 9 AM

In [None]:
#Here is the code I ran in a Docker container to get the real locations of DART trains in the morning. Running this code here won't work because there is no X server available.
import asyncio
from playwright.async_api import async_playwright
import json, time, signal
import csv
from datetime import datetime
import zmq
OUT = "snapshots.json"
stop_signal_received = False
DATASET = "train_dataset_morning.csv"
# Open in append mode so repeated runs extend the same dataset.
f = open(DATASET, "a", newline="")
writer = csv.writer(f)
# An append-mode stream starts positioned at the end of the file, so a
# position of 0 means the file is new or empty.  Write the header only then;
# otherwise every run would add another header row in the middle of the data.
if f.tell() == 0:
    writer.writerow(['timestamp_iso', 'vehicle_id', 'transitMode', 'orientation_angle', 'lat', 'lng', 'stop_id','headSign', 'route_id', 'trip_id'])
def get_dart_record(json,timestamp):
    """Flatten one vehicle entry into a flat list suitable for a CSV row.

    ``json`` is the vehicle's dict (inside this function the parameter name
    hides the ``json`` module).  The returned list holds, in order: the
    timestamp, id, transitMode, orientation, coordinate lat, coordinate lng,
    stop id, headSign, route id and trip id.  A missing key raises KeyError.
    """
    vehicle = json
    return [
        timestamp,
        vehicle["id"],
        vehicle["transitMode"],
        vehicle["orientation"],
        vehicle["coordinate"]["lat"],
        vehicle["coordinate"]["lng"],
        vehicle["stop"]["id"],
        vehicle["headSign"],
        vehicle["route"]["id"],
        vehicle["trip"]["id"],
    ]
async def main():
    """Open the DART GoPass site and record every light-rail vehicle snapshot.

    A Chromium window is launched (headed, hence the need for an X server),
    the site is loaded, and each later response whose URL contains
    "vehicles/snapshot" and whose content type mentions JSON is parsed.  One
    CSV row per LIGHT_RAIL vehicle is written through the module-level
    ``writer``.  The coroutine runs until it is cancelled or interrupted.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        try:
            page = await browser.new_page()
            await page.goto("https://dart.mygopass.org/")

            async def handle_response(resp):
                """Write one row per light-rail vehicle found in a snapshot response."""
                ts = datetime.now().isoformat()
                try:
                    if "vehicles/snapshot" not in str(resp.url):
                        return
                    ct = resp.headers.get("content-type", "").lower()
                    if "json" not in ct:
                        return
                    data = await resp.json()
                    trains = [
                        vehicle for vehicle in data.get('content', [])
                        if vehicle["transitMode"] == "LIGHT_RAIL"
                    ]
                    for train in trains:
                        writer.writerow(get_dart_record(train, ts))
                    # Flush after each snapshot so rows are not lost if the
                    # process is killed; the file is never closed explicitly.
                    f.flush()
                except Exception as e:
                    # Best effort: one malformed response must not stop the scrape.
                    print("Response handler error:", e)

            page.on("response", handle_response)
            # Keep the event loop alive so the response handler keeps firing.
            while True:
                await asyncio.sleep(1)
        finally:
            # Reached on cancellation or interrupt; previously this call sat
            # after the infinite loop and could never run.
            await browser.close()
asyncio.run(main())


ModuleNotFoundError: No module named 'playwright'