In [None]:
"""
MBTA Data Collection - One Time Only
Fetches T stops and routes, loads into Snowflake
"""

import os

import pandas as pd
import requests
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas

# Snowflake connection settings, passed as keyword arguments to
# snowflake.connector.connect(). Each value is read from the matching
# SNOWFLAKE_<KEY> environment variable (for example SNOWFLAKE_USER or
# SNOWFLAKE_PASSWORD) so credentials never have to be typed into, or saved
# with, the notebook. An unset variable falls back to '' - the placeholder
# value this dictionary held before.
SNOWFLAKE_CONFIG = {
    key: os.environ.get(f'SNOWFLAKE_{key.upper()}', '')
    for key in ('user', 'password', 'account', 'warehouse', 'database', 'schema', 'role')
}

def fetch_mbta_stops():
    """Fetch every stop from the MBTA V3 API and return them as a DataFrame.

    Prints progress messages, issues a single GET to /stops and flattens each
    returned resource into one row.

    Returns:
        pandas.DataFrame: one row per stop with the columns STOP_ID,
        STOP_NAME, STOP_CODE, PLATFORM_NAME, LATITUDE, LONGITUDE, ZONE_ID,
        ADDRESS, MUNICIPALITY, WHEELCHAIR_BOARDING, PARENT_STATION and
        PLATFORM_CODE. The columns are present even when no stop is returned.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    columns = [
        'STOP_ID', 'STOP_NAME', 'STOP_CODE', 'PLATFORM_NAME', 'LATITUDE',
        'LONGITUDE', 'ZONE_ID', 'ADDRESS', 'MUNICIPALITY',
        'WHEELCHAIR_BOARDING', 'PARENT_STATION', 'PLATFORM_CODE',
    ]

    def attr_or_related(resource, attribute, relationship):
        """Return the attribute value, else the id of the related resource."""
        value = resource['attributes'].get(attribute)
        if value is not None:
            return value
        # The V3 API reports the fare zone and parent station as JSON:API
        # relationships ({"data": {"id": ...}}) rather than attributes, so
        # reading the attribute alone leaves these columns empty.
        target = ((resource.get('relationships') or {}).get(relationship) or {}).get('data')
        return target.get('id') if isinstance(target, dict) else None

    print("üìç Fetching MBTA stops...")
    url = 'https://api-v3.mbta.com/stops'
    # Bound the wait so a stalled connection cannot hang the notebook, and
    # fail with a clear HTTP error instead of a KeyError on data['data'].
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    stops = []
    for stop in data['data']:
        attr = stop['attributes']
        stops.append({
            'STOP_ID': stop['id'],
            'STOP_NAME': attr.get('name'),
            'STOP_CODE': attr.get('code'),
            'PLATFORM_NAME': attr.get('platform_name'),
            'LATITUDE': attr.get('latitude'),
            'LONGITUDE': attr.get('longitude'),
            'ZONE_ID': attr_or_related(stop, 'zone_id', 'zone'),
            'ADDRESS': attr.get('address'),
            'MUNICIPALITY': attr.get('municipality'),
            # True only when the API value is exactly 1; every other value,
            # including a missing one, becomes False.
            'WHEELCHAIR_BOARDING': attr.get('wheelchair_boarding') == 1,
            'PARENT_STATION': attr_or_related(stop, 'parent_station', 'parent_station'),
            'PLATFORM_CODE': attr.get('platform_code')
        })

    print(f"‚úÖ Fetched {len(stops)} stops")
    # Passing the column list keeps the schema stable for an empty response.
    return pd.DataFrame(stops, columns=columns)

def fetch_mbta_routes():
    """Fetch every route from the MBTA V3 API and return them as a DataFrame.

    Prints progress messages, issues a single GET to /routes and flattens
    each returned resource into one row.

    Returns:
        pandas.DataFrame: one row per route with the columns ROUTE_ID,
        ROUTE_SHORT_NAME, ROUTE_LONG_NAME, ROUTE_TYPE, ROUTE_COLOR,
        ROUTE_TEXT_COLOR and ROUTE_SORT_ORDER. ROUTE_TYPE holds a readable
        name; a numeric type outside 0-4 (or a missing one) becomes 'Other'.
        The columns are present even when no route is returned.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    columns = [
        'ROUTE_ID', 'ROUTE_SHORT_NAME', 'ROUTE_LONG_NAME', 'ROUTE_TYPE',
        'ROUTE_COLOR', 'ROUTE_TEXT_COLOR', 'ROUTE_SORT_ORDER',
    ]

    # Map route type numbers to names (built once, not once per route)
    route_type_map = {
        0: 'Light Rail',
        1: 'Subway',
        2: 'Commuter Rail',
        3: 'Bus',
        4: 'Ferry'
    }

    print("üöá Fetching MBTA routes...")
    url = 'https://api-v3.mbta.com/routes'
    # Bound the wait so a stalled connection cannot hang the notebook, and
    # fail with a clear HTTP error instead of a KeyError on data['data'].
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    routes = []
    for route in data['data']:
        attr = route['attributes']
        routes.append({
            'ROUTE_ID': route['id'],
            'ROUTE_SHORT_NAME': attr.get('short_name'),
            'ROUTE_LONG_NAME': attr.get('long_name'),
            'ROUTE_TYPE': route_type_map.get(attr.get('type'), 'Other'),
            'ROUTE_COLOR': attr.get('color'),
            'ROUTE_TEXT_COLOR': attr.get('text_color'),
            'ROUTE_SORT_ORDER': attr.get('sort_order')
        })

    print(f"‚úÖ Fetched {len(routes)} routes")
    # Passing the column list keeps the schema stable for an empty response.
    return pd.DataFrame(routes, columns=columns)

def main():
    """Fetch MBTA stops and routes and append them to the Snowflake bronze tables.

    Steps: download both datasets, open a Snowflake connection using
    SNOWFLAKE_CONFIG, then upload the stops to
    LOCEATS_DB.BRONZE.BRONZE_MBTA_STOPS and the routes to
    LOCEATS_DB.BRONZE.BRONZE_MBTA_ROUTES with write_pandas. The tables are
    expected to exist already (auto_create_table=False) and rows are
    appended (overwrite=False).

    If an upload raises an Exception, the error is printed and the function
    returns without attempting the remaining upload or printing the summary.
    The connection is closed on every exit path, including when the run is
    interrupted.

    Returns:
        None
    """
    print("üöá Starting MBTA Data Collection\n")

    # Fetch data
    stops_df = fetch_mbta_stops()
    routes_df = fetch_mbta_routes()

    # Connect to Snowflake
    print("\nüîå Connecting to Snowflake...")
    conn = snowflake.connector.connect(**SNOWFLAKE_CONFIG)
    try:
        print("‚úÖ Connected!\n")

        # Reset indexes
        stops_df = stops_df.reset_index(drop=True)
        routes_df = routes_df.reset_index(drop=True)

        # (label used in messages, frame to upload, target table), in upload order
        uploads = (
            ('stops', stops_df, 'BRONZE_MBTA_STOPS'),
            ('routes', routes_df, 'BRONZE_MBTA_ROUTES'),
        )
        for label, frame, table in uploads:
            print(f"‚¨ÜÔ∏è  Loading MBTA {label}...")
            try:
                # write_pandas returns (success, nchunks, nrows, output);
                # only the row count is reported.
                _, _, nrows, _ = write_pandas(
                    conn,
                    frame,
                    table,
                    database='LOCEATS_DB',
                    schema='BRONZE',
                    auto_create_table=False,
                    quote_identifiers=False,
                    overwrite=False
                )
            except Exception as e:
                print(f"‚ùå Error loading {label}: {e}")
                return
            print(f"‚úÖ Loaded {nrows} {label}")
    finally:
        # Runs on success, on a reported upload error, and on an interrupt,
        # so the session is never left open.
        conn.close()

    print("\n" + "="*60)
    print("‚úÖ MBTA DATA COLLECTION COMPLETE!")
    print("="*60)
    print(f"Stops: {len(stops_df)}")
    print(f"Routes: {len(routes_df)}")
    print("\nData loaded into:")
    print("- LOCEATS_DB.BRONZE.BRONZE_MBTA_STOPS")
    print("- LOCEATS_DB.BRONZE.BRONZE_MBTA_ROUTES")

if __name__ == "__main__":
    main()

üöá Starting MBTA Data Collection

üìç Fetching MBTA stops...
‚úÖ Fetched 10280 stops
üöá Fetching MBTA routes...
‚úÖ Fetched 177 routes

üîå Connecting to Snowflake...
‚úÖ Connected!

‚¨ÜÔ∏è  Loading MBTA stops...
‚ùå Error loading stops: 001008 (22023): SQL compilation error:
invalid value [?] for parameter 'ON_ERROR'


In [3]:
"""
MBTA Data Collection - One Time Only
Fetches T stops and routes, saves as CSV for Snowflake upload
"""

import pandas as pd
import requests

def fetch_mbta_stops():
    """Fetch every stop from the MBTA V3 API and return them as a DataFrame.

    Prints progress messages, issues a single GET to /stops and flattens each
    returned resource into one row.

    Returns:
        pandas.DataFrame: one row per stop with the columns stop_id,
        stop_name, stop_code, platform_name, latitude, longitude, zone_id,
        address, municipality, wheelchair_boarding, parent_station and
        platform_code. The columns are present even when no stop is returned.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    columns = [
        'stop_id', 'stop_name', 'stop_code', 'platform_name', 'latitude',
        'longitude', 'zone_id', 'address', 'municipality',
        'wheelchair_boarding', 'parent_station', 'platform_code',
    ]

    def attr_or_related(resource, attribute, relationship):
        """Return the attribute value, else the id of the related resource."""
        value = resource['attributes'].get(attribute)
        if value is not None:
            return value
        # The V3 API reports the fare zone and parent station as JSON:API
        # relationships ({"data": {"id": ...}}) rather than attributes, so
        # reading the attribute alone leaves these columns empty.
        target = ((resource.get('relationships') or {}).get(relationship) or {}).get('data')
        return target.get('id') if isinstance(target, dict) else None

    print("üìç Fetching MBTA stops...")
    url = 'https://api-v3.mbta.com/stops'
    # Bound the wait so a stalled connection cannot hang the notebook, and
    # fail with a clear HTTP error instead of a KeyError on data['data'].
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    stops = []
    for stop in data['data']:
        attr = stop['attributes']
        stops.append({
            'stop_id': stop['id'],
            'stop_name': attr.get('name'),
            'stop_code': attr.get('code'),
            'platform_name': attr.get('platform_name'),
            'latitude': attr.get('latitude'),
            'longitude': attr.get('longitude'),
            'zone_id': attr_or_related(stop, 'zone_id', 'zone'),
            'address': attr.get('address'),
            'municipality': attr.get('municipality'),
            # True only when the API value is exactly 1; every other value,
            # including a missing one, becomes False.
            'wheelchair_boarding': attr.get('wheelchair_boarding') == 1,
            'parent_station': attr_or_related(stop, 'parent_station', 'parent_station'),
            'platform_code': attr.get('platform_code')
        })

    print(f"‚úÖ Fetched {len(stops)} stops")
    # Passing the column list keeps the CSV header stable for an empty response.
    return pd.DataFrame(stops, columns=columns)

def fetch_mbta_routes():
    """Fetch every route from the MBTA V3 API and return them as a DataFrame.

    Prints progress messages, issues a single GET to /routes and flattens
    each returned resource into one row.

    Returns:
        pandas.DataFrame: one row per route with the columns route_id,
        route_short_name, route_long_name, route_type, route_color,
        route_text_color and route_sort_order. route_type holds a readable
        name; a numeric type outside 0-4 (or a missing one) becomes 'Other'.
        The columns are present even when no route is returned.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the API does not answer within the timeout.
    """
    columns = [
        'route_id', 'route_short_name', 'route_long_name', 'route_type',
        'route_color', 'route_text_color', 'route_sort_order',
    ]

    # Map route type numbers to names (built once, not once per route)
    route_type_map = {
        0: 'Light Rail',
        1: 'Subway',
        2: 'Commuter Rail',
        3: 'Bus',
        4: 'Ferry'
    }

    print("üöá Fetching MBTA routes...")
    url = 'https://api-v3.mbta.com/routes'
    # Bound the wait so a stalled connection cannot hang the notebook, and
    # fail with a clear HTTP error instead of a KeyError on data['data'].
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    routes = []
    for route in data['data']:
        attr = route['attributes']
        routes.append({
            'route_id': route['id'],
            'route_short_name': attr.get('short_name'),
            'route_long_name': attr.get('long_name'),
            'route_type': route_type_map.get(attr.get('type'), 'Other'),
            'route_color': attr.get('color'),
            'route_text_color': attr.get('text_color'),
            'route_sort_order': attr.get('sort_order')
        })

    print(f"‚úÖ Fetched {len(routes)} routes")
    # Passing the column list keeps the CSV header stable for an empty response.
    return pd.DataFrame(routes, columns=columns)

def main():
    """Download MBTA stops and routes and export each dataset to a CSV file.

    Writes mbta_stops.csv and mbta_routes.csv (without the index column) to
    the current working directory, then prints a summary with the row counts
    and the manual follow-up steps for loading the files into Snowflake.

    Returns:
        None
    """
    print("üöá Starting MBTA Data Collection\n")

    # Download both datasets before anything is written to disk
    stops_df = fetch_mbta_stops()
    routes_df = fetch_mbta_routes()

    # Export each frame to its own CSV file
    print("\nüíæ Saving to CSV files...")
    exports = ((stops_df, 'mbta_stops.csv'), (routes_df, 'mbta_routes.csv'))
    for frame, csv_name in exports:
        frame.to_csv(csv_name, index=False)

    # Summary banner
    rule = "=" * 60
    print("\n" + rule)
    print("‚úÖ MBTA DATA SAVED TO CSV!")
    print(rule)
    print(f"Stops: {len(stops_df)} ‚Üí mbta_stops.csv")
    print(f"Routes: {len(routes_df)} ‚Üí mbta_routes.csv")

    # Manual follow-up instructions
    print("\nNext steps:")
    next_steps = (
        "1. Upload mbta_stops.csv to Snowflake stage",
        "2. Upload mbta_routes.csv to Snowflake stage",
        "3. Run the COPY INTO commands in Snowflake",
    )
    for step in next_steps:
        print(step)

if __name__ == "__main__":
    main()

üöá Starting MBTA Data Collection

üìç Fetching MBTA stops...
‚úÖ Fetched 10280 stops
üöá Fetching MBTA routes...
‚úÖ Fetched 177 routes

üíæ Saving to CSV files...

‚úÖ MBTA DATA SAVED TO CSV!
Stops: 10280 ‚Üí mbta_stops.csv
Routes: 177 ‚Üí mbta_routes.csv

Next steps:
1. Upload mbta_stops.csv to Snowflake stage
2. Upload mbta_routes.csv to Snowflake stage
3. Run the COPY INTO commands in Snowflake


In [4]:
# Show the 20 municipalities with the most stops in the exported CSV
import pandas as pd
stops = pd.read_csv('mbta_stops.csv')
municipality_counts = stops['municipality'].value_counts()
print(municipality_counts.head(20))

municipality
Boston        3674
Quincy         614
Cambridge      534
Newton         382
Lynn           345
Somerville     329
Malden         320
Medford        266
Revere         260
Brookline      216
Waltham        194
Braintree      180
Milton         176
Arlington      157
Weymouth       154
Melrose        136
Saugus         132
Chelsea        130
Woburn         124
Randolph       119
Name: count, dtype: int64


In [5]:
# Show how many routes fall under each route type in the exported CSV
routes = pd.read_csv('mbta_routes.csv')
route_type_counts = routes['route_type'].value_counts()
print(route_type_counts)

route_type
Bus              150
Commuter Rail     13
Ferry              6
Light Rail         5
Subway             3
Name: count, dtype: int64
