In [1]:
# Widen the notebook's .container element to 80% of the browser window.
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

## Objective:
    
Pull in the data files from the Madison Open Data Portal and peek at what is in them. This notebook is a good reference to have open while working with the data. 

## Notes: 

#### Data Origins: 

- mmt_GTFS: http://transitdata.cityofmadison.com/GTFS/
- Open data portal: https://data-cityofmadison.opendata.arcgis.com/

<br/>
<br/>
<br/>
Author: Megan Tabbutt <br/>
Last Revised: 9_8_20

In [28]:
# Import statements:

import datetime
import os

import numpy as np
import pandas as pd

In [3]:
# Local data paths:

# Relative directory that the GTFS text files (stop_times.txt, trips.txt, ...) are read from.
GTFS_DATA_PATH = "mmt_gtfs/"
# import os; os.listdir(GTFS_DATA_PATH) # Peek at what's in the directory

In [4]:
def getPandasDFCSV(path, file, sep=','):
    """Read a delimited text file into a pandas DataFrame.

    Args:
        path: Directory containing the file; a trailing separator is optional.
        file: Name of the file inside `path`.
        sep: Delimiter used in the text file (default = ',').

    Returns:
        The DataFrame produced by `pd.read_csv`.
    """
    # os.path.join only inserts a separator when `path` does not already end
    # with one, so "mmt_gtfs" and "mmt_gtfs/" resolve to the same file.
    return pd.read_csv(os.path.join(path, file), sep=sep)

### stop_times:

In [44]:
# Load stop_times.txt from the GTFS directory and preview its first three rows.
stop_times_df = getPandasDFCSV(GTFS_DATA_PATH, 'stop_times.txt')
stop_times_df.head(3)
# Uncomment for the column dtypes and non-null counts:
#stop_times_df.info()

Unnamed: 0,trip_id,stop_sequence,stop_id,pickup_type,drop_off_type,arrival_time,departure_time,timepoint,stop_headsign,shape_dist_traveled
0,9999999,1,7605,0,1,5:27:00,5:27:00,1,EAST TRANSFER,0.0249
1,9999999,2,7739,0,0,5:27:40,5:27:40,0,EAST TRANSFER,0.2003
2,9999999,3,7119,0,0,5:28:33,5:28:33,0,EAST TRANSFER,0.4352


### trips:

In [6]:
# Load trips.txt from the GTFS directory and preview its first three rows.
trips_df = getPandasDFCSV(GTFS_DATA_PATH, 'trips.txt')
trips_df.head(3)

Unnamed: 0,route_id,route_short_name,service_id,trip_id,trip_headsign,direction_id,direction_name,block_id,shape_id,shape_code,trip_type,trip_sort,wheelchair_accessible,bikes_allowed
0,9041,16,92_WKD,9999999,EAST TRANSFER,0,East Transfer,193133,55293,P16E,D,19620,1,1
1,9028,2,92_WKD,1007843,NORTH TRANSFER: VIA SHERMAN,0,North Transfer,194263,55156,2S,W,20100,1,1
2,9028,2,92_WKD,1007844,NORTH TRANSFER: VIA FORDEM,0,North Transfer,194056,55154,2F,W,21600,1,1


### stops:

In [7]:
# Load stops.txt from the GTFS directory and preview its first ten rows.
stops_df = getPandasDFCSV(GTFS_DATA_PATH, 'stops.txt')
stops_df.head(10)
# Uncomment for the column dtypes and non-null counts:
#stops_df.info()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,agency_id,jurisdiction_id,location_type,parent_station,relative_position,cardinal_direction,wheelchair_boarding,primary_street,address_range,cross_location
0,1110,1110,E Dayton & Wisconsin (WB),This EVENT/DETOUR ONLY stop (#1110) is westbou...,43.076428,-89.38592,MMT,CMAD,0,,3,270.0,1,E Dayton,2,Wisconsin
1,1111,1111,W Doty & M L K Junior (EB),This EVENT/DETOUR ONLY stop (#1111) is eastbou...,43.072938,-89.382354,MMT,CMAD,0,,3,90.0,1,W Doty,1,M L K Junior
2,1124,1124,S Fairchild & W Main (SB),This EVENT/DETOUR ONLY stop (#1124) is southbo...,43.072467,-89.385273,MMT,CMAD,0,,3,180.0,1,S Fairchild,98,W Main
3,1135,1135,N Webster & E Mifflin (NB),This EVENT/DETOUR ONLY stop (#1135) is northbo...,43.077272,-89.383413,MMT,CMAD,0,,1,0.0,1,N Webster,101,E Mifflin
4,1140,1140,E Washington & N Webster (WB),This EVENT/DETOUR ONLY stop (#1140) is westbou...,43.07639,-89.38193,MMT,CMAD,0,,3,270.0,1,E Washington,200,N Webster
5,1146,1146,N Fairchild & W Washington (SB),This EVENT/DETOUR ONLY stop (#1146) is southbo...,43.073504,-89.386724,MMT,CMAD,0,,3,180.0,1,N Fairchild,50,W Washington
6,1155,1155,S Webster & E Main (NB),This EVENT/DETOUR ONLY stop (#1155) is northbo...,43.075579,-89.381192,MMT,CMAD,0,,1,0.0,1,S Webster,99,E Main
7,1174,1174,W Dayton & State (WB),This EVENT/DETOUR ONLY stop (#1174) is westbou...,43.07512,-89.387855,MMT,CMAD,0,,3,270.0,1,W Dayton,198,State
8,1189,1189,E Doty & S Pinckney (EB),This EVENT/DETOUR ONLY stop (#1189) is eastbou...,43.074111,-89.380724,MMT,CMAD,0,,3,90.0,1,E Doty,101,S Pinckney
9,4000,SoTP,South Transfer Point,The South Transfer Point station is located at...,43.038957,-89.394651,MMT,MTSM,1,,5,,1,South Transfer Point,2430,S Park


### routes:

In [8]:
# Load routes.txt from the GTFS directory and preview its first three rows.
routes_df = getPandasDFCSV(GTFS_DATA_PATH, 'routes.txt')
routes_df.head(3)

Unnamed: 0,route_id,service_id,agency_id,route_short_name,route_long_name,route_service_name,route_desc,route_type,route_url,route_color,route_text_color,bikes_allowed
0,9027,92,MMT,1,,OLD UNIV:CAP SQR,Weekday schedule trips suspended until further...,3,http://www.cityofmadison.com/metro/routes-sche...,E3D23E,000000,1
1,9028,92,MMT,2,,WEST TP:NORTH TP,Daily schedule trips operate regularly every 3...,3,http://www.cityofmadison.com/metro/routes-sche...,981F66,FFFFFF,1
2,9029,92,MMT,3,,WEST TP:EAST TP,Weekday schedule trips suspended until further...,3,http://www.cityofmadison.com/metro/routes-sche...,CD8382,000000,1


***

# Extra Data from Analyses:

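## Trips that are being neglected because they happen past midnight

**TODO:** Someone should go back in and fix those times so that they work with the functions. The cells below convert `arrival_time` and `departure_time` from strings to datetimes on a placeholder date (2000-01-01), moving any time with an hour of 24 or more onto the following day.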

In [45]:
# Raw arrival times as "H:MM:SS" strings. This cell expects the column to still
# hold those strings: once it has run the column holds datetimes, so reload
# stop_times_df before running it again.
arrivalTimesArray = np.array(stop_times_df['arrival_time'])

# Every time is anchored to the same placeholder date; only the time of day
# (and the roll-over past midnight) is meaningful.
serviceDay = datetime.datetime(2000, 1, 1)

arrivalTimesArrayFixed = []
for timeString in arrivalTimesArray:
    hours, minutes, seconds = [int(part) for part in timeString.split(":")][:3]
    # Adding a timedelta moves hours of 24 or more onto the following day(s)
    # instead of handing an out-of-range hour to datetime.datetime().
    arrivalTimesArrayFixed.append(
        serviceDay + datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
    )

stop_times_df['arrival_time'] = arrivalTimesArrayFixed
stop_times_df.head()

Unnamed: 0,trip_id,stop_sequence,stop_id,pickup_type,drop_off_type,arrival_time,departure_time,timepoint,stop_headsign,shape_dist_traveled
0,9999999,1,7605,0,1,2000-01-01 05:27:00,5:27:00,1,EAST TRANSFER,0.0249
1,9999999,2,7739,0,0,2000-01-01 05:27:40,5:27:40,0,EAST TRANSFER,0.2003
2,9999999,3,7119,0,0,2000-01-01 05:28:33,5:28:33,0,EAST TRANSFER,0.4352
3,9999999,4,7107,0,0,2000-01-01 05:28:53,5:28:53,0,EAST TRANSFER,0.5255
4,9999999,5,7663,0,0,2000-01-01 05:29:51,5:29:51,0,EAST TRANSFER,0.782


In [46]:
# Raw departure times as "H:MM:SS" strings. This cell expects the column to
# still hold those strings: once it has run the column holds datetimes, so
# reload stop_times_df before running it again.
departureTimesArray = np.array(stop_times_df['departure_time'])

# Every time is anchored to the same placeholder date; only the time of day
# (and the roll-over past midnight) is meaningful.
serviceDay = datetime.datetime(2000, 1, 1)

departureTimesArrayFixed = []
for timeString in departureTimesArray:
    hours, minutes, seconds = [int(part) for part in timeString.split(":")][:3]
    # Adding a timedelta moves hours of 24 or more onto the following day(s)
    # instead of handing an out-of-range hour to datetime.datetime().
    departureTimesArrayFixed.append(
        serviceDay + datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
    )

stop_times_df['departure_time'] = departureTimesArrayFixed
stop_times_df.head()

Unnamed: 0,trip_id,stop_sequence,stop_id,pickup_type,drop_off_type,arrival_time,departure_time,timepoint,stop_headsign,shape_dist_traveled
0,9999999,1,7605,0,1,2000-01-01 05:27:00,2000-01-01 05:27:00,1,EAST TRANSFER,0.0249
1,9999999,2,7739,0,0,2000-01-01 05:27:40,2000-01-01 05:27:40,0,EAST TRANSFER,0.2003
2,9999999,3,7119,0,0,2000-01-01 05:28:33,2000-01-01 05:28:33,0,EAST TRANSFER,0.4352
3,9999999,4,7107,0,0,2000-01-01 05:28:53,2000-01-01 05:28:53,0,EAST TRANSFER,0.5255
4,9999999,5,7663,0,0,2000-01-01 05:29:51,2000-01-01 05:29:51,0,EAST TRANSFER,0.782


datetime.datetime(2000, 1, 1, 5, 27, 40)