In [1]:
#Feel free to modify this code as you see fit.

#NOTE: Some stations that are marked as separate are actually part of the same "complex." e.g. 59 ST & 59 ST COLUMBUS
#I haven't found a way to automatically combine their entries in the dataframe.

import requests
import pandas as pd

def mta_tally(link, dateTimeStamp):
    """Tally cumulative turnstile entries and exits per station at one timestamp.

    Downloads the turnstile archive at ``link``, keeps only the lines that
    contain ``dateTimeStamp``, and sums the entry and exit counters of every
    matching line per station name.

    The counters are lifetime totals, so to get the traffic between two dates
    (e.g. April 3 to July 10) call this function once per date and subtract
    the earlier frame from the later one.

    Parameters
    ----------
    link : str
        URL of the comma-separated turnstile archive to download.
    dateTimeStamp : str
        Date and time to pull, formatted ``MM/DD/YYYY,HH:00:00``. It is
        matched as a substring of each raw line, so it must be spelled exactly
        as it appears in the archive. Timestamps are taken every four hours.

    Returns
    -------
    pandas.DataFrame
        One row per station (index = station name, in order of first
        appearance) with columns ``ENTRIES``, ``EXITS``,
        ``EXITS LESS ENTRIES`` and ``EXIT-ENTRY RATIO``. The ratio is NaN for
        a station whose summed entries are 0. The frame is empty when no line
        matches.

    Raises
    ------
    ValueError
        If ``dateTimeStamp`` is empty, or a matching line holds a
        non-integer entry/exit counter.
    requests.HTTPError
        If the server answers with an error status.
    """
    # Validate before downloading: an empty stamp would match every line.
    if not dateTimeStamp:
        raise ValueError("dateTimeStamp must be a non-empty string")

    r = requests.get(link, timeout=60)
    # Fail loudly on 4xx/5xx instead of tallying an HTML error page.
    r.raise_for_status()

    return _tally_turnstile_text(r.text, dateTimeStamp)


def _tally_turnstile_text(text, dateTimeStamp):
    """Sum entries/exits per station over the lines of ``text`` containing ``dateTimeStamp``."""
    #Running [entries, exits] totals keyed by station name.
    #Dicts keep insertion order, so stations stay in order of first appearance.
    totals = {}

    for line in text.split('\n'):
        if dateTimeStamp not in line:
            continue

        fields = line.split(',')
        #Field 3 is the station name, 9 the entries counter, 10 the exits counter.
        #Skip truncated lines rather than crashing on a missing column.
        if len(fields) < 11:
            continue

        station_totals = totals.setdefault(fields[3].strip(), [0, 0])
        station_totals[0] += int(fields[9].strip())
        station_totals[1] += int(fields[10].strip())

    #Add Exits minus Entries, and Exit-Entry ratio, just in case anyone finds it helpful.
    tally_dict = {}
    for station, (entries, exits) in totals.items():
        #The ratio is undefined when no entries were recorded; use NaN instead of dividing by zero.
        ratio = exits / entries if entries else float('nan')
        tally_dict[station] = (entries, exits, exits - entries, ratio)

    #Build the dataframe with descriptive column names in one step.
    return pd.DataFrame.from_dict(
        tally_dict,
        orient='index',
        columns=["ENTRIES", "EXITS", "EXITS LESS ENTRIES", "EXIT-ENTRY RATIO"],
    )

In [None]:
#Example use:

#jul10link='http://web.mta.info/developers/data/nyct/turnstile/turnstile_210710.txt'
#jul10dateTimeStamp="07/09/2021,20:00:00"
#mta_tally(jul10link, jul10dateTimeStamp)