# Station Ranking

## Packages and Configs

In [1]:
#Required Packages
import numpy as np
import pandas as pd
import datetime
from datetime import timedelta
import urllib.request
import matplotlib.pyplot as plt
import pickle

In [2]:
# Notebook-wide pandas display settings: show every column, cap printed
# rows at 25, and print floats with 3 decimal places.
DISPLAY_OPTIONS = {
    'display.max_columns': None,
    'display.max_rows': 25,
    'display.precision': 3,
}
for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

## Data Import

In [3]:
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`.

    NOTE: unpickling can execute arbitrary code, so only load files that were
    generated locally by this project.
    """
    with open(path, 'rb') as source:
        return pickle.load(source)


# Each input is produced by the script/notebook named in its trailing comment.
mta_data_daily = _read_pickle('pickle/mta_data_daily.pickle')  # run scrape_data to generate
wgt_daily_top = _read_pickle('pickle/wgt_daily_top.pickle')  # run eda_gender to generate
station_CU_proximity = _read_pickle('pickle/station_CU_proximity.pickle')  # run college_university_data to generate

## Unweighted Ranking

In [4]:
# Preview the first five rows of mta_data_daily to check its columns.
mta_data_daily.head(n=5)

Unnamed: 0,C/A,UNIT,SCP,STATION,LINENAME,DATE,TIME_DELTA,ENTRIES_DELTA,EXITS_DELTA,TOTAL_DELTA
0,A002,R051,02-00-00,59 ST,NQR456W,2019-03-30,1 days,893.0,299.0,1192.0
1,A002,R051,02-00-00,59 ST,NQR456W,2019-03-31,1 days,571.0,228.0,799.0
2,A002,R051,02-00-00,59 ST,NQR456W,2019-04-02,1 days,1593.0,554.0,2147.0
3,A002,R051,02-00-00,59 ST,NQR456W,2019-04-03,1 days,1652.0,424.0,2076.0
4,A002,R051,02-00-00,59 ST,NQR456W,2019-04-04,1 days,1638.0,511.0,2149.0


In [5]:
# Sum TOTAL_DELTA over all rows that share a station name and date, giving one
# total per (STATION, DATE) pair.
# NOTE(review): the grouping key is the STATION name only; if distinct stations
# share a name (different LINENAME), their totals are combined — confirm intended.
station_day_keys = ['STATION', 'DATE']
mta_data_daily_station = mta_data_daily.groupby(station_day_keys, as_index=False)['TOTAL_DELTA'].sum()
mta_data_daily_station.head()

Unnamed: 0,STATION,DATE,TOTAL_DELTA
0,1 AV,2019-03-30,32582.0
1,1 AV,2019-03-31,23767.0
2,1 AV,2019-04-01,41797.0
3,1 AV,2019-04-02,40623.0
4,1 AV,2019-04-03,39438.0


In [6]:
# Average the daily totals per station, then order stations from the highest
# to the lowest average and renumber the rows 0..n-1.
mta_data_avg_station = (
    mta_data_daily_station
    .groupby('STATION', as_index=False)['TOTAL_DELTA']
    .mean()
    .sort_values(by='TOTAL_DELTA', ascending=False)
    .reset_index(drop=True)
)
mta_data_avg_station.head()

Unnamed: 0,STATION,TOTAL_DELTA
0,34 ST-PENN STA,259611.698
1,34 ST-HERALD SQ,186831.855
2,GRD CNTRL-42 ST,180043.29
3,14 ST-UNION SQ,171547.065
4,TIMES SQ-42 ST,164357.677


## Cross Metric Comparison

### % Female Weighting

In [7]:
# Add a 1-based rank computed as (row index + 1).
# NOTE(review): this assumes wgt_daily_top arrives already ordered with a
# 0..n-1 index — confirm against the notebook that generates the pickle.
rank_column = '% Female Commuter Rank'
wgt_daily_top = wgt_daily_top.assign(**{rank_column: wgt_daily_top.index + 1})
wgt_daily_top.head()

Unnamed: 0,STATION,Weighted_Traffic,% Female Commuter Rank
0,23 ST,992500.245,1
1,FULTON ST,810617.24,2
2,GRD CNTRL-42 ST,690646.062,3
3,86 ST,632487.527,4
4,34 ST-PENN STA,524934.854,5


In [8]:
# Start the master table from the per-station averages and attach the
# '% Female Commuter Rank' column; the left join keeps every station from
# mta_data_avg_station even when it has no rank entry.
female_rank_lookup = wgt_daily_top[['STATION', '% Female Commuter Rank']]
master_table = mta_data_avg_station.copy(deep=True).merge(
    female_rank_lookup,
    on='STATION',
    how='left',
)
master_table.head()

Unnamed: 0,STATION,TOTAL_DELTA,% Female Commuter Rank
0,34 ST-PENN STA,259611.698,5
1,34 ST-HERALD SQ,186831.855,367
2,GRD CNTRL-42 ST,180043.29,3
3,14 ST-UNION SQ,171547.065,6
4,TIMES SQ-42 ST,164357.677,7


### University Exposure

In [9]:
# Bucket each station's SCHOOL COUNT into an exposure tier:
#   0 -> 'None', 1-3 -> 'Low', 4-8 -> 'Medium', 9 and above -> 'High'.
# The tiers match the earlier hand-written lookup for counts 0..11. That lookup
# listed every count explicitly, so any count above 11 silently became NaN; the
# open-ended top bin now labels such counts 'High' instead.
# Missing or negative counts still yield NaN.
# NOTE(review): SCHOOL COUNT is presumably the number of colleges/universities
# near the station — confirm in college_university_data.
exposure_bin_edges = [-1, 0, 3, 8, float('inf')]  # right-closed: (-1,0], (0,3], (3,8], (8,inf]
exposure_labels = ['None', 'Low', 'Medium', 'High']
station_CU_proximity['University Exposure'] = pd.cut(
    station_CU_proximity['SCHOOL COUNT'],
    bins=exposure_bin_edges,
    labels=exposure_labels,
).astype(object)  # plain strings rather than a Categorical, matching the previous dtype
station_CU_proximity.head()

Unnamed: 0,C/A,STATION,stop_id,Stop Name,GTFS Latitude,GTFS Longitude,Borough,Daytime Routes,coord,SCHOOL COUNT,University Exposure
0,A002,59 ST,R11,Lexington Av/59 St,40.763,-73.967,M,N W R,"(40.76266, -73.967258)",6,Medium
1,A006,5 AV/59 ST,R13,5 Av/59 St,40.765,-73.973,M,N W R,"(40.764811, -73.973347)",3,Low
3,A010,57 ST-7 AV,R14,57 St - 7 Av,40.765,-73.981,M,N Q R W,"(40.764664, -73.98065799999999)",4,Medium
5,A013,49 ST,R15,49 St,40.76,-73.984,M,N R W,"(40.759901, -73.984139)",3,Low
9,A021,TIMES SQ-42 ST,R16,Times Sq - 42 St,40.755,-73.987,M,N Q R W,"(40.754672, -73.986754)",4,Medium


In [16]:
# Dependency note: the `tabulate` package is required below. Install it once from a shell with:
#   conda install --name metis -c conda-forge tabulate

UsageError: Line magic function `%conda` not found.


In [17]:
from tabulate import tabulate

In [10]:
# Attach each station's 'University Exposure' tier and give the traffic column
# a presentation-friendly name.
# NOTE(review): if station_CU_proximity holds more than one row per STATION,
# this left join repeats master_table rows — confirm STATION is unique there.
# NOTE(review): re-running this cell merges a second time onto the already
# merged master_table; re-run the cell that first builds master_table before it.
university_exposure_lookup = station_CU_proximity[['STATION', 'University Exposure']]
master_table = (
    master_table
    .merge(university_exposure_lookup, on='STATION', how='left')
    .rename(columns={'TOTAL_DELTA': 'Avg. Daily Commuters'})
)
master_table.head(10)

Unnamed: 0,STATION,Avg. Daily Commuters,% Female Commuter Rank,University Exposure
0,34 ST-PENN STA,259611.698,5,Medium
1,34 ST-HERALD SQ,186831.855,367,Medium
2,GRD CNTRL-42 ST,180043.29,3,Low
3,14 ST-UNION SQ,171547.065,6,High
4,TIMES SQ-42 ST,164357.677,7,Medium
5,23 ST,147124.258,1,High
6,42 ST-PORT AUTH,146540.143,11,Medium
7,86 ST,131357.742,4,
8,FULTON ST,126048.397,2,Medium
9,125 ST,119682.581,8,Medium


In [19]:
# Build HTML table markup for the first ten rows of master_table and print it
# as text (the markup itself is shown, not a rendered table).
top_rows_html = tabulate(master_table.head(10), headers="keys", tablefmt="html")
print(top_rows_html)

<table>
<thead>
<tr><th style="text-align: right;">  </th><th>STATION        </th><th style="text-align: right;">  Avg. Daily Commuters</th><th style="text-align: right;">  % Female Commuter Rank</th><th>University Exposure  </th></tr>
</thead>
<tbody>
<tr><td style="text-align: right;"> 0</td><td>34 ST-PENN STA </td><td style="text-align: right;">                259612</td><td style="text-align: right;">                       5</td><td>Medium               </td></tr>
<tr><td style="text-align: right;"> 1</td><td>34 ST-HERALD SQ</td><td style="text-align: right;">                186832</td><td style="text-align: right;">                     367</td><td>Medium               </td></tr>
<tr><td style="text-align: right;"> 2</td><td>GRD CNTRL-42 ST</td><td style="text-align: right;">                180043</td><td style="text-align: right;">                       3</td><td>Low                  </td></tr>
<tr><td style="text-align: right;"> 3</td><td>14 ST-UNION SQ </td><td style="text-align

In [11]:
# Station names from the first ten rows of master_table, kept as a Series.
top10 = master_table.head(10)['STATION']
top10

0     34 ST-PENN STA
1    34 ST-HERALD SQ
2    GRD CNTRL-42 ST
3     14 ST-UNION SQ
4     TIMES SQ-42 ST
5              23 ST
6    42 ST-PORT AUTH
7              86 ST
8          FULTON ST
9             125 ST
Name: STATION, dtype: object

In [12]:
# Save the top10 Series to pickle/top10.pickle (the pickle/ directory must exist).
with open('pickle/top10.pickle', mode='wb') as pickle_file:
    pickle.dump(top10, pickle_file)