### This notebook will be for processing and cleaning the subway turnstile dataset for 2019
- Will need to determine all the turnstile differences for entries between current and last value
- Same for exits 
- This can give me a representation of how many people enter/exit station X in a given time period
- Do this for only stations in Manhattan

#### Manually cross-referencing which stations are in Manhattan:
- https://new.mta.info/accessibility/stations

In [89]:
import json
import pandas as pd

### Extract subway names and co-ordinates from overpass turbo json file


In [90]:
# Build the Manhattan station table from the Overpass Turbo GeoJSON export.
# (Earlier note: there are 161 stations in this dataset.)

# Open the geojson file and read it. The encoding is given explicitly because
# station names contain non-ASCII characters (e.g. the en dash in
# "34th Street–Herald Square") and the platform default may not be UTF-8.
with open('manSubTurn.geojson', encoding='utf-8') as file:
    data = json.load(file)

# One record per GeoJSON feature, assembled in a single pass instead of growing
# the DataFrame row by row.
#   name        - required; a feature without it raises KeyError
#   short_name  - optional; the string 'None' is used when the key is absent
#   lat / lon   - GeoJSON coordinates are ordered [lon, lat]
#   Link to Turnstile Data - placeholder 'None', filled in by later cells
subway = pd.DataFrame(
    [
        {
            'name': feature['properties']['name'],
            'short_name': feature['properties'].get('short_name', 'None'),
            'lat': feature['geometry']['coordinates'][1],
            'lon': feature['geometry']['coordinates'][0],
            'Link to Turnstile Data': 'None',
        }
        for feature in data['features']
    ],
    columns=['name', 'short_name', 'lat', 'lon', 'Link to Turnstile Data'],
)
    

In [91]:
# Show the last rows (up to 20) of the station table to sanity-check the extraction.
subway.tail(20)

Unnamed: 0,name,short_name,lat,lon,Link to Turnstile Data
142,Grand Central Terminal,,40.752806,-73.977179,
143,Marble Hill,,40.874924,-73.912702,
144,Manhattan,,41.418606,-87.989013,
145,59th Street,59 St,40.762707,-73.96788,
146,Chambers Street,Chambers St,40.714916,-74.007968,
147,Fulton Street,Fulton St,40.710186,-74.007664,
148,49th Street,49 St,40.759901,-73.984139,
149,34th Street–Herald Square,34 St–Herald Sq,40.749719,-73.987823,
150,23rd Street,23 St,40.742853,-73.992834,
151,Union Square,Union Sq,40.734603,-73.99036,


In [92]:
#remove duplicates? depends on how granular the data for 2019 is

### Now import the turnstile data for 2019

In [93]:
# Read the 2019 turnstile export into a DataFrame through an explicitly opened handle.
with open('Turnstile_Usage_Data__2019.csv') as turnstile_csv:
    turn_data = pd.read_csv(turnstile_csv)

# Preview the first rows of the raw turnstile data.
turn_data.head(20)

Unnamed: 0,C/A,Unit,SCP,Station,Line Name,Division,Date,Time,Description,Entries,Exits
0,A033,R170,02-00-05,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,17538854,7031168
1,A033,R170,02-00-02,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,14983900,14554087
2,A033,R170,02-06-00,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,769115,559221
3,A033,R170,02-00-03,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,7191422,8417203
4,A033,R170,02-06-01,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,71047673,20925389
5,A033,R170,02-00-01,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,2430093,2921770
6,A033,R170,02-00-00,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,271981,828662
7,A033,R170,02-00-04,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,6483080,4945335
8,A034,R170,03-00-02,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,4632187,3269462
9,A034,R170,03-03-00,14 ST-UNION SQ,LNQR456W,BMT,12/27/2019,00:00:00,REGULAR,69926,219187


In [94]:
# Count the distinct values in the turnstile 'Station' column, i.e. how many
# unique station names the turnstile data contains.
turn_data['Station'].nunique()

379

#### Columns of interest
- Station, this is the station name which I will compare with Overpass turbo name data to only have manhattan turnstile data
- Date, needed for determining day of the week and time of year
- Time, needed for determining time of day
- Entries, needed for determining how many people entered the station, useful for busyness
- Exits, needed for determining how many people exited the station, useful for busyness
- Might also need to include individual turnstile data to see where people are entering/exiting the station and for comparing that turnstile to its previous value to see the difference

### Drop the rows that are not in Manhattan
#### Change overpass turbo names to match turnstile names

In [95]:
# Upper-case both Overpass Turbo name columns so they can be compared with the
# turnstile 'Station' values.
for name_column in ('name', 'short_name'):
    subway[name_column] = subway[name_column].str.upper()

subway.head(20)

Unnamed: 0,name,short_name,lat,lon,Link to Turnstile Data
0,96TH STREET,96 ST,40.794379,-73.972,
1,BOWLING GREEN,NONE,40.704509,-74.014095,
2,SOUTH FERRY,NONE,40.70171,-74.013149,
3,86TH STREET,86 ST,40.779494,-73.955529,
4,28TH STREET,28 ST,40.743314,-73.984075,
5,14TH STREET–UNION SQUARE,14 ST–UNION SQ,40.735761,-73.990649,
6,28TH STREET,28 ST,40.745494,-73.988691,
7,CANAL STREET,CANAL ST,40.718092,-73.999892,
8,BROADWAY–LAFAYETTE STREET,B’WAY–LAFAYETTE ST,40.725297,-73.996204,
9,5TH AVENUE,5 AV,40.753743,-73.9819,


In [96]:
# Link every Overpass Turbo station to a turnstile 'Station' name.
# The short name is tried first; if it is not in the turnstile data, the long
# name is tried; stations matching neither are recorded for manual mapping.

# Build the lookup once: testing membership against the full 'Station' column
# inside the loop rescans the whole turnstile table for every station.
turnstile_station_names = set(turn_data['Station'].unique())

stations_to_extract_from_turnstile = []
change_name_stations_to_extract_from_turnstile = []
# Iterate by index label so that the label-based .loc write below always hits
# the row that was just read, even when the index is not exactly 0..n-1.
for row_label, station in subway[['name', 'short_name']].iterrows():
    name = station['name']
    short_name = station['short_name']
    if short_name in turnstile_station_names:
        stations_to_extract_from_turnstile.append(short_name)
        subway.loc[row_label, 'Link to Turnstile Data'] = short_name
    elif name in turnstile_station_names:
        stations_to_extract_from_turnstile.append(name)
        subway.loc[row_label, 'Link to Turnstile Data'] = name
    else:
        # Placeholder entry; the real turnstile name is supplied by hand later.
        change_name_stations_to_extract_from_turnstile.append('None for '+name)


print(stations_to_extract_from_turnstile)

In [None]:
# Inspect the first rows to see which stations received a turnstile link.
subway.head(20)

Unnamed: 0,name,short_name,lat,lon,Link to Turnstile Data
0,96TH STREET,96 ST,40.794379,-73.972,96 ST
1,BOWLING GREEN,NONE,40.704509,-74.014095,BOWLING GREEN
2,SOUTH FERRY,NONE,40.70171,-74.013149,SOUTH FERRY
3,86TH STREET,86 ST,40.779494,-73.955529,86 ST
4,28TH STREET,28 ST,40.743314,-73.984075,28 ST
5,14TH STREET–UNION SQUARE,14 ST–UNION SQ,40.735761,-73.990649,
6,28TH STREET,28 ST,40.745494,-73.988691,28 ST
7,CANAL STREET,CANAL ST,40.718092,-73.999892,CANAL ST
8,BROADWAY–LAFAYETTE STREET,B’WAY–LAFAYETTE ST,40.725297,-73.996204,
9,5TH AVENUE,5 AV,40.753743,-73.9819,


In [None]:
# List the stations that could not be matched automatically.
print(change_name_stations_to_extract_from_turnstile)
# The turnstile names for these stations will need to be supplied manually.

['None for 14TH STREET–UNION SQUARE', 'None for BROADWAY–LAFAYETTE STREET', 'None for 5TH AVENUE', 'None for 42ND STREET–GRAND CENTRAL', 'None for 5TH AVENUE–59TH STREET', 'None for HARLEM–148TH STREET', 'None for 163RD STREET–AMSTERDAM AVENUE', 'None for WTC CORTLANDT', 'None for WEST 4TH STREET–WASHINGTON SQUARE', 'None for ESSEX STREET', 'None for LEXINGTON AVENUE–63RD STREET', 'None for 42ND STREET–BRYANT PARK', 'None for LEXINGTON AVENUE–59TH STREET', 'None for 5TH AVENUE–53RD STREET', 'None for 57TH STREET–7TH AVENUE', 'None for 8TH STREET–NEW YORK UNIVERSITY', 'None for 42ND STREET–PORT AUTHORITY BUS TERMINAL', 'None for ROOSEVELT ISLAND', 'None for NEW YORK PENN STATION', 'None for 66TH STREET–LINCOLN CENTER', 'None for WORLD TRADE CENTER', 'None for CENTRAL PARK NORTH–110TH STREET', 'None for BROOKLYN BRIDGE–CITY HALL', 'None for 42ND STREET–TIMES SQUARE', 'None for MARBLE HILL–225TH STREET', 'None for 137TH STREET–CITY COLLEGE', 'None for CATHEDRAL PARKWAY–110TH STREET', 'Non

In [None]:
# Number of entries recorded for stations without an automatic match.
print(len(change_name_stations_to_extract_from_turnstile))

56


In [None]:
# Hand-picked turnstile 'Station' names, one per unmatched station, in the same
# order as the 'None for ...' placeholders already in the list. They are added
# after those placeholders, so the list ends up holding placeholders first and
# manual names second.
manual_turnstile_names = [
    '14 ST-UNION SQ',
    "B'WAY-LAFAYETTE",
    '5 AVE',
    'GRD CNTRL-42 ST',
    '5 AV/59 ST',
    'HARLEM 148 ST',
    '163 ST-AMSTERDM',
    'WTC-CORTLANDT',
    'W 4 ST-WASH SQ',
    'DELANCEY/ESSEX',
    'LEXINGTON AV/63',
    '42 ST-BRYANT PK',
    'None',
    '5 AV/53 ST',
    '57 ST-7 AV',
    '8 ST-NYU',
    'None as bus terminal',
    'ROOSEVELT ISLND',
    '34 ST-PENN STA',
    '66 ST-LINCOLN',
    'WORLD TRADE CTR',
    'CENTRAL PK N110',
    'BROOKLYN BRIDGE',
    'TIMES SQ-42 ST',
    'MARBLE HILL-225',
    '137 ST CITY COL',
    'CATHEDRAL PKWY',
    '116 ST-COLUMBIA',
    '34 ST-PENN STA',
    'TIMES SQ-42 ST',
    'CHRISTOPHER ST',
    'CATHEDRAL PKWY',
    '68ST-HUNTER CO',
    '34 ST-PENN STA',
    'LEXINGTON AV/53',
    '47-50 STS ROCK',
    'DELANCEY/ESSEX',
    '34 ST-HERALD SQ',
    '81 ST-MUSEUM',
    '59 ST COLUMBUS',
    'WHITEHALL S-FRY',
    'WORLD TRADE CTR',
    '14 ST-UNION SQ',
    'GRD CNTRL-42 ST',
    'GRD CNTRL-42 ST',
    'INWOOD-207 ST',
    '125 ST',
    'MARBLE HILL-225',
    'GRD CNTRL-42 ST',
    '34 ST-HERALD SQ',
    '14 ST-UNION SQ',
    '34 ST-HUDSON YD',
    '59 ST COLUMBUS',
    'TIMES SQ-42 ST',
    'TIMES SQ-42 ST',
    'TIMES SQ-42 ST',
]
change_name_stations_to_extract_from_turnstile.extend(manual_turnstile_names)

In [None]:
# Length of the list once the manual names have been appended after the
# 'None for ...' placeholders.
len(change_name_stations_to_extract_from_turnstile)

112

- No turnstile station name found for LEXINGTON AVENUE–59TH STREET (entered as 'None' in the manual list)

- Have a triple of penn station 34 st
- Duplicate of TIMES SQ-42 ST
- Duplicate of CATHEDRAL PKWY
- Duplicate of DELANCEY/ESSEX
- Duplicate of WORLD TRADE CTR
- Duplicate of 14 ST-UNION SQ
- Quadruple of GRD CNTRL-42 ST
- Duplicate of 125 ST
- Duplicate of MARBLE HILL-225
- 



In [None]:
# Second matching pass: same short-name / long-name lookup as before, but every
# station that still has no automatic match now takes the next hand-picked name
# from change_name_stations_to_extract_from_turnstile.

# Build the lookup once instead of rescanning the 'Station' column per station.
turnstile_station_names = set(turn_data['Station'].unique())

# The list holds the 'None for ...' placeholders first and the manual names
# after them. The offset is derived from the list itself instead of being
# hard-coded, so it stays right if the number of unmatched stations changes.
placeholder_count = sum(
    entry.startswith('None for ')
    for entry in change_name_stations_to_extract_from_turnstile
)
manual_names = iter(change_name_stations_to_extract_from_turnstile[placeholder_count:])

stations_to_extract_from_turnstile = []
# Iterate by index label so the .loc write always targets the row just read.
for row_label, station in subway[['name', 'short_name']].iterrows():
    name = station['name']
    short_name = station['short_name']
    if short_name in turnstile_station_names:
        stations_to_extract_from_turnstile.append(short_name)
        subway.loc[row_label, 'Link to Turnstile Data'] = short_name
    elif name in turnstile_station_names:
        stations_to_extract_from_turnstile.append(name)
        subway.loc[row_label, 'Link to Turnstile Data'] = name
    else:
        # Manual names are consumed strictly in order, one per unmatched station.
        try:
            manual_name = next(manual_names)
        except StopIteration:
            raise IndexError(
                'ran out of manual turnstile names while linking ' + str(name)
            ) from None
        subway.loc[row_label, 'Link to Turnstile Data'] = manual_name

subway.head(20)

Unnamed: 0,name,short_name,lat,lon,Link to Turnstile Data
0,96TH STREET,96 ST,40.794379,-73.972,96 ST
1,BOWLING GREEN,NONE,40.704509,-74.014095,BOWLING GREEN
2,SOUTH FERRY,NONE,40.70171,-74.013149,SOUTH FERRY
3,86TH STREET,86 ST,40.779494,-73.955529,86 ST
4,28TH STREET,28 ST,40.743314,-73.984075,28 ST
5,14TH STREET–UNION SQUARE,14 ST–UNION SQ,40.735761,-73.990649,14 ST-UNION SQ
6,28TH STREET,28 ST,40.745494,-73.988691,28 ST
7,CANAL STREET,CANAL ST,40.718092,-73.999892,CANAL ST
8,BROADWAY–LAFAYETTE STREET,B’WAY–LAFAYETTE ST,40.725297,-73.996204,B'WAY-LAFAYETTE
9,5TH AVENUE,5 AV,40.753743,-73.9819,5 AVE


In [None]:
#show the entire data frame
#pd.set_option('display.max_rows', None)
#subway

In [None]:
# Keep only the first station row for each distinct turnstile link; later rows
# that repeat the same 'Link to Turnstile Data' value are discarded.
subway = subway.drop_duplicates(subset='Link to Turnstile Data')
subway.shape[0]

96

### Now have 96 subway locations in Manhattan that we can reference in the turnstile data


In [None]:
# Trim the turnstile data down to the Manhattan stations only; the result is
# the dataset used for the rest of the project.

# Start from an empty frame with the turnstile columns; it is replaced by the
# merge in the next cell.
man_turn_data = pd.DataFrame(columns=turn_data.columns)

# A row-by-row filter (nested loops calling DataFrame.append) used to sit here
# inside a triple-quoted string. It was never executed, but as the cell's last
# expression it was echoed as output, and DataFrame.append was removed in
# pandas 2.0, so it has been dropped.

"\nfor i in range(len(subway)):\n    station_to_find=subway['Link to Turnstile Data'][i]\n    print(station_to_find)\n    for j in range(len(turn_data)):\n        if turn_data['Station'][j] == station_to_find:\n            man_turn_data = man_turn_data.append(turn_data.iloc[j], ignore_index=True)\n\nman_turn_data\n"

In [None]:
# Inner-join the station table with the turnstile rows: a turnstile row is kept
# only when its 'Station' equals some station's 'Link to Turnstile Data'.
man_turn_data = subway.merge(
    turn_data,
    how='inner',
    left_on='Link to Turnstile Data',
    right_on='Station',
)


In [None]:
# Preview the merged station + turnstile rows.
man_turn_data.head(10)

Unnamed: 0,name,short_name,lat,lon,Link to Turnstile Data,C/A,Unit,SCP,Station,Line Name,Division,Date,Time,Description,Entries,Exits
0,96TH STREET,96 ST,40.794379,-73.972,96 ST,R168A,R168,00-00-02,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,7050714,3195113
1,96TH STREET,96 ST,40.794379,-73.972,96 ST,R168A,R168,00-02-00,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,6592911,7757191
2,96TH STREET,96 ST,40.794379,-73.972,96 ST,R168A,R168,00-03-02,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,11982395,11692946
3,96TH STREET,96 ST,40.794379,-73.972,96 ST,R168A,R168,00-00-00,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,1875264,1933375
4,96TH STREET,96 ST,40.794379,-73.972,96 ST,R168A,R168,00-00-01,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,6189156,2545523
5,96TH STREET,96 ST,40.794379,-73.972,96 ST,R168A,R168,00-03-01,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,4028999,5063535
6,96TH STREET,96 ST,40.794379,-73.972,96 ST,R168A,R168,00-03-00,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,4580292,10195272
7,96TH STREET,96 ST,40.794379,-73.972,96 ST,R169,R168,01-00-01,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,1620619,988385
8,96TH STREET,96 ST,40.794379,-73.972,96 ST,R169,R168,01-03-02,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,9245121,8843365
9,96TH STREET,96 ST,40.794379,-73.972,96 ST,R169,R168,01-05-01,96 ST,123,IRT,12/27/2019,00:00:00,REGULAR,0,1607


In [None]:
# Row count after restricting the turnstile data to the linked stations.
len(man_turn_data)
# ~4.5 million rows, i.e. 5.6 (presumably million) fewer than the original turnstile data


4576249

### feature selection


In [None]:
# Feature selection: drop the columns that are not needed downstream:
#   short_name, Link to Turnstile Data, Unit, Line Name, Division
# TODO: if Description is a "recover aud" entry, disregard those rows?
# NOTE(review): 'Unit' may be part of what identifies an individual turnstile;
# confirm it is not needed for per-turnstile differencing before dropping it.
columns_to_drop = ['short_name', 'Link to Turnstile Data', 'Unit', 'Line Name', 'Division']

# Reassign instead of dropping in place, and ignore columns that are already
# gone, so that re-running this cell does not raise KeyError.
man_turn_data = man_turn_data.drop(columns=columns_to_drop, errors='ignore')

In [None]:
# Preview the frame after the column drop.
man_turn_data.head(20)

Unnamed: 0,name,lat,lon,SCP,Station,Date,Time,Description,Entries,Exits
0,96TH STREET,40.794379,-73.972,00-00-02,96 ST,12/27/2019,00:00:00,REGULAR,7050714,3195113
1,96TH STREET,40.794379,-73.972,00-02-00,96 ST,12/27/2019,00:00:00,REGULAR,6592911,7757191
2,96TH STREET,40.794379,-73.972,00-03-02,96 ST,12/27/2019,00:00:00,REGULAR,11982395,11692946
3,96TH STREET,40.794379,-73.972,00-00-00,96 ST,12/27/2019,00:00:00,REGULAR,1875264,1933375
4,96TH STREET,40.794379,-73.972,00-00-01,96 ST,12/27/2019,00:00:00,REGULAR,6189156,2545523
5,96TH STREET,40.794379,-73.972,00-03-01,96 ST,12/27/2019,00:00:00,REGULAR,4028999,5063535
6,96TH STREET,40.794379,-73.972,00-03-00,96 ST,12/27/2019,00:00:00,REGULAR,4580292,10195272
7,96TH STREET,40.794379,-73.972,01-00-01,96 ST,12/27/2019,00:00:00,REGULAR,1620619,988385
8,96TH STREET,40.794379,-73.972,01-03-02,96 ST,12/27/2019,00:00:00,REGULAR,9245121,8843365
9,96TH STREET,40.794379,-73.972,01-05-01,96 ST,12/27/2019,00:00:00,REGULAR,0,1607


In [None]:
# Sort the rows alphabetically by station name, in place.
# NOTE: only 'name' is a sort key here; Date and Time are not part of this sort.
# Ordering by time is done in a later cell, once the timestamp column exists.
man_turn_data.sort_values(by=['name'], inplace=True)
man_turn_data.head(20)

Unnamed: 0,name,lat,lon,SCP,Station,Date,Time,Description,Entries,Exits
3007920,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,11/17/2019,16:00:00,REGULAR,4691357,1945485
3010850,103RD STREET,40.790298,-73.947624,00-00-02,103 ST,10/18/2019,13:00:00,REGULAR,16673625,4540530
3010851,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,10/18/2019,13:00:00,REGULAR,13639657,3993107
3010852,103RD STREET,40.790298,-73.947624,00-03-02,103 ST,10/18/2019,13:00:00,REGULAR,11693983,10975178
3010853,103RD STREET,40.790298,-73.947624,00-00-00,103 ST,10/18/2019,13:00:00,REGULAR,2355096,925048
3010854,103RD STREET,40.790298,-73.947624,00-00-00,103 ST,10/18/2019,13:00:00,REGULAR,2355096,925048
3010855,103RD STREET,40.790298,-73.947624,00-03-01,103 ST,10/18/2019,13:00:00,REGULAR,12771924,10179811
3010856,103RD STREET,40.790298,-73.947624,00-03-01,103 ST,10/18/2019,13:00:00,REGULAR,12771924,10179811
3010857,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,10/18/2019,13:00:00,REGULAR,13639657,3993107
3010858,103RD STREET,40.790298,-73.947624,00-03-02,103 ST,10/18/2019,13:00:00,REGULAR,11693983,10975178


In [None]:
# Convert the separate Date and Time strings into a single datetime column and
# a Unix timestamp (float seconds since 1970-01-01).

# The format is given explicitly (MM/DD/YYYY HH:MM:SS, as seen in the data
# preview) so that parsing is unambiguous and does not depend on per-value
# format inference across millions of rows.
man_turn_data['DateTime'] = pd.to_datetime(
    man_turn_data['Date'] + ' ' + man_turn_data['Time'],
    format='%m/%d/%Y %H:%M:%S',
)

# Vectorised epoch conversion. The datetimes are timezone-naive and are counted
# from the naive epoch, which gives the same float values as calling
# Timestamp.timestamp() on each row, without a Python-level loop.
man_turn_data['UnixTimestamp'] = (
    man_turn_data['DateTime'] - pd.Timestamp('1970-01-01')
) / pd.Timedelta(seconds=1)
man_turn_data.head(20)


Unnamed: 0,name,lat,lon,SCP,Station,Date,Time,Description,Entries,Exits,DateTime,UnixTimestamp
3007920,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,11/17/2019,16:00:00,REGULAR,4691357,1945485,2019-11-17 16:00:00,1574006000.0
3010850,103RD STREET,40.790298,-73.947624,00-00-02,103 ST,10/18/2019,13:00:00,REGULAR,16673625,4540530,2019-10-18 13:00:00,1571404000.0
3010851,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,10/18/2019,13:00:00,REGULAR,13639657,3993107,2019-10-18 13:00:00,1571404000.0
3010852,103RD STREET,40.790298,-73.947624,00-03-02,103 ST,10/18/2019,13:00:00,REGULAR,11693983,10975178,2019-10-18 13:00:00,1571404000.0
3010853,103RD STREET,40.790298,-73.947624,00-00-00,103 ST,10/18/2019,13:00:00,REGULAR,2355096,925048,2019-10-18 13:00:00,1571404000.0
3010854,103RD STREET,40.790298,-73.947624,00-00-00,103 ST,10/18/2019,13:00:00,REGULAR,2355096,925048,2019-10-18 13:00:00,1571404000.0
3010855,103RD STREET,40.790298,-73.947624,00-03-01,103 ST,10/18/2019,13:00:00,REGULAR,12771924,10179811,2019-10-18 13:00:00,1571404000.0
3010856,103RD STREET,40.790298,-73.947624,00-03-01,103 ST,10/18/2019,13:00:00,REGULAR,12771924,10179811,2019-10-18 13:00:00,1571404000.0
3010857,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,10/18/2019,13:00:00,REGULAR,13639657,3993107,2019-10-18 13:00:00,1571404000.0
3010858,103RD STREET,40.790298,-73.947624,00-03-02,103 ST,10/18/2019,13:00:00,REGULAR,11693983,10975178,2019-10-18 13:00:00,1571404000.0


In [None]:
# Order the rows by station name (alphabetically) and, within each station, by
# Unix timestamp.
# NOTE(review): rows sharing a station and timestamp are not ordered by
# turnstile (SCP); confirm whether per-turnstile differencing needs an extra key.

man_turn_data = man_turn_data.sort_values(by=['name','UnixTimestamp'])
man_turn_data.head(20)

Unnamed: 0,name,lat,lon,SCP,Station,Date,Time,Description,Entries,Exits,DateTime,UnixTimestamp
3037515,103RD STREET,40.790298,-73.947624,00-03-02,103 ST,12/29/2018,00:00:00,REGULAR,8476658,26737489,2018-12-29 00:00:00,1546042000.0
3037514,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,12/29/2018,00:00:00,REGULAR,3999244,1607925,2018-12-29 00:00:00,1546042000.0
3037513,103RD STREET,40.790298,-73.947624,00-03-00,103 ST,12/29/2018,00:00:00,REGULAR,408415,557699,2018-12-29 00:00:00,1546042000.0
3037512,103RD STREET,40.790298,-73.947624,00-03-01,103 ST,12/29/2018,00:00:00,REGULAR,1659007,3170302,2018-12-29 00:00:00,1546042000.0
3037511,103RD STREET,40.790298,-73.947624,00-00-00,103 ST,12/29/2018,00:00:00,REGULAR,1634656,656635,2018-12-29 00:00:00,1546042000.0
3037510,103RD STREET,40.790298,-73.947624,00-00-02,103 ST,12/29/2018,00:00:00,REGULAR,16113503,4435458,2018-12-29 00:00:00,1546042000.0
3037509,103RD STREET,40.790298,-73.947624,00-03-00,103 ST,12/29/2018,00:00:00,REGULAR,8149599,4546826,2018-12-29 00:00:00,1546042000.0
3037508,103RD STREET,40.790298,-73.947624,00-03-02,103 ST,12/29/2018,00:00:00,REGULAR,11186555,10491083,2018-12-29 00:00:00,1546042000.0
3037507,103RD STREET,40.790298,-73.947624,00-03-01,103 ST,12/29/2018,00:00:00,REGULAR,12358341,9863905,2018-12-29 00:00:00,1546042000.0
3037506,103RD STREET,40.790298,-73.947624,00-00-01,103 ST,12/29/2018,00:00:00,REGULAR,13143142,3857058,2018-12-29 00:00:00,1546042000.0


In [None]:
# Write the cleaned turnstile data to disk for later use.
# The DataFrame index is written as the first (unnamed) column, because
# index=False is not passed.
man_turn_data.to_csv('man_turn_data.csv')