## City Bike Data
### June data for 2018, 2019, and 2020

In [12]:
# Dependencies
import pandas as pd
import numpy as np
import datetime as dt

In [13]:
# Load in CSV files and save dataframes
june2018 = pd.read_csv("monthly_data/201806-citibike-tripdata.csv")
june2019 = pd.read_csv("monthly_data/201906-citibike-tripdata.csv")
june2020 = pd.read_csv("monthly_data/202006-citibike-tripdata.csv")

In [14]:
# Combine 2018, 2019, 2020 as they have similar column names
june181920 = june2018.append([june2019, june2020], ignore_index = True)
june181920.head()

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender
0,569,2018-06-01 01:57:20.5140,2018-06-01 02:06:50.0880,72,W 52 St & 11 Ave,40.767272,-73.993929,173,Broadway & W 49 St,40.760683,-73.984527,21481,Subscriber,1999,1
1,480,2018-06-01 02:02:42.3980,2018-06-01 02:10:43.3540,72,W 52 St & 11 Ave,40.767272,-73.993929,477,W 41 St & 8 Ave,40.756405,-73.990026,19123,Subscriber,1988,1
2,692,2018-06-01 02:04:23.6240,2018-06-01 02:15:55.7470,72,W 52 St & 11 Ave,40.767272,-73.993929,457,Broadway & W 58 St,40.766953,-73.981693,26983,Subscriber,1986,1
3,664,2018-06-01 03:00:55.4610,2018-06-01 03:11:59.9060,72,W 52 St & 11 Ave,40.767272,-73.993929,379,W 31 St & 7 Ave,40.749156,-73.9916,26742,Subscriber,1973,1
4,818,2018-06-01 06:04:54.4270,2018-06-01 06:18:32.6170,72,W 52 St & 11 Ave,40.767272,-73.993929,459,W 20 St & 11 Ave,40.746745,-74.007756,26386,Subscriber,1984,1


In [15]:
# Rename column names to match the eventual ending data set
june181920 = june181920.rename(columns={"starttime": "Start Time", 
                                    "stoptime": "Stop Time",
                                    "start station id": "Start Station ID",
                                    "start station name": "Start Sation Name",
                                    "start station latitude": "Start Station Lat",
                                    "start station longitude": "Start Station Lng",
                                    "end station id": "End Station ID",
                                    "end station name": "End Station Name",
                                    "end station latitude": "End Station Lat",
                                    "end station longitude": "End Station Lng",
                                    "usertype": "Member Type",
                                    "gender": "Gender",
                                    })


In [16]:
# Drop incomplete trips
june181920.dropna

<bound method DataFrame.dropna of          tripduration                Start Time                 Stop Time  \
0                 569  2018-06-01 01:57:20.5140  2018-06-01 02:06:50.0880   
1                 480  2018-06-01 02:02:42.3980  2018-06-01 02:10:43.3540   
2                 692  2018-06-01 02:04:23.6240  2018-06-01 02:15:55.7470   
3                 664  2018-06-01 03:00:55.4610  2018-06-01 03:11:59.9060   
4                 818  2018-06-01 06:04:54.4270  2018-06-01 06:18:32.6170   
...               ...                       ...                       ...   
5960741           685  2020-06-30 23:59:41.1160  2020-07-01 00:11:06.7790   
5960742           446  2020-06-30 23:59:46.4260  2020-07-01 00:07:13.0860   
5960743           439  2020-06-30 23:59:47.4770  2020-07-01 00:07:06.5590   
5960744           890  2020-06-30 23:59:53.3950  2020-07-01 00:14:43.4270   
5960745           433  2020-06-30 23:59:53.9010  2020-07-01 00:07:07.0950   

         Start Station ID          Start 

In [17]:
# Convert the data and time to the same time format.
june181920["Start Time"] = pd.to_datetime(june181920["Start Time"])
june181920["Stop Time"] = pd.to_datetime(june181920["Stop Time"])

In [18]:
# Change the Gender column from values to names
june181920["Gender"].replace([0,1,2], ["unknown", "male", "female"], inplace=True)

In [19]:
june181920.dtypes

tripduration                  int64
Start Time           datetime64[ns]
Stop Time            datetime64[ns]
Start Station ID              int64
Start Sation Name            object
Start Station Lat           float64
Start Station Lng           float64
End Station ID                int64
End Station Name             object
End Station Lat             float64
End Station Lng             float64
bikeid                        int64
Member Type                  object
birth year                    int64
Gender                       object
dtype: object

In [20]:
# Confirm the data set
june181920.head()

Unnamed: 0,tripduration,Start Time,Stop Time,Start Station ID,Start Sation Name,Start Station Lat,Start Station Lng,End Station ID,End Station Name,End Station Lat,End Station Lng,bikeid,Member Type,birth year,Gender
0,569,2018-06-01 01:57:20.514,2018-06-01 02:06:50.088,72,W 52 St & 11 Ave,40.767272,-73.993929,173,Broadway & W 49 St,40.760683,-73.984527,21481,Subscriber,1999,male
1,480,2018-06-01 02:02:42.398,2018-06-01 02:10:43.354,72,W 52 St & 11 Ave,40.767272,-73.993929,477,W 41 St & 8 Ave,40.756405,-73.990026,19123,Subscriber,1988,male
2,692,2018-06-01 02:04:23.624,2018-06-01 02:15:55.747,72,W 52 St & 11 Ave,40.767272,-73.993929,457,Broadway & W 58 St,40.766953,-73.981693,26983,Subscriber,1986,male
3,664,2018-06-01 03:00:55.461,2018-06-01 03:11:59.906,72,W 52 St & 11 Ave,40.767272,-73.993929,379,W 31 St & 7 Ave,40.749156,-73.9916,26742,Subscriber,1973,male
4,818,2018-06-01 06:04:54.427,2018-06-01 06:18:32.617,72,W 52 St & 11 Ave,40.767272,-73.993929,459,W 20 St & 11 Ave,40.746745,-74.007756,26386,Subscriber,1984,male


In [21]:
# export combined dataframe to a new csv
june181920.to_csv("june18-20.csv", index = False)