In [1]:
import pandas as pd
from haversine import haversine, Unit

In [2]:
# Load the 2017 Ford GoBike trip records from the CSV in the working directory.
bike = pd.read_csv(filepath_or_buffer='2017-fordgobike-tripdata.csv')

In [3]:
# Summarise column names, non-null counts and dtypes to spot missing values early.
bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 519700 entries, 0 to 519699
Data columns (total 15 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   duration_sec             519700 non-null  int64  
 1   start_time               519700 non-null  object 
 2   end_time                 519700 non-null  object 
 3   start_station_id         519700 non-null  int64  
 4   start_station_name       519700 non-null  object 
 5   start_station_latitude   519700 non-null  float64
 6   start_station_longitude  519700 non-null  float64
 7   end_station_id           519700 non-null  int64  
 8   end_station_name         519700 non-null  object 
 9   end_station_latitude     519700 non-null  float64
 10  end_station_longitude    519700 non-null  float64
 11  bike_id                  519700 non-null  int64  
 12  user_type                519700 non-null  object 
 13  member_birth_year        453159 non-null  float64
 14  memb

In [4]:
# Preview the first 20 rows of the raw trip data.
bike.head(20)

Unnamed: 0,duration_sec,start_time,end_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bike_id,user_type,member_birth_year,member_gender
0,80110,2017-12-31 16:57:39.6540,2018-01-01 15:12:50.2450,74,Laguna St at Hayes St,37.776435,-122.426244,43,San Francisco Public Library (Grove St at Hyde...,37.778768,-122.415929,96,Customer,1987.0,Male
1,78800,2017-12-31 15:56:34.8420,2018-01-01 13:49:55.6170,284,Yerba Buena Center for the Arts (Howard St at ...,37.784872,-122.400876,96,Dolores St at 15th St,37.76621,-122.426614,88,Customer,1965.0,Female
2,45768,2017-12-31 22:45:48.4110,2018-01-01 11:28:36.8830,245,Downtown Berkeley BART,37.870348,-122.267764,245,Downtown Berkeley BART,37.870348,-122.267764,1094,Customer,,
3,62172,2017-12-31 17:31:10.6360,2018-01-01 10:47:23.5310,60,8th St at Ringold St,37.77452,-122.409449,5,Powell St BART Station (Market St at 5th St),37.783899,-122.408445,2831,Customer,,
4,43603,2017-12-31 14:23:14.0010,2018-01-01 02:29:57.5710,239,Bancroft Way at Telegraph Ave,37.868813,-122.258764,247,Fulton St at Bancroft Way,37.867789,-122.265896,3167,Subscriber,1997.0,Female
5,9226,2017-12-31 22:51:00.9180,2018-01-01 01:24:47.1660,30,San Francisco Caltrain (Townsend St at 4th St),37.776598,-122.395282,30,San Francisco Caltrain (Townsend St at 4th St),37.776598,-122.395282,1487,Customer,,
6,4507,2017-12-31 23:49:28.4220,2018-01-01 01:04:35.6190,259,Addison St at Fourth St,37.866249,-122.299371,259,Addison St at Fourth St,37.866249,-122.299371,3539,Customer,1991.0,Female
7,4334,2017-12-31 23:46:37.1960,2018-01-01 00:58:51.2110,284,Yerba Buena Center for the Arts (Howard St at ...,37.784872,-122.400876,284,Yerba Buena Center for the Arts (Howard St at ...,37.784872,-122.400876,1503,Customer,,
8,4150,2017-12-31 23:37:07.5480,2018-01-01 00:46:18.3080,20,Mechanics Monument Plaza (Market St at Bush St),37.7913,-122.399051,20,Mechanics Monument Plaza (Market St at Bush St),37.7913,-122.399051,3125,Customer,,
9,4238,2017-12-31 23:35:38.1450,2018-01-01 00:46:17.0530,20,Mechanics Monument Plaza (Market St at Bush St),37.7913,-122.399051,20,Mechanics Monument Plaza (Market St at Bush St),37.7913,-122.399051,2543,Customer,,


In [5]:
# Check that no record is missing its start location: True means every trip has
# both a start latitude and a start longitude. (Comparing the two columns'
# non-null counts would also pass when both were missing the same number of values.)
bike[["start_station_latitude", "start_station_longitude"]].notna().all().all()

True

In [6]:
# Check that no record is missing its end location: True means every trip has
# both an end latitude and an end longitude. (Comparing the two columns'
# non-null counts would also pass when both were missing the same number of values.)
bike[["end_station_latitude", "end_station_longitude"]].notna().all().all()

True

In [7]:
# Count rows that exactly duplicate an earlier row; 0 means every record is unique.
bike.duplicated().sum()

0

In [8]:
def haversine_func(data):
    """Return the great-circle distance between a trip's start and end stations.

    Parameters
    ----------
    data : mapping-like (e.g. one DataFrame row)
        Must provide "start_station_longitude", "start_station_latitude",
        "end_station_longitude" and "end_station_latitude".

    Returns
    -------
    The value returned by ``haversine`` for the two (latitude, longitude)
    points. No ``unit`` argument is passed, so the library's default unit
    applies (kilometres per the haversine package documentation).
    """
    coordinate_keys = (
        "start_station_longitude",
        "start_station_latitude",
        "end_station_longitude",
        "end_station_latitude",
    )
    start_lon, start_lat, end_lon, end_lat = (data[key] for key in coordinate_keys)
    # haversine expects each point as (latitude, longitude).
    return haversine((start_lat, start_lon), (end_lat, end_lon))

In [9]:
# Distance between each trip's start and end stations, computed one row at a time.
bike['haversine_dist'] = bike.apply(haversine_func, axis=1)

##  2 - Calculate the duration in seconds of each trip

In [10]:
# Parse the start timestamps (read from the CSV as strings) into datetime64 values.
bike['start_time'] = pd.to_datetime(bike.start_time)

In [11]:
# Parse the end timestamps (read from the CSV as strings) into datetime64 values.
bike['end_time'] = pd.to_datetime(bike.end_time)

In [12]:
# Elapsed time per trip; at this stage the column holds timedeltas, not seconds.
bike["trip_duration_in_sec"] = bike.end_time - bike.start_time

In [13]:
# Inspect the elapsed-time values (still timedeltas) before converting them to seconds.
bike["trip_duration_in_sec"]

0        0 days 22:15:10.591000
1        0 days 21:53:20.775000
2        0 days 12:42:48.472000
3        0 days 17:16:12.895000
4        0 days 12:06:43.570000
                  ...          
519695   0 days 00:07:15.910000
519696   0 days 00:07:11.459000
519697   0 days 00:07:04.840000
519698   0 days 00:06:06.051000
519699   0 days 00:03:08.961000
Name: trip_duration_in_sec, Length: 519700, dtype: timedelta64[ns]

In [14]:
# Store each trip's elapsed time as whole seconds in a float64 column.
# - Derived from the timestamps rather than from the column's current contents,
#   so re-running this cell gives the same result.
# - Uses .dt.total_seconds() because .astype("timedelta64[s]") only produces
#   float seconds on pandas < 2.0; newer versions return a timedelta64[s]
#   column, which does not behave like a plain number in later arithmetic.
bike["trip_duration_in_sec"] = (
    (bike["end_time"] - bike["start_time"]).dt.floor("s").dt.total_seconds()
)
bike["trip_duration_in_sec"]

0         80110.0
1         78800.0
2         45768.0
3         62172.0
4         43603.0
           ...   
519695      435.0
519696      431.0
519697      424.0
519698      366.0
519699      188.0
Name: trip_duration_in_sec, Length: 519700, dtype: float64

In [15]:
# Confirm the new haversine_dist and trip_duration_in_sec columns on the first rows.
bike.head()

Unnamed: 0,duration_sec,start_time,end_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bike_id,user_type,member_birth_year,member_gender,haversine_dist,trip_duration_in_sec
0,80110,2017-12-31 16:57:39.654,2018-01-01 15:12:50.245,74,Laguna St at Hayes St,37.776435,-122.426244,43,San Francisco Public Library (Grove St at Hyde...,37.778768,-122.415929,96,Customer,1987.0,Male,0.942932,80110.0
1,78800,2017-12-31 15:56:34.842,2018-01-01 13:49:55.617,284,Yerba Buena Center for the Arts (Howard St at ...,37.784872,-122.400876,96,Dolores St at 15th St,37.76621,-122.426614,88,Customer,1965.0,Female,3.069731,78800.0
2,45768,2017-12-31 22:45:48.411,2018-01-01 11:28:36.883,245,Downtown Berkeley BART,37.870348,-122.267764,245,Downtown Berkeley BART,37.870348,-122.267764,1094,Customer,,,0.0,45768.0
3,62172,2017-12-31 17:31:10.636,2018-01-01 10:47:23.531,60,8th St at Ringold St,37.77452,-122.409449,5,Powell St BART Station (Market St at 5th St),37.783899,-122.408445,2831,Customer,,,1.046624,62172.0
4,43603,2017-12-31 14:23:14.001,2018-01-01 02:29:57.571,239,Bancroft Way at Telegraph Ave,37.868813,-122.258764,247,Fulton St at Bancroft Way,37.867789,-122.265896,3167,Subscriber,1997.0,Female,0.63634,43603.0


In [26]:
# List the distinct user types present in the data.
bike["user_type"].unique()

array(['Customer', 'Subscriber'], dtype=object)

### Assuming each minute costs 0.35 cents, calculate the fee for each trip

In [16]:
def cost_of_trip(trip_duration, rate_per_minute=0.35):
    """Return the fee for a trip billed by the minute.

    Parameters
    ----------
    trip_duration : number or array-like of numbers
        Trip length in seconds; it is divided by 60 to obtain minutes.
    rate_per_minute : float, default 0.35
        Price charged per minute. The default keeps the rate this notebook
        assumes, so existing single-argument calls are unaffected.

    Returns
    -------
    The fee, in the same currency unit as ``rate_per_minute``. Partial minutes
    are billed proportionally (no rounding is applied).
    """
    seconds_per_minute = 60
    return (trip_duration / seconds_per_minute) * rate_per_minute

In [17]:
# Price every trip in a single vectorised call: cost_of_trip is plain arithmetic,
# so it accepts the whole seconds column and a per-element .apply() is unnecessary.
bike["Cost of trip"] = cost_of_trip(bike["trip_duration_in_sec"])

In [18]:
# Confirm the new "Cost of trip" column on the first rows.
bike.head()

Unnamed: 0,duration_sec,start_time,end_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bike_id,user_type,member_birth_year,member_gender,haversine_dist,trip_duration_in_sec,Cost of trip
0,80110,2017-12-31 16:57:39.654,2018-01-01 15:12:50.245,74,Laguna St at Hayes St,37.776435,-122.426244,43,San Francisco Public Library (Grove St at Hyde...,37.778768,-122.415929,96,Customer,1987.0,Male,0.942932,80110.0,467.308333
1,78800,2017-12-31 15:56:34.842,2018-01-01 13:49:55.617,284,Yerba Buena Center for the Arts (Howard St at ...,37.784872,-122.400876,96,Dolores St at 15th St,37.76621,-122.426614,88,Customer,1965.0,Female,3.069731,78800.0,459.666667
2,45768,2017-12-31 22:45:48.411,2018-01-01 11:28:36.883,245,Downtown Berkeley BART,37.870348,-122.267764,245,Downtown Berkeley BART,37.870348,-122.267764,1094,Customer,,,0.0,45768.0,266.98
3,62172,2017-12-31 17:31:10.636,2018-01-01 10:47:23.531,60,8th St at Ringold St,37.77452,-122.409449,5,Powell St BART Station (Market St at 5th St),37.783899,-122.408445,2831,Customer,,,1.046624,62172.0,362.67
4,43603,2017-12-31 14:23:14.001,2018-01-01 02:29:57.571,239,Bancroft Way at Telegraph Ave,37.868813,-122.258764,247,Fulton St at Bancroft Way,37.867789,-122.265896,3167,Subscriber,1997.0,Female,0.63634,43603.0,254.350833


### Calculating the total distance for each bike and listing top 10

In [19]:
# Add up the station-to-station distance of every trip per bike, largest total first.
bike_total_dist = (
    bike[['bike_id', 'haversine_dist']]
    .groupby('bike_id')
    .sum()
    .sort_values('haversine_dist', ascending=False)
)
bike_total_dist.head(10)

Unnamed: 0_level_0,haversine_dist
bike_id,Unnamed: 1_level_1
68,743.36949
2178,721.182231
256,671.91615
235,670.161992
2049,656.828082
441,656.642275
2226,647.823268
796,646.867722
190,640.294175
2365,639.412701


In [20]:
 # Label missing genders explicitly so they form their own group in later counts.
 # Assign the result back instead of calling fillna(inplace=True) on the column
 # selection: that chained in-place form is deprecated and, with pandas
 # Copy-on-Write, leaves `bike` unchanged.
 bike['member_gender'] = bike['member_gender'].fillna("Not_filled")

In [21]:
# Display the frame (pandas elides the middle rows) to confirm that missing
# genders now read "Not_filled".
bike

Unnamed: 0,duration_sec,start_time,end_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bike_id,user_type,member_birth_year,member_gender,haversine_dist,trip_duration_in_sec,Cost of trip
0,80110,2017-12-31 16:57:39.654,2018-01-01 15:12:50.245,74,Laguna St at Hayes St,37.776435,-122.426244,43,San Francisco Public Library (Grove St at Hyde...,37.778768,-122.415929,96,Customer,1987.0,Male,0.942932,80110.0,467.308333
1,78800,2017-12-31 15:56:34.842,2018-01-01 13:49:55.617,284,Yerba Buena Center for the Arts (Howard St at ...,37.784872,-122.400876,96,Dolores St at 15th St,37.766210,-122.426614,88,Customer,1965.0,Female,3.069731,78800.0,459.666667
2,45768,2017-12-31 22:45:48.411,2018-01-01 11:28:36.883,245,Downtown Berkeley BART,37.870348,-122.267764,245,Downtown Berkeley BART,37.870348,-122.267764,1094,Customer,,Not_filled,0.000000,45768.0,266.980000
3,62172,2017-12-31 17:31:10.636,2018-01-01 10:47:23.531,60,8th St at Ringold St,37.774520,-122.409449,5,Powell St BART Station (Market St at 5th St),37.783899,-122.408445,2831,Customer,,Not_filled,1.046624,62172.0,362.670000
4,43603,2017-12-31 14:23:14.001,2018-01-01 02:29:57.571,239,Bancroft Way at Telegraph Ave,37.868813,-122.258764,247,Fulton St at Bancroft Way,37.867789,-122.265896,3167,Subscriber,1997.0,Female,0.636340,43603.0,254.350833
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
519695,435,2017-06-28 10:00:54.528,2017-06-28 10:08:10.438,81,Berry St at 4th St,37.775880,-122.393170,45,5th St at Howard St,37.781752,-122.405127,400,Subscriber,1991.0,Male,1.237187,435.0,2.537500
519696,431,2017-06-28 09:56:39.631,2017-06-28 10:03:51.090,66,3rd St at Townsend St,37.778742,-122.392741,321,5th at Folsom,37.780146,-122.403071,316,Subscriber,1973.0,Male,0.921190,431.0,2.514167
519697,424,2017-06-28 09:47:36.347,2017-06-28 09:54:41.187,21,Montgomery St BART Station (Market St at 2nd St),37.789625,-122.400811,48,2nd St at S Park St,37.782411,-122.392706,240,Subscriber,1985.0,Female,1.072757,424.0,2.473333
519698,366,2017-06-28 09:47:41.664,2017-06-28 09:53:47.715,58,Market St at 10th St,37.776619,-122.417385,59,S Van Ness Ave at Market St,37.774814,-122.418954,669,Subscriber,1981.0,Male,0.243515,366.0,2.135000


In [22]:
# Count trips per (start station, rider gender) and pivot the genders into columns.
# "duration_sec" is selected before counting so only that one column is tallied,
# instead of counting every column and discarding all but one.
no_of_trips_per_station_sorted = (
    bike.groupby(["start_station_id", "member_gender"])["duration_sec"]
    .count()
    .unstack()
)

In [23]:
# Total trips per station across the four gender buckets. Summing the named
# count columns (rather than the whole frame) keeps this cell idempotent:
# re-running it after "sum" or the *_Ratio columns exist no longer inflates the total.
no_of_trips_per_station_sorted["sum"] = no_of_trips_per_station_sorted[
    ["Female", "Male", "Not_filled", "Other"]
].sum(axis=1)

In [24]:
# Share of each station's trips per gender bucket, expressed as a percentage
# (the columns are named *_Ratio but hold values on a 0-100 scale).
ratio_column_by_gender = {
    'Female': 'Female_Ratio',
    'Male': 'Male_Ratio',
    'Other': 'others_Ratio',
    'Not_filled': 'Not_filled_Ratio',
}
for gender, ratio_column in ratio_column_by_gender.items():
    no_of_trips_per_station_sorted[ratio_column] = (
        no_of_trips_per_station_sorted[gender] / no_of_trips_per_station_sorted["sum"]
    ) * 100

In [25]:
# Ten busiest start stations by total trip count, with their gender breakdown.
(
    no_of_trips_per_station_sorted
    .sort_values("sum", ascending=False)
    .head(10)
)

member_gender,Female,Male,Not_filled,Other,sum,Female_Ratio,Male_Ratio,others_Ratio,Not_filled_Ratio
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
15,2819.0,8586.0,3678.0,104.0,15187.0,18.561928,56.535195,0.684796,24.218081
6,1938.0,7444.0,4163.0,119.0,13664.0,14.183255,54.478923,0.870902,30.46692
30,2035.0,9813.0,617.0,81.0,12546.0,16.220309,78.216165,0.645624,4.917902
67,2105.0,9409.0,428.0,113.0,12055.0,17.461634,78.050601,0.93737,3.550394
58,1870.0,9005.0,1050.0,35.0,11960.0,15.635452,75.292642,0.292642,8.779264
21,1631.0,8367.0,1293.0,43.0,11334.0,14.39033,73.822128,0.379389,11.408152
81,2302.0,7773.0,816.0,65.0,10956.0,21.011318,70.947426,0.593282,7.447974
3,1768.0,6070.0,2240.0,64.0,10142.0,17.432459,59.850128,0.631039,22.086373
22,1980.0,7512.0,320.0,114.0,9926.0,19.947612,75.680032,1.148499,3.223857
16,1740.0,6532.0,978.0,97.0,9347.0,18.615599,69.883385,1.037766,10.46325
