In [55]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point

%matplotlib inline

In [42]:
may_trips = pd.read_csv('../data/may_trip.csv')
june_trips = pd.read_csv('../data/june_trip.csv')
july_trips = pd.read_csv('../data/july_trip.csv')

In [43]:
may_trips.shape

(224969, 16)

In [44]:
june_trips.shape

(205627, 16)

In [45]:
july_trips.shape

(134926, 16)

In [46]:
224969+205627+134926

565522

In [47]:
all_trips = pd.concat([may_trips, june_trips, july_trips])

In [48]:
all_trips.head()

Unnamed: 0,pubTimeStamp,companyName,tripRecordNum,sumdID,tripDuration,tripDistance,startDate,startTime,endDate,endTime,startLatitude,startLongitude,endLatitude,endLongitude,tripRoute,create_dt
0,2019-05-01 00:00:55.423000,Bird,BRD2134,Powered9EAJL,3.0,958.00528,2019-05-01 00:00:00,00:00:20.460000,2019-05-01 00:00:00,00:02:52.346666,36.1571,-86.8036,36.1566,-86.8067,"[[36.157235,-86.803612],[36.157235,-86.80362],...",2019-05-02 05:30:23.780000
1,2019-05-01 00:03:33.147000,Lyft,LFT5,Powered296631,1.7156,1371.39112,2019-05-01 00:00:00,00:01:50.090000,2019-05-01 00:00:00,00:03:33.026666,36.15797,-86.77896,36.16054,-86.77689,"[[36.15797,-86.77896],[36.15795,-86.77873],[36...",2019-05-02 07:20:32.757000
2,2019-05-01 00:05:55.570000,Bird,BRD2168,Powered7S2UU,3.0,2296.588,2019-05-01 00:00:00,00:03:47.363333,2019-05-01 00:00:00,00:07:13.596666,36.1547,-86.7818,36.1565,-86.7868,"[[36.155068,-86.782124],[36.156597,-86.78675]]",2019-05-02 05:30:24.530000
3,2019-05-01 00:05:55.570000,Bird,BRD2166,PoweredZIIVX,3.0,1200.78744,2019-05-01 00:00:00,00:04:21.386666,2019-05-01 00:00:00,00:06:59.176666,36.1494,-86.7795,36.1531,-86.7796,"[[36.149741,-86.779344],[36.149741,-86.779327]...",2019-05-02 05:30:24.237000
4,2019-05-01 00:05:55.570000,Bird,BRD2165,PoweredJ7MB3,2.0,351.04988,2019-05-01 00:00:00,00:04:27.796666,2019-05-01 00:00:00,00:06:23.150000,36.1778,-86.7866,36.1774,-86.7876,"[[36.177699,-86.786477],[36.177711,-86.786469]...",2019-05-02 05:30:24.207000


In [49]:
all_trips = all_trips.rename(columns={"pubTimeStamp": "time_stamp",
                        "companyName": "company_name",
                        "tripRecordNum": "trip_number",
                        "sumdID": "scooter_id",
                         "tripDuration": "trip_duration",
                         "tripDistance": "trip_distance",
                         "startDate": "start_date",
                         "startTime": "start_time",
                         "endDate": "end_date",
                         "endTime": "end_time",
                         "startLatitude": "start_lat",
                         "startLongitude": "start_lon",
                         "endLatitude": "end_lat",
                         "endLongitude": "end_lon",
                         "tripRoute": "trip_route",
                         "create_dt": "created_date"})

In [50]:
all_trips.shape

(565522, 16)

In [51]:
all_trips.dtypes

time_stamp        object
company_name      object
trip_number       object
scooter_id        object
trip_duration    float64
trip_distance    float64
start_date        object
start_time        object
end_date          object
end_time          object
start_lat        float64
start_lon        float64
end_lat          float64
end_lon          float64
trip_route        object
created_date      object
dtype: object

In [52]:
all_trips['end_date'] = all_trips['end_date'].astype('datetime64')

In [54]:
all_trips['start_date'] = all_trips['start_date'].astype('datetime64')

In [56]:
all_trips['start_geo'] = all_trips.apply(lambda x: Point((x.start_lon, 
                                                         x.start_lat)), 
                                        axis=1)

In [57]:
all_trips['end_geo'] = all_trips.apply(lambda x: Point((x.end_lon, 
                                                         x.end_lat)), 
                                        axis=1)

In [58]:
all_trips.dtypes

time_stamp               object
company_name             object
trip_number              object
scooter_id               object
trip_duration           float64
trip_distance           float64
start_date       datetime64[ns]
start_time               object
end_date         datetime64[ns]
end_time                 object
start_lat               float64
start_lon               float64
end_lat                 float64
end_lon                 float64
trip_route               object
created_date             object
start_geo                object
end_geo                  object
dtype: object

In [59]:
all_trips.head(3)

Unnamed: 0,time_stamp,company_name,trip_number,scooter_id,trip_duration,trip_distance,start_date,start_time,end_date,end_time,start_lat,start_lon,end_lat,end_lon,trip_route,created_date,start_geo,end_geo
0,2019-05-01 00:00:55.423000,Bird,BRD2134,Powered9EAJL,3.0,958.00528,2019-05-01,00:00:20.460000,2019-05-01,00:02:52.346666,36.1571,-86.8036,36.1566,-86.8067,"[[36.157235,-86.803612],[36.157235,-86.80362],...",2019-05-02 05:30:23.780000,POINT (-86.8036 36.1571),POINT (-86.80670000000001 36.1566)
1,2019-05-01 00:03:33.147000,Lyft,LFT5,Powered296631,1.7156,1371.39112,2019-05-01,00:01:50.090000,2019-05-01,00:03:33.026666,36.15797,-86.77896,36.16054,-86.77689,"[[36.15797,-86.77896],[36.15795,-86.77873],[36...",2019-05-02 07:20:32.757000,POINT (-86.77896 36.15797),POINT (-86.77688999999999 36.16054)
2,2019-05-01 00:05:55.570000,Bird,BRD2168,Powered7S2UU,3.0,2296.588,2019-05-01,00:03:47.363333,2019-05-01,00:07:13.596666,36.1547,-86.7818,36.1565,-86.7868,"[[36.155068,-86.782124],[36.156597,-86.78675]]",2019-05-02 05:30:24.530000,POINT (-86.7818 36.1547),POINT (-86.7868 36.1565)


In [64]:
all_trips['company_name'].value_counts()

Lime             225694
Bird             152745
Lyft             120991
SPIN              34450
Bolt Mobility     21890
JUMP               6437
Gotcha             3315
Name: company_name, dtype: int64

In [66]:
company_month = all_trips.groupby('company_name', 'start_date')

ValueError: No axis named start_date for object type DataFrame