In [1]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import folium

%matplotlib inline

In [2]:
may_trips = pd.read_csv('../data/may_trip.csv')
june_trips = pd.read_csv('../data/june_trip.csv')
july_trips = pd.read_csv('../data/july_trip.csv')
july_pickle = pd.read_pickle("../data/july.pkl")

In [3]:
may_trips.shape

(224969, 16)

In [4]:
june_trips.shape

(205627, 16)

In [5]:
july_trips.shape

(134926, 16)

In [6]:
224969+205627+134926

565522

In [7]:
all_trips = pd.concat([may_trips, june_trips, july_trips])

In [8]:
july_trips = july_trips.rename(columns={"pubTimeStamp": "time_stamp",
                        "companyName": "company_name",
                        "tripRecordNum": "trip_number",
                        "sumdID": "scooter_id",
                         "tripDuration": "trip_duration",
                         "tripDistance": "trip_distance",
                         "startDate": "start_date",
                         "startTime": "start_time",
                         "endDate": "end_date",
                         "endTime": "end_time",
                         "startLatitude": "start_lat",
                         "startLongitude": "start_lon",
                         "endLatitude": "end_lat",
                         "endLongitude": "end_lon",
                         "tripRoute": "trip_route",
                         "create_dt": "created_date"})
july_trips.head()

Unnamed: 0,time_stamp,company_name,trip_number,scooter_id,trip_duration,trip_distance,start_date,start_time,end_date,end_time,start_lat,start_lon,end_lat,end_lon,trip_route,created_date
0,2019-07-01 00:02:52.213000,Bird,BRD5179,PoweredZSHKJ,2.0,0.0,2019-07-01 00:00:00,00:01:32.290000,2019-07-01 00:00:00,00:03:55.966666,36.1775,-86.7516,36.1778,-86.7514,"[[36.177438,-86.751861],[36.177425,-86.751987]...",2019-07-02 05:30:18.333000
1,2019-07-01 00:02:52.213000,Bird,BRD5177,Powered22JF6,0.0,0.0,2019-07-01 00:00:00,00:02:38.640000,2019-07-01 00:00:00,00:02:54.406666,36.1627,-86.7711,36.1627,-86.7711,"[[36.162718,-86.771102],[36.162718,-86.771102]]",2019-07-02 05:30:18.273000
2,2019-07-01 00:02:52.213000,Bird,BRD5176,Powered22JF6,1.0,0.0,2019-07-01 00:00:00,00:03:04.333333,2019-07-01 00:00:00,00:03:37.680000,36.1627,-86.7713,36.1627,-86.7713,"[[36.162734,-86.77122],[36.162706,-86.771325],...",2019-07-02 05:30:18.243000
3,2019-07-01 00:02:52.213000,Bird,BRD5178,PoweredIB2CC,1.0,0.0,2019-07-01 00:00:00,00:02:06.963333,2019-07-01 00:00:00,00:03:25.766666,36.1525,-86.7988,36.1525,-86.7988,"[[36.152499,-86.798708]]",2019-07-02 05:30:18.303000
4,2019-07-01 00:02:52.213000,Bird,BRD5180,PoweredW4G7R,2.0,0.0,2019-07-01 00:00:00,00:01:19.843333,2019-07-01 00:00:00,00:02:53.976666,36.1578,-86.776,36.1575,-86.7758,"[[36.157523,-86.775794]]",2019-07-02 05:30:18.363000


In [9]:
july_trips['end_date'] = july_trips['end_date'].astype('datetime64')

In [10]:
july_trips['start_date'] = july_trips['start_date'].astype('datetime64')

In [11]:
july_trips['start_geo'] = july_trips.apply(lambda x: Point((x.start_lon, 
                                                         x.start_lat)), 
                                        axis=1)

In [12]:
july_trips['end_geo'] = july_trips.apply(lambda x: Point((x.end_lon, 
                                                         x.end_lat)), 
                                        axis=1)

In [13]:
july_trips["scooter_id"].value_counts()

PoweredUNKNOWN          327
Powered5N5J5BJDODHYX    141
PoweredOEHCPCAUHESR5    134
PoweredIHGSNSRXCZ2ZG    128
Powered5BW4GPYGBMUTL    126
                       ... 
PoweredUJHQ7              1
PoweredJXSCI              1
Powered3655517            1
PoweredKURT7              1
PoweredVNWRA              1
Name: scooter_id, Length: 5585, dtype: int64

In [14]:
july_trips.dtypes

time_stamp               object
company_name             object
trip_number              object
scooter_id               object
trip_duration           float64
trip_distance           float64
start_date       datetime64[ns]
start_time               object
end_date         datetime64[ns]
end_time                 object
start_lat               float64
start_lon               float64
end_lat                 float64
end_lon                 float64
trip_route               object
created_date             object
start_geo                object
end_geo                  object
dtype: object

In [15]:
july_trips['dates'] = pd.to_datetime(july_trips['time_stamp']).dt.date
july_trips['time'] = pd.to_datetime(july_trips['time_stamp']).dt.time
july_trips['dates'] = july_trips['dates'].astype('datetime64')
july_trips.dtypes

time_stamp               object
company_name             object
trip_number              object
scooter_id               object
trip_duration           float64
trip_distance           float64
start_date       datetime64[ns]
start_time               object
end_date         datetime64[ns]
end_time                 object
start_lat               float64
start_lon               float64
end_lat                 float64
end_lon                 float64
trip_route               object
created_date             object
start_geo                object
end_geo                  object
dates            datetime64[ns]
time                     object
dtype: object

In [16]:
july_pickle = july_pickle.rename(columns={"pubdatetime": "time_stamp",
                        "companyname": "company_name",
                        "sumdid": "scooter_id"
                         })
july_pickle.columns

Index(['time_stamp', 'latitude', 'longitude', 'scooter_id', 'chargelevel',
       'company_name'],
      dtype='object')

In [17]:
july_pickle['dates'] = pd.to_datetime(july_pickle['time_stamp']).dt.date
july_pickle['time'] = pd.to_datetime(july_pickle['time_stamp']).dt.time
july_pickle['dates'] = july_pickle['dates'].astype('datetime64')
july_pickle.dtypes

time_stamp      datetime64[ns]
latitude               float64
longitude              float64
scooter_id              object
chargelevel            float64
company_name             int64
dates           datetime64[ns]
time                    object
dtype: object

In [18]:
jul_01 = july_trips['dates']== "2019-07-01"
july_01 = july_trips[jul_01]
#only unique IDs/then take the July pickle file and
#condense it down to unique ids - how to subtract all of a series from another list or series
#maybe pick a brand or a certain time
july_01.shape

(4248, 20)

In [19]:
july_01_un = july_01.drop_duplicates(subset = ["scooter_id"])
july_01_un.shape
#turn this into a list, and then do the same for the pickle data
#match it on both sides - once you have the lists, do a loc/isin with a ~

(2050, 20)

In [20]:
jul_list = july_01_un["scooter_id"].tolist

In [21]:
jul_pickle = july_pickle['dates']== "2019-07-01"
july_pickle_01 = july_pickle[jul_pickle]
july_pickle_01["scooter_id"] = july_pickle_01["scooter_id"].astype('|S')
july_pickle_01.dtypes

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  july_pickle_01["scooter_id"] = july_pickle_01["scooter_id"].astype('|S')


time_stamp      datetime64[ns]
latitude               float64
longitude              float64
scooter_id                |S44
chargelevel            float64
company_name             int64
dates           datetime64[ns]
time                    object
dtype: object

In [22]:
july_pickle_un = july_pickle_01.drop_duplicates(subset = ["scooter_id"])
july_pickle_un.shape

(5082, 8)

In [23]:
jp01_filtered = july_pickle_un.loc[july_pickle_un["scooter_id"].isin([jul_list])]
jp01_filtered.head(1)

Unnamed: 0,time_stamp,latitude,longitude,scooter_id,chargelevel,company_name,dates,time


In [24]:
jpu_list = july_pickle_un["scooter_id"].tolist

In [25]:
i = 0
for row_index, row_values in july_01_un.iterrows():
    if i <3: 
        print('index is', row_index)
        print('values are:')
        print(' ')
        print(row_values)
        print('------------------------- ')
        i+=1

index is 0
values are:
 
time_stamp                              2019-07-01 00:02:52.213000
company_name                                                  Bird
trip_number                                                BRD5179
scooter_id                                            PoweredZSHKJ
trip_duration                                                  2.0
trip_distance                                                  0.0
start_date                                     2019-07-01 00:00:00
start_time                                         00:01:32.290000
end_date                                       2019-07-01 00:00:00
end_time                                           00:03:55.966666
start_lat                                                  36.1775
start_lon                                                 -86.7516
end_lat                                                    36.1778
end_lon                                                   -86.7514
trip_route       [[36.177438,-86.7518

In [None]:
i = 0
for row_index, row_values in july_pickle.iterrows():
    if i <3: 
        print('index is', row_index)
        print('values are:')
        print(' ')
        print(row_values)
        print('------------------------- ')
        i+=1

index is 0
values are:
 
time_stamp      2019-07-01 00:00:33.550000
latitude                         36.156678
longitude                       -86.809004
scooter_id                   Powered635135
chargelevel                           22.0
company_name                             1
dates                  2019-07-01 00:00:00
time                       00:00:33.550000
Name: 0, dtype: object
------------------------- 
index is 1
values are:
 
time_stamp      2019-07-01 00:00:34.973000
latitude                         36.145674
longitude                       -86.794138
scooter_id                   Powered790946
chargelevel                           33.0
company_name                             1
dates                  2019-07-01 00:00:00
time                       00:00:34.973000
Name: 1, dtype: object
------------------------- 
index is 2
values are:
 
time_stamp      2019-07-01 00:00:41.183000
latitude                         36.179319
longitude                       -86.751538
scooter_

In [None]:
#load in map of nashville
nash_map = folium.Map(location = [36.1612, -86.7775], zoom_start=12)

for ind, row in july_01_un.iterrows():
    point = [row['start_lat'], row['start_lon']]
    message = row['scooter_id'] + ': ' + 'scooter_id'
    folium.CircleMarker(location = point, popup = message, radius=4).add_to(nash_map)

nash_map