In [None]:
import ast
import os
from urllib.parse import quote_plus

import pandas as pd
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from shapely.geometry import Point, LineString
import matplotlib.pyplot as plt
import numpy as np
from sqlalchemy import create_engine, text

# We start by importing zip codes

In [None]:
# Load the zip code polygons. `gpd.read_file` already returns a GeoDataFrame,
# so wrapping the result in `gpd.GeoDataFrame(...)` again is unnecessary.
zipcodes = gpd.read_file('../data/zipcodes.geojson')
zipcodes.head()

In [None]:
# Map centre in the [latitude, longitude] order that folium.Map expects.
center_lat, center_lng = 36.23245257407244, -86.77676762469858
area_center = [center_lat, center_lng]

In [None]:
# Base map centred on `area_center`, with the zip code polygons drawn on top.
nash_map = folium.Map(location=area_center, zoom_start=11)

zipcode_layer = folium.GeoJson(zipcodes)
zipcode_layer.add_to(nash_map)

nash_map

# Next, we bring in some of the data from the scooters table

In [None]:
database_name = 'scooters'

# Connection settings are read from the environment so credentials do not have
# to live in the notebook. The fallbacks reproduce the original local setup
# (postgres/postgres on localhost:5050), so the cell still works unconfigured.
db_user = os.environ.get('SCOOTERS_DB_USER', 'postgres')
db_password = os.environ.get('SCOOTERS_DB_PASSWORD', 'postgres')
db_host = os.environ.get('SCOOTERS_DB_HOST', 'localhost')
db_port = os.environ.get('SCOOTERS_DB_PORT', '5050')

# quote_plus keeps characters such as '@' or '/' in the user name or password
# from being parsed as part of the URL structure.
connection_string = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{database_name}"
)

engine = create_engine(connection_string)

In [None]:
# Pull one day of scooter positions (2019-07-19).
# A half-open range on the column itself replaces the original
# `pubdatetime::varchar LIKE '2019-07-19%'`, which cast every row to text and
# could not use an index on pubdatetime.
# NOTE(review): assumes pubdatetime is a timestamp/date column (the original
# cast it to varchar) — confirm against the table definition.
query = '''
SELECT latitude, longitude
FROM scooters
WHERE pubdatetime >= '2019-07-19'
  AND pubdatetime <  '2019-07-20'
--LIMIT 100000
'''

with engine.connect() as connection:
    scooters_sample = pd.read_sql(text(query), con=connection)

scooters_sample

## Then we add this sample to the map

In [None]:
# Promote the plain query result to a GeoDataFrame so it can take a geometry
# column and a CRS in the cells that follow.
scooters_sample = gpd.GeoDataFrame(data=scooters_sample)

In [None]:
# Rebuild the map from scratch (centred on `area_center` from the earlier cell)
# so that re-running this cell does not stack duplicate layers on the old map.
nash_map = folium.Map(location=area_center, zoom_start=11)

marker_cluster = MarkerCluster().add_to(nash_map)

folium.GeoJson(zipcodes).add_to(nash_map)

# Iterate over the two coordinate columns directly instead of `iterrows()`,
# which builds a Series per row. The per-row debug `print(loc)` is removed:
# it emitted one output line for every scooter record.
for lat, lng in zip(scooters_sample['latitude'], scooters_sample['longitude']):
    folium.Marker(
        location=[lat, lng],
        icon=folium.Icon(color="red", icon="bicycle", prefix='fa'),
    ).add_to(marker_cluster)

# Uncomment to render the clustered map.
#nash_map

## Now we add a geometry column to the sample and join it with the zipcodes

In [None]:
# Build all point geometries in one vectorised call instead of a row-wise
# `apply` with a lambda. Shapely points are (x, y), i.e. (longitude, latitude).
# `.astype(float)` mirrors the explicit float() conversion of the original.
scooters_sample['geometry'] = gpd.points_from_xy(
    scooters_sample['longitude'].astype(float),
    scooters_sample['latitude'].astype(float),
)
scooters_sample

In [None]:
# Tag the scooter points with the zip code layer's CRS so the spatial join in
# the next cell compares geometries declared in the same reference system.
# This only labels the data, it does not reproject it.
# NOTE(review): assumes zipcodes uses a lat/long CRS matching the raw
# latitude/longitude values — confirm.
scooters_sample.crs = zipcodes.crs

In [None]:
# Attach to every scooter point the attributes of the zip code polygon that
# contains it. `predicate=` replaces the deprecated `op=` keyword of
# `gpd.sjoin`, which newer geopandas releases no longer accept.
zip_sample = gpd.sjoin(scooters_sample, zipcodes, predicate='within')
zip_sample

## Lastly, group by zipcode to see which zipcode had the most abandoned scooters

In [None]:
# Number of joined scooter points per zip code, busiest zip code first.
scooter_zip = (
    zip_sample
    .groupby('zip')['zip']
    .count()
    .sort_values(ascending=False)
)
scooter_zip

# Moving on to something slightly different
## We'll now take a look at which zipcodes were the most popular for trips

In [None]:
# Fetch the recorded route of every trip, skipping trips whose route is the
# empty list literal '[]'.
query = '''
SELECT triproute
FROM trips
WHERE triproute <> '[]';
'''

trips_statement = text(query)
with engine.connect() as conn:
    trips = pd.read_sql(trips_statement, con=conn)

trips

## Let's look at the triproute column and turn it into a list
### NOTE: the eval() function works for this scenario but shouldn't be used when developing applications, because it executes whatever code the string contains (arbitrary code execution, which is a powerful hacking method). `ast.literal_eval` is a safe alternative when the string only holds Python literals.

In [None]:
# Check how the first route is stored before trying to parse it.
type(trips.loc[0, 'triproute'])

In [None]:
# Short alias for the route column, used by the parsing cells below.
routes = trips['triproute']

In [None]:
# Parse the first stored route from its text form into Python objects.
# `ast.literal_eval` accepts only Python literals (lists, tuples, numbers,
# strings, ...), so unlike `eval` it cannot execute code that happens to be
# stored in the database column.
x = ast.literal_eval(routes[0])
x

In [None]:
# First entry of the parsed route — presumably a [lat, lng] pair, given how
# the loop further down indexes each entry.
x[0]

In [None]:
# First value of that first entry (the loop further down treats it as the latitude).
x[0][0]

In [None]:
# Collect the start and end point of every trip as [(lng, lat), (lng, lat)].
coord_list = []

for route in routes:
    # Each route is stored as text. `ast.literal_eval` turns it into a list
    # without executing arbitrary code, which `eval` would do.
    route_list = ast.literal_eval(route)

    # The SQL filter only removes the exact string '[]'; skip anything else
    # that still parses to an empty route rather than failing on [0] / [-1].
    if not route_list:
        continue

    # Route entries are indexed as [lat, lng]; flip them to (lng, lat) so they
    # line up with shapely's (x, y) ordering later on.
    first_point, last_point = route_list[0], route_list[-1]
    start = (first_point[1], first_point[0])
    end = (last_point[1], last_point[0])

    coord_list.append([start, end])

# Show only a preview: a plain Python list is not truncated on display.
coord_list[:5]

In [None]:
# Number of [start, end] pairs collected, i.e. trips with a usable route.
len(coord_list)

## Now, with the coordinates all together in a list of (start, end) pairs, we'll make a GeoDataFrame with the starting and ending coordinates of each trip

In [None]:
# One row per trip, with the start and end (lng, lat) tuples in two columns.
trip_columns = ['start_coord', 'end_coord']
coord_df = gpd.GeoDataFrame(data=coord_list, columns=trip_columns)
coord_df

In [None]:
# Spot-check the start coordinate of the first trip.
coord_df.at[0, 'start_coord']

## And now we add a geometry column and a crs to have it ready to map and join with zipcodes

In [None]:
# One straight start-to-end LineString per trip. Zipping the two columns
# avoids `iterrows()` and a `.loc` lookup for every cell.
geometry = [
    LineString((start, end))
    for start, end in zip(coord_df['start_coord'], coord_df['end_coord'])
]

# Show only a preview: a plain Python list is not truncated on display.
geometry[:5]

In [None]:
# Attach the LineStrings built in the previous cell as the geometry column.
coord_df['geometry'] = geometry
coord_df

In [None]:
# Label the trip geometries with the zip code layer's CRS (no reprojection
# happens here).
# NOTE(review): assumes the route coordinates are already expressed in that
# CRS — confirm.
coord_df.crs = zipcodes.crs

In [None]:
# First component of the first trip's start tuple (the longitude, since the
# tuples were built as (lng, lat)).
coord_df.at[0, 'start_coord'][0]

In [None]:
# Draw every trip as its start-to-end segment on a fresh map.
# The marker loop that used to sit here inside a triple-quoted string was dead
# code (a string literal evaluated and discarded) and has been removed.
trip_map = folium.Map(location=area_center, zoom_start=11)

folium.GeoJson(coord_df).add_to(trip_map)

trip_map

## While we see the route of each trip, there's too much overlap to decipher anything. So, let's take a look at the starting and ending coordinates of each trip individually to see which zipcodes are popular.

In [None]:
def add_bicycle_marker(coord, color, cluster):
    """Add one bicycle marker for `coord` to `cluster`.

    Parameters
    ----------
    coord : tuple
        (lng, lat) pair as stored in `coord_df`.
    color : str
        folium icon colour name.
    cluster : folium.plugins.MarkerCluster
        Cluster layer that receives the marker.
    """
    lng, lat = coord
    # folium expects [lat, lng], the reverse of the stored (lng, lat) order.
    folium.Marker(
        location=[float(lat), float(lng)],
        icon=folium.Icon(color=color, icon="bicycle", prefix='fa'),
    ).add_to(cluster)


trip_map = folium.Map(location=area_center, zoom_start=11)

folium.GeoJson(zipcodes).add_to(trip_map)

marker_cluster = MarkerCluster().add_to(trip_map)

# Green marks where a trip started, red where it ended. Zipping the columns
# avoids `iterrows()` and a `.loc` lookup for every coordinate.
for start_coord, end_coord in zip(coord_df['start_coord'], coord_df['end_coord']):
    add_bicycle_marker(start_coord, "green", marker_cluster)
    add_bicycle_marker(end_coord, "red", marker_cluster)

trip_map

## Finally, let's break down the start and end coordinates so we can group by zipcode

In [None]:
# Single-column frame holding each trip's start coordinate tuple; the geometry
# column and CRS are added in the cells that follow.
start_zip = gpd.GeoDataFrame(coord_df['start_coord'])

In [None]:
# One shapely Point per trip start. Reads from `start_zip` itself (as the
# end_zip cell does with its own frame) and avoids `iterrows()` plus a `.loc`
# lookup per row.
geometry = [Point(coord) for coord in start_zip['start_coord']]

In [None]:
# Attach the start points as the geometry column and label them with the zip
# code layer's CRS so the two frames can be spatially joined.
start_zip['geometry'] = geometry
start_zip.crs = zipcodes.crs

In [None]:
# Keep the start points that fall inside a zip code polygon and attach that
# polygon's attributes. `predicate=` replaces the deprecated `op=` keyword of
# `gpd.sjoin`, which newer geopandas releases no longer accept.
start_zip = gpd.sjoin(start_zip, zipcodes, predicate='within').reset_index(drop=True)

In [None]:
#start_zip

In [None]:
# Trips started per zip code, most popular first.
start_counts = start_zip.groupby('zip')['zip'].count()
start_zip = start_counts.sort_values(ascending=False)
start_zip

In [None]:
# Single-column frame holding each trip's end coordinate tuple.
end_zip = gpd.GeoDataFrame(coord_df['end_coord'])

# One shapely Point per trip end, built with a comprehension instead of
# `iterrows()` plus a `.loc` lookup per row.
geometry = [Point(coord) for coord in end_zip['end_coord']]

# Attach the points and label them with the zip code layer's CRS so the two
# frames can be spatially joined.
end_zip['geometry'] = geometry
end_zip.crs = zipcodes.crs

In [None]:
# Keep the end points that fall inside a zip code polygon and attach that
# polygon's attributes. `predicate=` replaces the deprecated `op=` keyword of
# `gpd.sjoin`, which newer geopandas releases no longer accept.
end_zip = gpd.sjoin(end_zip, zipcodes, predicate='within').reset_index(drop=True)

In [None]:
# Trips ended per zip code, most popular first.
end_zip = (
    end_zip
    .groupby('zip')['zip']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# Print both rankings one after the other for a side-by-side read.
print(
    'Starting Zipcodes:\n', start_zip,
    '\n\nEnding Zipcodes:\n', end_zip,
)