In [1]:
from sqlalchemy import create_engine, text
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster

In [3]:
#Load the trips table that the other notebook exported to csv
trips_csv_path = 'data/tripsdf.csv'
tripsdf = pd.read_csv(trips_csv_path)

In [4]:
#Normalise the company names in the trips table so that they match the scooters table.
#The replacements are applied one after another, in the order listed.
company_renames = {'Bolt Mobility': 'Bolt', 'SPIN': 'Spin', 'JUMP': 'Jump'}
cleaned_names = tripsdf['companyname']
for old_name, new_name in company_renames.items():
    cleaned_names = cleaned_names.str.replace(old_name, new_name)
tripsdf['companyname'] = cleaned_names

# Metro would like to know how many scooters are needed, and something that could help with this is knowing peak demand. 

## Estimate the highest count of scooters being used at the same time. 

## When were the highest volume times? 

## Does this vary by zip code or other geographic region?

In [5]:
#Combine the startdate and starttime columns into one datetime column
start_strings = tripsdf['startdate'].astype('str') + ' ' + tripsdf['starttime'].astype('str')
tripsdf['startdatetime'] = pd.to_datetime(start_strings, format='mixed')

#Count the trips that started in each hour of the day, over the whole dataset.
#After reset_index() the 'startdatetime' column holds that count, not a timestamp.
scooters_per_hour = (
    tripsdf
    .assign(hour=tripsdf['startdatetime'].dt.hour)
    .groupby(['hour'])['startdatetime']
    .count()
    .reset_index()
)

#Busiest hours first (the sorted view is displayed only; scooters_per_hour itself stays ordered by hour)
scooters_per_hour.sort_values(['startdatetime'], ascending=False).head(24)


Unnamed: 0,hour,startdatetime
23,23,44832
18,18,44717
19,19,44698
0,0,43462
17,17,43339
20,20,41973
21,21,40902
22,22,38149
16,16,36433
1,1,32690


In [None]:
# Prints the two literal labels 'weekend_hours' and 'count'; no data is computed or shown by this cell.
# NOTE(review): looks like a leftover header for a table that was never printed - confirm whether it is still needed.
print('weekend_hours', 'count')

In [None]:
#Plot the number of trips started in each hour of the day.
#Passing the whole DataFrame to plt.plot() would draw BOTH columns (hour and count)
#against the row index, so the x and y columns are selected explicitly here.
fig, ax = plt.subplots()
ax.plot(scooters_per_hour['hour'], scooters_per_hour['startdatetime'])
ax.set_xlabel('Hour of day')
ax.set_ylabel('Trips started')
ax.set_title('Trips started by hour of day')
plt.show()

#This gives us the count of trips started per day of the week for the duration of the dataset.
#day_name is a method and has to be called: .dt.day_name() returns the weekday names, whereas
#passing the bare method to assign() makes pandas treat it as a callable and invoke it with the
#DataFrame as its argument, which is why the earlier version of this cell failed.
scooters_per_day = (
    tripsdf
    .assign(day_of_week=tripsdf['startdatetime'].dt.day_name())
    .groupby(['day_of_week'])['startdatetime']
    .count()
    .reset_index()
)

#There are at most 7 rows, so show them all
scooters_per_day.head(7)

In [None]:
#Next, look at how the trips are distributed across zip codes.

#Load the zip code polygons
zipcodes = gpd.read_file('zipcodes.geojson')
#optional: print(zipcodes.crs)
#optional: zipcodes.head()

#Turn the start and end coordinates of every trip into point geometries.
#NOTE(review): the points are only labelled with zipcodes.crs (nothing is reprojected), so this
#presumes the zip code file uses longitude/latitude coordinates - confirm with print(zipcodes.crs).
start_geometry = gpd.points_from_xy(tripsdf.startlongitude, tripsdf.startlatitude)
end_geometry = gpd.points_from_xy(tripsdf.endlongitude, tripsdf.endlatitude)

trips_start_geo = gpd.GeoDataFrame(tripsdf, crs=zipcodes.crs, geometry=start_geometry)
trips_end_geo = gpd.GeoDataFrame(tripsdf, crs=zipcodes.crs, geometry=end_geometry)

#Keep only the zip code columns that are needed from here on
zipcodes = zipcodes[['zip', 'po_name', 'geometry']]

#One row per trip whose start point (respectively end point) falls within a zip code polygon
start_points_by_zip = gpd.sjoin(trips_start_geo, zipcodes, predicate='within')
end_points_by_zip = gpd.sjoin(trips_end_geo, zipcodes, predicate='within')

In [None]:
#Count how many trips start in each zip code, then turn the counts into a two-column dataframe
start_zip_counts = start_points_by_zip['zip'].value_counts()
start_points = start_zip_counts.to_frame().reset_index()

In [None]:
#Count how many trips end in each zip code, then turn the counts into a two-column dataframe
end_zip_counts = end_points_by_zip['zip'].value_counts()
end_points = end_zip_counts.to_frame().reset_index()

In [None]:
#Attach the per-zip start counts to the zip code polygons ("start_zips").
#Both frames share the key column 'zip'; the default inner join keeps only zips present in both.
start_zips = zipcodes.merge(start_points, on='zip')

In [None]:
#Attach the per-zip end counts to the zip code polygons ("end_zips").
#Both frames share the key column 'zip'; the default inner join keeps only zips present in both.
end_zips = zipcodes.merge(end_points, on='zip')

In [None]:
#Choropleth of trip starts per zip code: darker polygons have more trip starts
fig, ax = plt.subplots(figsize=(10, 10))
choropleth_options = dict(
    column='count',
    cmap='RdPu',
    legend=True,
    edgecolor='black',
    ax=ax,
)
start_zips.plot(**choropleth_options)
plt.title('Scooter Usage by Zipcode', fontsize=14)
ax.axis('off')

#### Answer: Top 3 zip codes by trip volume are 37203, 37201, and 37219

In [None]:
#Look at the trips that started in the three highest-volume zip codes: 37203, 37201 and 37219.
#The variable keeps its "_37203" name because later cells refer to it, but it holds all three zips.
        #Note: You can use this code to check the counts per zip: 
        #start_points.head()

#Filter the full dataframe (start_points_by_zip) down to trips that started in those zip codes.
#You have to use that specific variable bc its the one right after the geopandas sjoin that still includes all the individual rows.
#.copy() makes the result an independent frame, so the columns added to it in later cells
#do not raise SettingWithCopyWarning.
top_zip_codes = ['37203', '37201', '37219']
start_zips_37203 = start_points_by_zip.loc[start_points_by_zip['zip'].isin(top_zip_codes)].copy()
start_zips_37203.info()

In [None]:
#Build startdatetime as a real datetime column from the startdate and starttime columns.
#assign() returns a new frame instead of writing into a filtered slice, which avoids
#SettingWithCopyWarning and keeps the cell safe to re-run.
start_zips_37203 = start_zips_37203.assign(
    startdatetime=pd.to_datetime(
        start_zips_37203['startdate'].astype('str') + ' ' + start_zips_37203['starttime'].astype('str'),
        format='mixed',
    )
)


In [None]:
#Print the column summary (dtypes and non-null counts) again, after the startdatetime conversion in the previous cell
start_zips_37203.info()

In [None]:
#create a new column that indicates what hour of the day each trip started in.
#assign() returns a new frame instead of writing into a filtered slice, which avoids SettingWithCopyWarning.
start_zips_37203 = start_zips_37203.assign(hour=start_zips_37203['startdatetime'].dt.hour)

In [None]:
#create a new column that indicates what day of the week each trip started on (e.g. 'Friday').
#assign() returns a new frame instead of writing into a filtered slice, which avoids SettingWithCopyWarning.
start_zips_37203 = start_zips_37203.assign(week_day=start_zips_37203['startdatetime'].dt.day_name())

In [None]:
#Preview the first rows: the dataframe now has the hour and week_day columns added in the cells above
start_zips_37203.head()

In [None]:
#Rank the days of the week by how many distinct sumdid values appear on each, busiest day first.
#NOTE(review): nunique() counts distinct sumdid values, not rows. If sumdid identifies a scooter,
#this is "scooters seen per weekday" rather than "trips per weekday" - confirm which one is wanted.
sumdid_by_day = start_zips_37203[['sumdid', 'week_day']].groupby(['week_day'])['sumdid'].nunique()
sumdid_by_day.reset_index(name='count').sort_values(['count'], ascending=False)

In [None]:
#Count the trips in start_zips_37203 for each hour of the day, busiest hour first.
#NOTE(review): despite the variable name, no weekend filter is applied here - trips from every
#day of the week are counted. Confirm whether a week_day filter was intended.
trips_by_hour = start_zips_37203.groupby(['hour'])['startdatetime'].count().reset_index()
scooters_per_hour_on_weekend = trips_by_hour.sort_values(by='startdatetime', ascending=False)
scooters_per_hour_on_weekend.head(24)

In [None]:
#Keep only the zip code polygons (rows) for the three selected zips: 37203, 37201 and 37219.
#The variable is still called polygon37203, but it covers all three.
selected_zips = ['37203', '37201', '37219']
polygon37203 = zipcodes[zipcodes['zip'].isin(selected_zips)]
polygon37203.shape

In [None]:
#Draw the selected zip code polygons in light green as the base layer,
#then draw the trip start points on the same axes, coloured by hour of the day.
base_layer_options = dict(figsize=(8, 10), color='lightgreen')
ax = polygon37203.plot(**base_layer_options)
start_zips_37203.plot(ax=ax, column='hour')
plt.show()


### Folium maps require a center point for the street map. 

In [None]:
#Make use of a GeoSeries attribute (.centroid) which gives the center of a polygon.
#The polygon is picked by its zip value instead of the hard-coded row label [35], which only
#worked as long as the geojson rows stayed in the same order.
#NOTE(review): assumes the row labelled 35 was the 37203 polygon - confirm.
#.iloc[0] takes the first match in case the file holds more than one polygon for that zip.
center = polygon37203.loc[polygon37203['zip'] == '37203'].geometry.centroid.iloc[0]
print(center)

### Folium requires a location point as an array 
- with *latitude* first
- but shapely Points have *longitude* first 

In [None]:
# folium wants [latitude, longitude]; on the shapely point y is the latitude and x the longitude
center_lat, center_lng = center.y, center.x
area_center = [center_lat, center_lng]

# confirm the order of area_center, the location for our folium map
print(area_center)

### To create a folium map just call the folium Map() constructor 
- location is the only argument required by the constructor
- Set area_center as the location of the folium map.
- zoom_start is an optional argument
- A setting of 12 should get us nice and close

In [None]:
#Create the base street map centred on area_center and display it
map_options = {'location': area_center, 'zoom_start': 12}
map_37203 = folium.Map(**map_options)
map_37203

#### Next, let's add our 37203 polygon and scooter trip start markers.

#### `iterrows()` is a generator that iterates through the rows of a DataFrame and yields a tuple with the row index and row values. 
- Below, we print the row values for the first 3 rows as we iterate through the GeoDataFrame of scooter trips started in 37203. 
- This idea will be helpful for creating our markers!

In [None]:
#Print the first 3 rows to show what iterrows() yields: a (row index, row values) pair per row.
#head(3) limits the loop to the rows that are actually printed, instead of walking the
#whole dataframe while a counter suppresses the output.
for row_index, row_values in start_zips_37203.head(3).iterrows():
    print('index is', row_index)
    print('values are:')
    print(' ')
    print(row_values)
    print('------------------------- ')

#### Create map_37203 again with additional features:
- add the zipcode area outline (`polygon37203`)
- iterate through the start_zips_37203 to
    - create location from each lat and lng
    - create a popup from the hour column
    - create a custom icon if you want with [font-awesome](https://fontawesome.com/v4.7.0/icons/)
    - build a marker from each location and popup and (optionally) your custom icon 
    - add the marker to  `map_37203`
- display `map_37203`

In [None]:
#There are over 25k markers in the start_zips_37203 dataframe so we may need to filter it:
    #check number of rows: start_zips_37203.shape
#Keep only the trips whose start hour is 12 or 13
midday_hours = [12, 13]
is_midday = start_zips_37203['hour'].isin(midday_hours)
start_zips_37203_hours_12_to_1pm = start_zips_37203[is_midday]
start_zips_37203_hours_12_to_1pm.head()

In [None]:
#(rows, columns) of the 12-13h subset; the row count is the number of trips left after the filter
start_zips_37203_hours_12_to_1pm.shape

In [None]:
#Keep only trips that started in hour 18, 19 or 23 on a Friday, Saturday or Sunday
peak_hours = [18, 19, 23]
peak_days = ['Saturday', 'Sunday', 'Friday']
in_peak_hour = start_zips_37203['hour'].isin(peak_hours)
on_peak_day = start_zips_37203['week_day'].isin(peak_days)
start_zips_37203_peak_hours = start_zips_37203[in_peak_hour & on_peak_day]
start_zips_37203_peak_hours.shape

#### If you have a lot of markers, you might want to use marker clusters
- Folium has `MarkerCluster()` and `FastMarkerCluster()`
- Construct a `MarkerCluster()` 
    - construct a `MarkerCluster()` and `.add_to()` the map
    - when you loop through the data and create markers add those to the marker cluster
- Construct a `FastMarkerCluster()` by passing a list of locations

In [None]:
#Build a street map centred on area_center for the clustered markers
cluster_map_37203 = folium.Map(location =  area_center, zoom_start = 12)

#create a marker cluster
marker_cluster = MarkerCluster().add_to(cluster_map_37203)

#outline the selected zip code polygons on the map
folium.GeoJson(polygon37203).add_to(cluster_map_37203)

#Create one marker per peak-hour trip start and add it to the cluster.
#Only the three columns the markers need are walked, in parallel; this avoids iterrows(),
#which builds a full Series for every row.
#remember for Folium locations, lat is listed first!!
peak_starts = zip(
    start_zips_37203_peak_hours['startlatitude'],
    start_zips_37203_peak_hours['startlongitude'],
    start_zips_37203_peak_hours['hour'],
)
for start_lat, start_lng, start_hour in peak_starts:
    #a new Icon is created for every marker; the popup shows the start hour
    folium.Marker(
        location=[start_lat, start_lng],
        popup=str(start_hour),
        icon=folium.Icon(color="blue", icon="fa-dot-circle-o", prefix='fa'),
    ).add_to(marker_cluster)

#save an interactive HTML map by calling .save()
#cluster_map_37203.save('../maps/cluster37203.html')

In [None]:
#display our clustered map (a bare last expression renders the folium map inline in the notebook)
cluster_map_37203