In [None]:
import os

import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from folium.plugins import FastMarkerCluster
from folium.plugins import MarkerCluster
from shapely.geometry import Point

In [None]:
from sqlalchemy import create_engine, text

In [None]:
pip  install psycopg2-binary

In [None]:
# Connection settings are read from the environment so credentials do not have
# to be committed with the notebook. Each default reproduces the value that was
# previously hardcoded, so an unconfigured local setup behaves as before.
database_name = os.environ.get('SCOOTER_DB_NAME', 'scooter')
db_user = os.environ.get('SCOOTER_DB_USER', 'postgres')
db_password = os.environ.get('SCOOTER_DB_PASSWORD', 'postgres')
db_host = os.environ.get('SCOOTER_DB_HOST', 'localhost')
db_port = os.environ.get('SCOOTER_DB_PORT', '5432')

connection_string = f"postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{database_name}"

In [None]:
# Build the SQLAlchemy engine from the connection string; later cells open connections from it.
engine = create_engine(connection_string)

In [None]:
# Sample both ends of the scooters table: the 50 earliest and the 50 latest rows
# by pubdatetime, combined with UNION.
query = '''
(SELECT *
FROM scooters
ORDER BY pubdatetime
LIMIT 50)
UNION
(SELECT * 
FROM scooters
ORDER BY pubdatetime DESC
LIMIT 50)
'''

# `result` stays bound after the block so the following cells can fetch from it.
with engine.connect() as connection:
    result = connection.execute(text(query))

In [None]:
# Pull a single row from the result of the query executed above.
result.fetchone()

In [None]:
# Pull all rows that are still left in the result.
result.fetchall()

In [None]:
with engine.connect() as connection:    
    scooters = pd.read_sql(text(query), con = connection)
    trips = pd.read_sql(text(query), con = connection)

In [None]:
scooters.head()

In [None]:
scooters.isna().sum()

In [None]:
trips.isna().sum()

In [None]:
scooters.info()

In [None]:
scooters.pubdatetime.min()

In [None]:
scooters.pubdatetime.max()

In [None]:
scooters.pubdatetime.min()

In [None]:
scooters.pubdatetime.max()

In [None]:
scooters.groupby(by = 'pubdatetime')

In [None]:
scooters.pubdatetime.head()

In [None]:
scooters.pubdatetime.tail()

In [None]:
trips.pubdatetime.head()

In [None]:
trips.pubdatetime.tail()

In [None]:
# Per-scooter totals for Lime trips: summed distance and summed duration, one
# row per sumdid. Note that this rebinds `scooters` to the aggregated frame.
query = '''
SELECT sumdid, SUM(tripdistance)AS total_distance, SUM(tripduration)AS total_duration
FROM trips
WHERE companyname = 'Lime'
GROUP BY sumdid
'''

with engine.connect() as connection:
    scooters = pd.read_sql(sql = text(query), con = connection)

In [None]:
# Preview the per-scooter Lime totals loaded in the previous cell.
scooters.head()

In [None]:
# Read the zipcode polygons, report their coordinate reference system, then
# preview the first rows.
zipcodes = gpd.read_file(filename = '../data/zipcodes.geojson')
print(zipcodes.crs)
zipcodes.head()

In [None]:
from datetime import datetime

In [None]:
# Pull sumdid / latitude / longitude rows for Lime from the scooters table.
# NOTE(review): DISTINCT applies to the whole select list, not only to sumdid,
# so this returns one row per distinct (sumdid, latitude, longitude)
# combination. Confirm that is intended rather than one row per scooter.
query = '''
SELECT DISTINCT(sumdid), latitude, longitude
FROM scooters
WHERE companyname = 'Lime'
'''

with engine.connect() as connection:    
    scooters_loc = pd.read_sql(text(query), con = connection)

In [None]:
# Preview the Lime location rows.
scooters_loc.head()

In [None]:
# Build the point geometries in one vectorized call instead of a row-by-row
# apply. Points are (x, y), i.e. (longitude, latitude).
scooters_loc['geometry'] = gpd.points_from_xy(
    scooters_loc['longitude'].astype(float),
    scooters_loc['latitude'].astype(float),
)

In [None]:
# Promote the frame to a GeoDataFrame, tagging it with the zipcodes CRS.
# NOTE(review): assumes the latitude/longitude values are expressed in that same CRS.
scooters_loc_geo = gpd.GeoDataFrame(
    scooters_loc,
    geometry = scooters_loc['geometry'],
    crs = zipcodes.crs,
)

In [None]:
# Take the centroid of the zipcode polygon at index 25 as a reference point.
area_center = zipcodes.geometry.centroid[25]

In [None]:
# folium expects [latitude, longitude], i.e. [y, x], so the order is y first.
# The point is re-derived from zipcodes rather than read back from area_center,
# which keeps this cell re-runnable after area_center has become a list.
center_point = zipcodes.geometry.centroid[25]
area_center = [center_point.y, center_point.x]
print(area_center)

In [None]:
# Count Lime trip rows per pubtimestamp, largest counts first. The hour is
# derived from pubtimestamp in the following cells.
query = '''
SELECT pubtimestamp, COUNT(sumdid)AS num_of_scooters
FROM trips
WHERE companyname = 'Lime'
GROUP BY pubtimestamp
ORDER BY num_of_scooters DESC
'''

with engine.connect() as connection:
    scooters_by_hour = pd.read_sql(sql = text(query), con = connection)

In [None]:
# Preview the per-timestamp counts.
scooters_by_hour.head()

In [None]:
# Keep only the time-of-day part of each timestamp in a new 'time' column.
scooters_by_hour['time'] = scooters_by_hour['pubtimestamp'].dt.time

In [None]:
# Check column dtypes after adding 'time'.
scooters_by_hour.info()

In [None]:
# Extract the hour of day from each timestamp into a new 'hour' column.
scooters_by_hour['hour'] = scooters_by_hour['pubtimestamp'].dt.hour

In [None]:
# Check column dtypes after adding 'hour'.
scooters_by_hour.info()

In [None]:
# Look at the last rows; the query ordered by count descending, so these are the smallest counts.
scooters_by_hour.tail()

In [None]:
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone and a plain drop would raise KeyError.
scooters_by_hour = scooters_by_hour.drop(columns = ['pubtimestamp', 'time'], errors = 'ignore')

In [None]:
# A bare groupby only displays the GroupBy object; aggregate so the cell shows
# the total of the per-timestamp counts for each hour.
scooters_by_hour.groupby(by = "hour")['num_of_scooters'].sum()

In [None]:
# Summary statistics for the numeric columns.
scooters_by_hour.describe()

In [None]:
# Keep the ten rows with the highest counts.
top_scooters_by_hour = (
    scooters_by_hour
    .sort_values(by = 'num_of_scooters', ascending = False)
    .iloc[:10]
)

In [None]:
# Display the ten highest-count rows.
top_scooters_by_hour

In [None]:
# Number of Lime trips starting in each hour of the day, busiest hour first.
# The hour is extracted from starttime in the CTE and counted in the outer query.
query = '''
WITH hours AS(SELECT EXTRACT(HOUR from starttime)AS hour, sumdid
		FROM trips
		WHERE companyname = 'Lime'
		ORDER BY hour DESC)
SELECT hour, COUNT(hour)AS num_of_start_times
FROM hours
GROUP BY hour
ORDER BY num_of_start_times DESC;
'''

with engine.connect() as connection:
    scooter_start_time = pd.read_sql(sql = text(query), con = connection)


In [None]:
# Display the start counts per hour.
scooter_start_time

In [None]:
# Load starttime and sumdid for every row of the trips table (no filter, no limit).
# NOTE(review): no later cell visible in this notebook reads scooter_starts.
# Confirm it is still needed before paying for a full-table pull.
query = '''

SELECT starttime, sumdid
FROM trips;
'''

with engine.connect() as connection:
    scooter_starts = pd.read_sql(text(query), con = connection)

In [None]:
import seaborn as sns

In [None]:
# Horizontal bar chart of start counts (x) for each hour of the day (y), saved
# next to the data files.
start_time_graph = sns.catplot(
    data = scooter_start_time,
    x = "num_of_start_times",
    y = "hour",
    kind = "bar",
    orient = 'h',
)
start_time_graph.savefig('../data/start_time_graph.png')

In [None]:
# Draw the zipcode polygons as a base layer, then overlay the Lime scooter
# points coloured by sumdid on the same axes.
ax = zipcodes.plot(color = 'lightgreen', figsize = (10, 10))
scooters_loc_geo.plot(column = 'sumdid', ax = ax)
plt.show()

In [None]:
# Top 50 Lime (scooter, start location) groups ranked by their longest trip.
# NOTE(review): 0.0001894 is presumably a feet-to-miles factor (about 1/5280).
# Confirm the unit of tripdistance.
query = '''
(SELECT sumdid, ROUND(MAX(tripdistance)* 0.0001894, 2)AS max_distance_miles, startlatitude, startlongitude
	FROM trips
	WHERE companyname = 'Lime'
	GROUP BY sumdid, startlatitude, startlongitude
	ORDER BY max_distance_miles DESC
	LIMIT 50);
 '''

with engine.connect() as connection:
    top_scooter_starts = pd.read_sql(sql = text(query), con = connection)

In [None]:
# Top 50 Lime (scooter, end location) groups ranked by their longest trip.
# NOTE(review): 0.0001894 is presumably a feet-to-miles factor (about 1/5280).
# Confirm the unit of tripdistance.
query = '''
(SELECT sumdid, ROUND(MAX(tripdistance)* 0.0001894, 2)AS max_distance_miles, endlatitude, endlongitude
	FROM trips
	WHERE companyname = 'Lime'
	GROUP BY sumdid, endlatitude, endlongitude
	ORDER BY max_distance_miles DESC
	LIMIT 50);
 '''

with engine.connect() as connection:
    top_scooter_ends = pd.read_sql(sql = text(query), con = connection)

In [None]:
# Display the 50 start-location rows.
top_scooter_starts

In [None]:
# Display the 50 end-location rows.
top_scooter_ends

In [None]:
# Build the start points in one vectorized call instead of a row-by-row apply.
# Points are (x, y), i.e. (longitude, latitude).
top_scooter_starts['geometry'] = gpd.points_from_xy(
    top_scooter_starts['startlongitude'].astype(float),
    top_scooter_starts['startlatitude'].astype(float),
)
top_scooter_starts.head(3)

In [None]:
# Build the end points in one vectorized call instead of a row-by-row apply.
# Points are (x, y), i.e. (longitude, latitude).
top_scooter_ends['geometry'] = gpd.points_from_xy(
    top_scooter_ends['endlongitude'].astype(float),
    top_scooter_ends['endlatitude'].astype(float),
)
top_scooter_ends.head(3)

In [None]:
# GeoDataFrame of the top start locations, tagged with the zipcodes CRS.
top_scoots_geo = gpd.GeoDataFrame(
    top_scooter_starts,
    geometry = top_scooter_starts['geometry'],
    crs = zipcodes.crs,
)

In [None]:
# GeoDataFrame of the top end locations, tagged with the zipcodes CRS.
top_scoots_end_geo = gpd.GeoDataFrame(
    top_scooter_ends,
    geometry = top_scooter_ends['geometry'],
    crs = zipcodes.crs,
)

In [None]:
# Quick default plot of the zipcode polygons.
zipcodes.plot()

In [None]:
# Zipcode polygons coloured by zip with an outside legend, overlaid with the
# top start locations coloured by sumdid.
leg_kwds = dict(title = 'Zipcodes', loc = 'upper left', bbox_to_anchor = (1, 1.03), ncol = 2)

ax = zipcodes.plot(
    column = 'zip',
    cmap = 'Set3',
    edgecolor = 'black',
    figsize = (10, 10),
    legend = True,
    legend_kwds = leg_kwds,
)
top_scoots_geo.plot(column = 'sumdid', ax = ax)
plt.show()

In [None]:
# Same zipcode base map at a smaller figure size, overlaid with the top end
# locations coloured by sumdid.
leg_kwds = dict(title = 'Zipcodes', loc = 'upper left', bbox_to_anchor = (1, 1.03), ncol = 2)

ax = zipcodes.plot(
    column = 'zip',
    cmap = 'Set3',
    edgecolor = 'black',
    figsize = (5, 5),
    legend = True,
    legend_kwds = leg_kwds,
)
top_scoots_end_geo.plot(column = 'sumdid', ax = ax)
plt.show()

In [None]:
# Centroid of the zipcode polygon at index 27, printed for inspection.
# NOTE(review): `center` is not read by any later cell visible here.
center = zipcodes.geometry.centroid[27]
print(center)

In [None]:
# Use the point at index 25 of the top start locations as the map centre.
area_center = top_scoots_geo.geometry.centroid[25]
print(area_center)

In [None]:
# folium wants [latitude, longitude], i.e. [y, x]. The point is re-derived from
# top_scoots_geo rather than read back from area_center, so re-running this cell
# does not fail once area_center has already been turned into a list.
center_point = top_scoots_geo.geometry.centroid[25]
area_center = [center_point.y, center_point.x]

In [None]:
# Interactive map of the top start locations: a GeoJson layer of the points
# plus one red marker per row whose popup shows the scooter id.
zips_map = folium.Map(location = area_center, zoom_start = 12)
folium.GeoJson(top_scoots_geo).add_to(zips_map)

for _, start_row in top_scoots_geo.iterrows():
    folium.Marker(
        location = [start_row['startlatitude'], start_row['startlongitude']],
        popup = str(start_row['sumdid']),
        icon = folium.Icon(color = "red", prefix = 'fa'),
    ).add_to(zips_map)

zips_map.save('../data/zips_map.html')
zips_map

In [None]:
# Interactive map of the top end locations, one blue marker per row.
zips_map_end = folium.Map(location = area_center, zoom_start = 12)

# Attach the end-point layer to the map built in this cell. It used to be added
# to zips_map, which altered the start map and left this one without the layer.
folium.GeoJson(top_scoots_end_geo).add_to(zips_map_end)

for row_index, row_values in top_scoots_end_geo.iterrows():
    loc = [row_values['endlatitude'], row_values['endlongitude']]
    pop = str(row_values['sumdid'])
    icon = folium.Icon(color = "blue", prefix = 'fa')

    marker = folium.Marker(
        location = loc,
        popup = pop, icon = icon)

    marker.add_to(zips_map_end)

zips_map_end.save('../data/zips_map_end.html')
zips_map_end

In [None]:
# Map of the top end locations with blue "stop" markers.
# NOTE(review): despite the name, only end locations are drawn in this cell.
start_end_map = folium.Map(location = area_center, zoom_start = 12)

# Attach the layer to this cell's own map. It used to be added to zips_map.
folium.GeoJson(top_scoots_end_geo).add_to(start_end_map)

for row_index, row_values in top_scoots_end_geo.iterrows():
    point = [row_values['endlatitude'], row_values['endlongitude']]
    pop = str(row_values['sumdid'])
    icon = folium.Icon(color = "blue", icon = "stop", prefix = 'fa')

    marker1 = folium.Marker(
        location = point,
        popup = pop, icon = icon)

    marker1.add_to(start_end_map)

start_end_map.save('../data/start_end_map.html')
start_end_map

In [None]:
# Display the end-location GeoDataFrame.
top_scoots_end_geo


In [None]:
# NOTE(review): end_type is not read by any later cell visible here; the 'end'
# label is passed as a literal where it is needed.
end_type = 'end'

In [None]:
# Give the end coordinates neutral column names and tag every row as 'end' so
# the frame can be stacked with the start locations.
end_column_names = {'endlatitude': 'latitude', 'endlongitude':'longitude'}
top_end_loc = top_scoots_end_geo.rename(columns = end_column_names)
top_end_loc.insert(loc = 1, column = 'type', value = 'end')
top_end_loc

In [None]:
# Display the start-location GeoDataFrame.
top_scoots_geo

In [None]:
# Give the start coordinates neutral column names and tag every row as 'start'
# so the frame can be stacked with the end locations.
start_column_names = {'startlatitude': 'latitude', 'startlongitude':'longitude'}
top_start_loc = top_scoots_geo.rename(columns = start_column_names)
top_start_loc.insert(loc = 1, column = 'type', value = 'start')
top_start_loc

In [None]:
# Stack start rows on top of end rows. Each frame keeps its own index, so index
# labels repeat in the combined frame.
top_start_end_loc = pd.concat([top_start_loc, top_end_loc])
top_start_end_loc

In [None]:
# Combined map: green "play" markers for start locations, red "stop" markers
# for end locations, each with the scooter id as popup.
top_start_end_loc_map = folium.Map(location = area_center, zoom_start = 12)

# Attach the layer to this cell's own map. It used to be added to zips_map.
folium.GeoJson(top_start_end_loc).add_to(top_start_end_loc_map)

# Marker colour and glyph per row type. Rows with any other type are skipped,
# as they were by the former pair of if-statements.
marker_styles = {
    'end': ("red", "stop"),
    'start': ("green", "play"),
}

for row_index, row_values in top_start_end_loc.iterrows():
    style = marker_styles.get(row_values['type'])
    if style is None:
        continue
    color, glyph = style

    marker = folium.Marker(
        location = [row_values['geometry'].y, row_values['geometry'].x],
        popup = row_values['sumdid'],
        icon = folium.Icon(color = color, icon = glyph, prefix = 'fa'))

    marker.add_to(top_start_end_loc_map)

top_start_end_loc_map.save('../data/top_start_end_loc_map.html')
top_start_end_loc_map

In [None]:
# Top 50 (scooter, start location) groups across Lime, Bird and Spin, ranked by
# their longest trip.
# NOTE(review): 0.0001894 is presumably a feet-to-miles factor (about 1/5280).
# Confirm the unit of tripdistance.
query = '''
SELECT sumdid, ROUND(MAX(tripdistance)* 0.0001894, 2)AS max_distance_miles, startlatitude, startlongitude
	FROM trips
	WHERE companyname IN ('Lime', 'Bird', 'Spin')
	GROUP BY sumdid, startlatitude, startlongitude
	ORDER BY max_distance_miles DESC
	LIMIT 50
 '''

with engine.connect() as connection:
    top_all_scooters_start = pd.read_sql(sql = text(query), con = connection)

In [None]:
# Top 50 (scooter, end location) groups across Lime, Bird and Spin, ranked by
# their longest trip.
# NOTE(review): 0.0001894 is presumably a feet-to-miles factor (about 1/5280).
# Confirm the unit of tripdistance.
query = '''
	SELECT sumdid, ROUND(MAX(tripdistance)* 0.0001894, 2)AS max_distance_miles, endlatitude, endlongitude
	FROM trips
	WHERE companyname IN ('Lime', 'Bird', 'Spin')
	GROUP BY sumdid, endlatitude, endlongitude
	ORDER BY max_distance_miles DESC
	LIMIT 50
 '''

with engine.connect() as connection:
    top_all_scooters_end = pd.read_sql(sql = text(query), con = connection)

In [None]:
# Build the all-company start locations from top_all_scooters_start (the
# Lime/Bird/Spin query result). The Lime-only top_scoots_geo was used here
# before, which left the all-company query result unused.
top_all_starts_geo = gpd.GeoDataFrame(
    top_all_scooters_start,
    geometry = gpd.points_from_xy(
        top_all_scooters_start['startlongitude'].astype(float),
        top_all_scooters_start['startlatitude'].astype(float),
    ),
    crs = zipcodes.crs,
)

top_start_loc_all = top_all_starts_geo.rename(columns = {'startlatitude': 'latitude', 'startlongitude':'longitude'})

top_start_loc_all.insert(1,'type', 'start')
top_start_loc_all

In [None]:
# Build the all-company end locations from top_all_scooters_end (the
# Lime/Bird/Spin query result). The Lime-only top_scoots_end_geo was used here
# before, which left the all-company query result unused.
top_all_ends_geo = gpd.GeoDataFrame(
    top_all_scooters_end,
    geometry = gpd.points_from_xy(
        top_all_scooters_end['endlongitude'].astype(float),
        top_all_scooters_end['endlatitude'].astype(float),
    ),
    crs = zipcodes.crs,
)

top_end_loc_all = top_all_ends_geo.rename(columns = {'endlatitude': 'latitude', 'endlongitude':'longitude'})

top_end_loc_all.insert(1,'type', 'end')
top_end_loc_all

In [None]:
# Stack start rows on top of end rows. Each frame keeps its own index, so index
# labels repeat in the combined frame.
top_start_end_loc_all = pd.concat([top_start_loc_all, top_end_loc_all])
top_start_end_loc_all

In [None]:
# Combined map for the "all" frame: green "play" markers for start locations,
# red "stop" markers for end locations, each with the scooter id as popup.
top_start_end_loc_all_map = folium.Map(location = area_center, zoom_start = 12)

# Attach the layer to this cell's own map. It used to be added to zips_map.
folium.GeoJson(top_start_end_loc_all).add_to(top_start_end_loc_all_map)

# Marker colour and glyph per row type. Rows with any other type are skipped,
# as they were by the former pair of if-statements.
marker_styles = {
    'end': ("red", "stop"),
    'start': ("green", "play"),
}

for row_index, row_values in top_start_end_loc_all.iterrows():
    style = marker_styles.get(row_values['type'])
    if style is None:
        continue
    color, glyph = style

    marker = folium.Marker(
        location = [row_values['geometry'].y, row_values['geometry'].x],
        popup = row_values['sumdid'],
        icon = folium.Icon(color = color, icon = glyph, prefix = 'fa'))

    # Every marker goes on this map. End markers used to be added to
    # top_start_end_loc_map, the map from the earlier cell.
    marker.add_to(top_start_end_loc_all_map)

# Map.save writes an HTML document, so the file gets an .html name (was .png).
top_start_end_loc_all_map.save('../data/top_start_end_loc_all_map.html')
top_start_end_loc_all_map