In [None]:
import os
from urllib.parse import quote_plus

import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from shapely.geometry import Point

In [None]:
from sqlalchemy import create_engine, text

In [None]:
pip  install psycopg2-binary

In [None]:
database_name = 'scooter'

# Read the credentials from the standard libpq environment variables when they
# are set, so real usernames/passwords never have to be committed with the
# notebook. The fallbacks keep the previous local defaults working unchanged.
db_user = os.environ.get('PGUSER', 'postgres')
db_password = os.environ.get('PGPASSWORD', 'postgres')

# quote_plus escapes characters such as '@' or ':' that would otherwise break
# the URL when they appear in a username or password.
connection_string = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@localhost:5432/{database_name}"
)

In [None]:
# Create the SQLAlchemy engine for the database described by connection_string.
engine = create_engine(connection_string)

In [None]:
# Sample of the scooters table: the 50 rows with the lowest pubdatetime plus
# the 50 rows with the highest, combined with UNION (which also removes
# duplicate rows).
query = '''
(SELECT *
FROM scooters
ORDER BY pubdatetime
LIMIT 50)
UNION
(SELECT * 
FROM scooters
ORDER BY pubdatetime DESC
LIMIT 50)
'''

# NOTE(review): `result` is read by later cells after this connection block
# has exited -- confirm the rows are still available at that point.
with engine.connect() as connection:
    result = connection.execute(text(query))

In [None]:
# Fetch the next single row from the executed query.
result.fetchone()

In [None]:
# Fetch all rows that have not been consumed from the result yet.
result.fetchall()

In [None]:
with engine.connect() as connection:    
    # Load the sampled scooters rows into a DataFrame.
    scooters = pd.read_sql(text(query), con = connection)
    # NOTE(review): this executes the same `query` (FROM scooters) a second
    # time, so `trips` holds scooters rows rather than rows from the trips
    # table -- confirm whether a separate trips query was intended.
    trips = pd.read_sql(text(query), con = connection)

In [None]:
# Preview the first rows of the scooters frame.
scooters.head()

In [None]:
# Number of missing values in each scooters column.
scooters.isna().sum()

In [None]:
# Number of missing values in each column of the trips frame.
trips.isna().sum()

In [None]:
# Column names, dtypes and non-null counts for the scooters frame.
scooters.info()

In [None]:
# Earliest pubdatetime present in the loaded scooters rows.
scooters['pubdatetime'].min()

In [None]:
# Latest pubdatetime present in the loaded scooters rows.
scooters['pubdatetime'].max()

In [None]:
# Earliest pubdatetime again (same check as the earlier min() cell).
scooters['pubdatetime'].min()

In [None]:
# Latest pubdatetime again (same check as the earlier max() cell).
scooters['pubdatetime'].max()

In [None]:
# Number of rows recorded at each pubdatetime. A groupby with no aggregation
# only displays the GroupBy object itself, so reduce it with size().
scooters.groupby(by = 'pubdatetime').size()

In [None]:
# First few pubdatetime values of the scooters frame.
scooters['pubdatetime'].head()

In [None]:
# Last few pubdatetime values of the scooters frame.
scooters['pubdatetime'].tail()

In [None]:
# First few pubdatetime values of the trips frame.
trips['pubdatetime'].head()

In [None]:
# Last few pubdatetime values of the trips frame.
trips['pubdatetime'].tail()

In [None]:
# Total trip distance and total trip duration per sumdid, restricted to rows
# of the trips table whose companyname is 'Lime'.
query = '''
SELECT sumdid, SUM(tripdistance)AS total_distance, SUM(tripduration)AS total_duration
FROM trips
WHERE companyname = 'Lime'
GROUP BY sumdid
'''



with engine.connect() as connection:    
    # NOTE(review): this rebinds `scooters`, which previously held sampled
    # rows of the scooters table, to per-sumdid trip totals.
    scooters = pd.read_sql(text(query), con = connection)

In [None]:
# Preview the first rows of the frame currently bound to `scooters`.
scooters.head()

In [None]:
# Read the zipcode polygons from the GeoJSON file into a GeoDataFrame.
zipcodes = gpd.read_file('../data/zipcodes.geojson')
# Show the coordinate reference system the file was loaded with.
print(zipcodes.crs)
zipcodes.head( )

In [None]:
from datetime import datetime

In [None]:
# Recorded positions for scooters whose companyname is 'Lime'.
# NOTE(review): DISTINCT applies to the whole (sumdid, latitude, longitude)
# row, not to sumdid alone, so a scooter appears once per distinct position --
# confirm that is the intended result.
query = '''
SELECT DISTINCT(sumdid), latitude, longitude
FROM scooters
WHERE companyname = 'Lime'
'''

with engine.connect() as connection:    
    scooters_loc = pd.read_sql(text(query), con = connection)

In [None]:
# Preview the first rows of the Lime scooter positions.
scooters_loc.head()

In [None]:
# Build one point per row with x = longitude and y = latitude.
# points_from_xy creates all geometries in a single vectorised call, which
# replaces the much slower row-by-row apply; the columns are cast to float
# first so non-float numeric values from the database are accepted.
scooters_loc['geometry'] = gpd.points_from_xy(
    scooters_loc['longitude'].astype(float),
    scooters_loc['latitude'].astype(float),
)

In [None]:
# Wrap the positions in a GeoDataFrame that uses the 'geometry' column built
# above and is tagged with the same CRS as the zipcodes layer.
# NOTE(review): assumes the latitude/longitude values are expressed in the
# zipcodes CRS -- confirm against the printed zipcodes.crs.
scooters_loc_geo = gpd.GeoDataFrame(scooters_loc, 
                           crs = zipcodes.crs, 
                           geometry = scooters_loc['geometry'])

In [None]:
# Centroid of the zipcode geometry stored at index 25.
# NOTE(review): 25 is a hard-coded row choice -- confirm which zipcode it is.
area_center = zipcodes.geometry.centroid[25]

In [None]:
# Take the centroid straight from `zipcodes` instead of reading it back from
# `area_center`: the old form rebound the name from a point to a list, so
# running the cell a second time failed on `.x`. This version can be re-run.
center_point = zipcodes.geometry.centroid[25]
area_center = [center_point.x, center_point.y]
# NOTE(review): the list is in [x, y] order; folium map locations are given as
# [latitude, longitude], i.e. [y, x] -- confirm before passing this to folium.
print(area_center)

In [None]:
# Row counts of Lime trips per pubtimestamp, largest counts first.
# NOTE(review): the grouping key is the full pubtimestamp value, not the hour,
# and COUNT(sumdid) counts rows rather than distinct scooters -- confirm this
# matches what "scooters by hour" is meant to measure.
query  ='''
SELECT pubtimestamp, COUNT(sumdid)AS num_of_scooters
FROM trips
WHERE companyname = 'Lime'
GROUP BY pubtimestamp
ORDER BY num_of_scooters DESC
'''

with engine.connect() as connection:
    scooters_by_hour = pd.read_sql(text(query), con = connection)

In [None]:
# Preview the rows with the highest counts (the query sorts descending).
scooters_by_hour.head()

In [None]:
# Check column dtypes before using the .dt accessor on pubtimestamp.
scooters_by_hour.info()

In [None]:
# Add a column holding only the time-of-day part of each pubtimestamp.
scooters_by_hour['time'] = scooters_by_hour['pubtimestamp'].dt.time

In [None]:
# Re-check the frame after adding the 'time' column.
scooters_by_hour.info()

In [None]:
# Add a column holding the hour component of each pubtimestamp.
scooters_by_hour['hour'] = scooters_by_hour['pubtimestamp'].dt.hour

In [None]:
# Re-check the frame after adding the 'hour' column.
scooters_by_hour.info()

In [None]:
# Look at the last rows, i.e. the lowest counts given the query's ordering.
scooters_by_hour.tail()

In [None]:
# Keep only the count and hour columns. errors='ignore' makes the cell safe to
# re-run: the frame is rebound to its own result, so on a second run the
# columns are already gone and a plain drop would raise KeyError.
scooters_by_hour = scooters_by_hour.drop(columns =['pubtimestamp', 'time'], errors = 'ignore')

In [None]:
# Total count per hour of day. A groupby with no aggregation only displays the
# GroupBy object itself, so sum the per-timestamp counts within each hour.
scooters_by_hour.groupby(by = "hour")['num_of_scooters'].sum()

In [None]:
# Summary statistics for the numeric columns of scooters_by_hour.
scooters_by_hour.describe()

In [None]:
# Ten rows with the highest num_of_scooters. Each row still corresponds to one
# pubtimestamp group from the query, so the same hour can appear repeatedly.
top_scooters_by_hour = (
    scooters_by_hour
    .sort_values(by='num_of_scooters', ascending=False)
    .head(10)
)

In [None]:
# Display the ten highest-count rows.
top_scooters_by_hour

In [None]:
# Display the same ten rows again (repeat of the previous cell).
top_scooters_by_hour