In [1]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import psycopg2
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from sqlalchemy import create_engine, MetaData, Table

In [2]:
database_name = 'scooters'    # Fill this in with your scooter database name

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

engine = create_engine(connection_string)

In [3]:
zipcodes = gpd.read_file('../data/zipcodes.geojson')
print(zipcodes.crs)
zipcodes.head( )

EPSG:4326


Unnamed: 0,zip,objectid,po_name,shape_stlength,shape_starea,geometry
0,37115,1,MADISON,178783.0248888682,596553400.5788574,"MULTIPOLYGON (((-86.68725 36.31821, -86.68722 ..."
1,37216,3,NASHVILLE,75820.99782140006,188884682.28344727,"MULTIPOLYGON (((-86.73451 36.23774, -86.73425 ..."
2,37204,9,NASHVILLE,93180.2922504256,200664795.51708984,"MULTIPOLYGON (((-86.77914 36.13424, -86.77923 ..."
3,37027,11,BRENTWOOD,159760.6942933173,174978422.04101562,"MULTIPOLYGON (((-86.81258 36.06319, -86.81263 ..."
4,37064,18,FRANKLIN,28995.828320601937,46969608.005737305,"MULTIPOLYGON (((-87.02197 36.01200, -87.02140 ..."


In [4]:
numb_of_scoot = '''
SELECT startlatitude, startlongitude
FROM trips
WHERE starttime BETWEEN '13:00:00' AND '14:00:00'
'''
result = engine.execute(numb_of_scoot)

scoot_num = pd.read_sql(numb_of_scoot, con = engine)

In [5]:
scoot_num['geometry'] = scoot_num.apply(lambda x: Point((x.startlongitude, 
                                                         x.startlatitude)), 
                                        axis=1)

In [6]:
scoot_num = gpd.GeoDataFrame(scoot_num, crs='EPSG:4326', geometry='geometry')

In [7]:
scoot_num

Unnamed: 0,startlatitude,startlongitude,geometry
0,36.154500,-86.785100,POINT (-86.78510 36.15450)
1,36.149300,-86.802300,POINT (-86.80230 36.14930)
2,36.155585,-86.765836,POINT (-86.76584 36.15559)
3,36.151900,-86.780900,POINT (-86.78090 36.15190)
4,36.160800,-86.778900,POINT (-86.77890 36.16080)
...,...,...,...
11701,36.169735,-86.771711,POINT (-86.77171 36.16974)
11702,36.146200,-86.782800,POINT (-86.78280 36.14620)
11703,36.156600,-86.765500,POINT (-86.76550 36.15660)
11704,36.151000,-86.782600,POINT (-86.78260 36.15100)


In [8]:
start_by_zip = gpd.sjoin(scoot_num, zipcodes, op = 'within')

  if await self.run_code(code, result, async_=asy):


In [9]:
start_by_zip

Unnamed: 0,startlatitude,startlongitude,geometry,index_right,zip,objectid,po_name,shape_stlength,shape_starea
0,36.154500,-86.785100,POINT (-86.78510 36.15450),35,37203,33,NASHVILLE,91285.438580354588,120649702.5546875
1,36.149300,-86.802300,POINT (-86.80230 36.14930),35,37203,33,NASHVILLE,91285.438580354588,120649702.5546875
3,36.151900,-86.780900,POINT (-86.78090 36.15190),35,37203,33,NASHVILLE,91285.438580354588,120649702.5546875
4,36.160800,-86.778900,POINT (-86.77890 36.16080),35,37203,33,NASHVILLE,91285.438580354588,120649702.5546875
6,36.153600,-86.785400,POINT (-86.78540 36.15360),35,37203,33,NASHVILLE,91285.438580354588,120649702.5546875
...,...,...,...,...,...,...,...,...,...
5933,36.221755,-86.851122,POINT (-86.85112 36.22176),27,37218,55,NASHVILLE,253501.29953687743,1071962039.8548584
6845,36.207674,-86.839550,POINT (-86.83955 36.20767),27,37218,55,NASHVILLE,253501.29953687743,1071962039.8548584
7212,36.209421,-86.823620,POINT (-86.82362 36.20942),27,37218,55,NASHVILLE,253501.29953687743,1071962039.8548584
10908,36.216833,-86.845777,POINT (-86.84578 36.21683),27,37218,55,NASHVILLE,253501.29953687743,1071962039.8548584


In [10]:
count_by_zip = start_by_zip.groupby('zip').count()

In [11]:
count_by_zip = count_by_zip.rename(columns={'startlatitude':'numb_started'})

In [12]:
count_by_zip = count_by_zip['numb_started']

In [13]:
count_by_zip = count_by_zip.reset_index()

In [14]:
count_by_zip.sort_values('numb_started', ascending=False)

Unnamed: 0,zip,numb_started
3,37203,4812
12,37212,1330
2,37201,1189
18,37219,1148
8,37208,759
13,37213,656
4,37204,527
6,37206,480
10,37210,449
21,37240,189


In [47]:
date_of_scoot = '''
SELECT EXTRACT(DOW FROM startdate) AS DayOfWeek, startdate
FROM trips
'''
result = engine.execute(date_of_scoot)

scoot_date = pd.read_sql(date_of_scoot, con = engine)

In [48]:
scoot_date

Unnamed: 0,dayofweek,startdate
0,3.0,2019-05-01
1,3.0,2019-05-01
2,3.0,2019-05-01
3,3.0,2019-05-01
4,3.0,2019-05-01
...,...,...
565517,3.0,2019-07-31
565518,3.0,2019-07-31
565519,3.0,2019-07-31
565520,3.0,2019-07-31


In [94]:
grouped = scoot_date.groupby('dayofweek').agg({'startdate':'count','startdate':'nunique'})

In [96]:
grouped = grouped.reset_index()

In [97]:
grouped

Unnamed: 0,dayofweek,startdate
0,0.0,13
1,1.0,13
2,2.0,13
3,3.0,14
4,4.0,13
5,5.0,13
6,6.0,13


In [99]:
total_of_scoot = scoot_date.groupby('dayofweek')['startdate'].count()

In [101]:
total_of_scoot = total_of_scoot.reset_index()

In [105]:
total_of_scoot = total_of_scoot.rename(columns={'startdate':'sumofrented'})

In [106]:
total_of_scoot

Unnamed: 0,dayofweek,sumofrented
0,0.0,103150
1,1.0,65911
2,2.0,55557
3,3.0,61266
4,4.0,62828
5,5.0,88697
6,6.0,128113


In [107]:
total_of_scoot['numbofdays'] = grouped['startdate']

In [108]:
total_of_scoot

Unnamed: 0,dayofweek,sumofrented,numbofdays
0,0.0,103150,13
1,1.0,65911,13
2,2.0,55557,13
3,3.0,61266,14
4,4.0,62828,13
5,5.0,88697,13
6,6.0,128113,13


In [112]:
total_of_scoot['avg_per_dayofweek'] = total_of_scoot['sumofrented']/total_of_scoot['numbofdays']

In [114]:
total_of_scoot['avg_per_dayofweek'] = round(total_of_scoot['avg_per_dayofweek'],2)

In [115]:
total_of_scoot

Unnamed: 0,dayofweek,sumofrented,numbofdays,avg_per_dayofweek
0,0.0,103150,13,7934.62
1,1.0,65911,13,5070.08
2,2.0,55557,13,4273.62
3,3.0,61266,14,4376.14
4,4.0,62828,13,4832.92
5,5.0,88697,13,6822.85
6,6.0,128113,13,9854.85


In [126]:
avg_per_day = total_of_scoot[['dayofweek','avg_per_dayofweek']]

In [127]:
avg_per_day

Unnamed: 0,dayofweek,avg_per_dayofweek
0,0.0,7934.62
1,1.0,5070.08
2,2.0,4273.62
3,3.0,4376.14
4,4.0,4832.92
5,5.0,6822.85
6,6.0,9854.85


In [131]:
avg_per_day['dayofweek'] = avg_per_day['dayofweek'].replace({0.0:'Sunday',1.0:'Monday',2.0:'Tuesday',3.0:'Wednesday',4.0:'Thursday',5.0:'Friday',6.0:'Saturday'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  avg_per_day['dayofweek'] = avg_per_day['dayofweek'].replace({0.0:'Sunday',1.0:'Monday',2.0:'Tuesday',3.0:'Wednesday',4.0:'Thursday',5.0:'Friday',6.0:'Saturday'})


In [132]:
avg_per_day

Unnamed: 0,dayofweek,avg_per_dayofweek
0,Sunday,7934.62
1,Monday,5070.08
2,Tuesday,4273.62
3,Wednesday,4376.14
4,Thursday,4832.92
5,Friday,6822.85
6,Saturday,9854.85
