# Setting Up the Environment & Preparing the Data

In [67]:
# Setting up the environment

import pandas as pd
import geopandas as gpd
import folium
import seaborn as sns
import matplotlib.pyplot as plt
from sqlalchemy import create_engine, text

In [3]:
# Connecting to SQL
import os

database_name = 'scooters'

# Prefer a connection URL supplied through the SCOOTERS_DATABASE_URL environment
# variable so real credentials never have to be committed with the notebook.
# The fallback is the local development default this notebook has always used.
connection_string = os.environ.get(
    "SCOOTERS_DATABASE_URL",
    f"postgresql://postgres:postgres@localhost:5433/{database_name}",
)

engine = create_engine(connection_string)

### Because the dataset is large, we pull it in batches: query SQL one company at a time, save each result as a CSV, and then read those CSVs back in.

def query_to_csv(query, csv_path, params=None):
    """Run `query` on `engine`, write the result to `csv_path`, and return it.

    Parameters
    ----------
    query : str
        SQL statement; may contain named bind parameters such as ``:company``.
    csv_path : str
        Destination CSV file (written without the DataFrame index).
    params : dict, optional
        Values for the named bind parameters in `query`.

    Returns
    -------
    pandas.DataFrame
        The rows returned by the query.
    """
    # A fresh connection per query; the `with` block closes it once the read finishes.
    with engine.connect() as connection:
        frame = pd.read_sql(text(query), con=connection, params=params)
    frame.to_csv(csv_path, index=False)
    return frame


# One statement for every company; the name is passed as a bound parameter
# instead of being pasted into the SQL text.
company_query = '''SELECT * FROM scooters WHERE companyname = :company;'''

scooters_bird = query_to_csv(company_query, "../data/bird.csv", {"company": "Bird"})
scooters_bolt = query_to_csv(company_query, "../data/bolt.csv", {"company": "Bolt"})
scooters_gotcha = query_to_csv(company_query, "../data/gotcha.csv", {"company": "Gotcha"})
scooters_jump = query_to_csv(company_query, "../data/jump.csv", {"company": "Jump"})
scooters_lime = query_to_csv(company_query, "../data/lime.csv", {"company": "Lime"})
scooters_lyft = query_to_csv(company_query, "../data/lyft.csv", {"company": "Lyft"})
scooters_spin = query_to_csv(company_query, "../data/spin.csv", {"company": "Spin"})

# The trips table is pulled in a single query.
query = '''SELECT * FROM trips;'''

trips = query_to_csv(query, "../data/trips.csv")

In [15]:
# Read in the CSVs
# Every cached extract is loaded the same way in one cell, so a fresh
# "Restart & Run All" rebuilds all eight frames in a fixed order.
csv_names = ('bird', 'bolt', 'gotcha', 'jump', 'lime', 'lyft', 'spin', 'trips')

bird, bolt, gotcha, jump, lime, lyft, spin, trips = (
    pd.read_csv(f'../data/{name}.csv') for name in csv_names
)

### Using concat to bring all the data back together; formatting datetime

In [53]:
# Stack the per-company frames into one table with a fresh 0..n-1 index.
scooters = pd.concat(
    objs=[bird, bolt, gotcha, jump, lime, lyft, spin],
    ignore_index=True,
)

In [56]:
# Preview the first five rows of the combined table.
scooters.head(n=5)

Unnamed: 0,pubdatetime,latitude,longitude,sumdid,sumdtype,chargelevel,sumdgroup,costpermin,companyname
0,2019-05-30 02:18:34.800,36.136514,-86.800949,PoweredBRRXQ,Powered,85.0,scooter,0.15,Bird
1,2019-05-30 02:18:34.800,36.136209,-86.800856,PoweredM76B1,Powered,85.0,scooter,0.15,Bird
2,2019-05-30 02:18:34.800,36.164312,-86.777405,PoweredGS3HE,Powered,43.0,scooter,0.15,Bird
3,2019-05-30 02:18:34.800,36.161616,-86.774109,Powered8MPRF,Powered,88.0,scooter,0.15,Bird
4,2019-05-30 02:18:34.800,36.164426,-86.778829,PoweredXKQYH,Powered,92.0,scooter,0.15,Bird


In [57]:
# Parse the publication timestamps; format='mixed' lets pandas infer the
# format of each value individually.
scooters['pubdatetime'] = scooters['pubdatetime'].pipe(pd.to_datetime, format='mixed')

In [77]:
# Parse the trip publication timestamps; format='mixed' lets pandas infer the
# format of each value individually.
trips['pubtimestamp'] = trips['pubtimestamp'].pipe(pd.to_datetime, format='mixed')

# Question 1
### During this period, seven companies offered scooters. How many scooters did each company have in this time frame? Did the number for each company change over time? Did scooter usage vary by company?

In [60]:
# How many scooters did each company have in this time frame?
# Counts the distinct device ids (sumdid) seen for each company.
(
    scooters
    .groupby('companyname')['sumdid']
    .nunique()
)

companyname
Bird      3860
Bolt       360
Gotcha     224
Jump      1210
Lime      1824
Lyft      1735
Spin       805
Name: sumdid, dtype: int64

In [71]:
# Did the number for each company change over time?
# Distinct device ids (sumdid) per company per calendar day.
scooters_by_day = (scooters
                   .assign(pubdate = scooters['pubdatetime'].dt.date)
                   .groupby(['companyname', 'pubdate'])['sumdid']
                   .nunique()
                   .reset_index())

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=scooters_by_day, x='pubdate', y='sumdid', hue='companyname', alpha=.6, ax=ax)
ax.tick_params(axis='x', labelrotation=45)
ax.set_title('Number of Scooters Over Time by Company')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Scooters')
# Move the legend of *this* axes outside the plot area and title it in the
# same call; calling plt.legend() afterwards would rebuild the legend and
# discard the placement.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), title='Company')
fig.tight_layout()
plt.show()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 578 entries, 0 to 577
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   companyname  578 non-null    object
 1   pubdate      578 non-null    object
 2   sumdid       578 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 13.7+ KB


In [85]:
# Did scooter usage vary by company?
# Row counts per company per calendar day; .count() is applied to every
# remaining column, and the 'sumdid' count is what gets plotted below.
trips_by_day = (trips
                .assign(pubdate = trips['pubtimestamp'].dt.date)
                .groupby(['companyname', 'pubdate'])
                .count()
                .reset_index())

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=trips_by_day, x='pubdate', y='sumdid', hue='companyname', alpha=.6, ax=ax)
ax.tick_params(axis='x', labelrotation=45)
ax.set_title('Number of Scooter Trips Over Time by Company')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Scooter Trips')
# Move the legend of *this* axes outside the plot area and title it in the
# same call; calling plt.legend() afterwards would rebuild the legend and
# discard the placement.
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1), title='Company')
fig.tight_layout()
plt.show()

Unnamed: 0,companyname,pubdate,pubtimestamp,triprecordnum,sumdid,tripduration,tripdistance,startdate,starttime,enddate,endtime,startlatitude,startlongitude,endlatitude,endlongitude,triproute,create_dt
0,Bird,2019-05-01,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104,1104
1,Bird,2019-05-02,1221,1221,1221,1221,1221,1221,1221,1221,1221,1221,1221,1221,1221,1221,1221
2,Bird,2019-05-03,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010,2010
3,Bird,2019-05-04,2741,2741,2741,2741,2741,2741,2741,2741,2741,2741,2741,2741,2741,2741,2741
4,Bird,2019-05-05,1987,1987,1987,1987,1987,1987,1987,1987,1987,1987,1987,1987,1987,1987,1987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,SPIN,2019-07-28,435,435,435,435,435,435,435,435,435,435,435,435,435,435,435
524,SPIN,2019-07-29,290,290,290,290,290,290,290,290,290,290,290,290,290,290,290
525,SPIN,2019-07-30,232,232,232,232,232,232,232,232,232,232,232,232,232,232,232
526,SPIN,2019-07-31,190,190,190,190,190,190,190,190,190,190,190,190,190,190,190
