## Shared Micromobility Vehicle Trips Data Analysis

### Data Source and API
- AODP Dataset Access: https://data.austintexas.gov/Transportation-and-Mobility/Shared-Micromobility-Vehicle-Trips/7d8e-dm7r
- API Endpoint: https://data.austintexas.gov/resource/7d8e-dm7r.json
- API Documentation: https://dev.socrata.com/foundry/data.austintexas.gov/7d8e-dm7r

To access the dataset host, install SODA API first:
    `pip install sodapy`

### Data Extraction

- Data Provided
    - trip_id
    - device_id
    - modified_date
    - council_district_start
    - council_district_end
    - vehicle_type
    - trip_duration
    - trip_distance
    - start_time
    - end_time
    - month
    - hour
    - day_of_week
    - year
    - census_geoid_start
    - census_geoid_end

In [1]:
# Dependencies and packages
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math as math
import datetime as dt
import seaborn as sns
import pandas as pd
from sodapy import Socrata

In [2]:
# Output File (CSV)
output_data_file = "Output_Data/shared_mobility.csv"

In [4]:
# url = https://data.austintexas.gov/resource/7d8e-dm7r.json
# Data Extraction:
client = Socrata("data.austintexas.gov", None)

results = client.get("7d8e-dm7r", where = "year= 2020")

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



In [5]:
results_df.head()

Unnamed: 0,census_geoid_end,census_geoid_start,council_district_end,council_district_start,day_of_week,device_id,end_time,hour,modified_date,month,start_time,trip_distance,trip_duration,trip_id,vehicle_type,year
0,48453001100,48453001100,9,9,6,bb94af41-5750-4e37-b8b6-ae261653ce28,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:30:00.000,851,283,32aa0192-886b-4eee-9279-8d4d4abad430,scooter,2020
1,48453001100,48453001200,9,9,6,659ddac0-dbee-4fc3-9985-87237e5dca1a,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:45:00.000,687,221,6ea5ca1c-caa4-463a-a035-d41c71a3f80f,scooter,2020
2,48453001100,48453001100,9,9,6,ec69cacb-aba4-46ad-ba9d-ada30f8d3a63,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:45:00.000,688,194,ab3c6c91-bdc1-4229-b480-71294e3b4e88,scooter,2020
3,48453001100,48453001303,9,5,6,cc08e1ca-cac5-4e53-82e3-459ec65688f1,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:15:00.000,2430,786,a5c3d985-df7c-40df-96f8-006309dcaea2,scooter,2020
4,48453001100,48453001305,9,9,6,a304ce98-be5e-4ec3-8b08-f0cdfcfaff58,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:15:00.000,78,637,45dfa1c3-f29f-46c9-9ec8-fc1a83e713df,scooter,2020


In [6]:
# Check for missing values:
results_df.count()

census_geoid_end          1000
census_geoid_start        1000
council_district_end      1000
council_district_start    1000
day_of_week               1000
device_id                 1000
end_time                  1000
hour                      1000
modified_date             1000
month                     1000
start_time                1000
trip_distance             1000
trip_duration             1000
trip_id                   1000
vehicle_type              1000
year                      1000
dtype: int64

In [7]:
missing_vehicle_type = results_df["vehicle_type"].isnull().sum()
print(f"There are {missing_vehicle_type} missing vehicle types.")
missing_month = results_df["month"].isnull().sum()
print(f"There are {missing_month} missing months.")
missing_census_geoid_start = results_df["census_geoid_start"].isnull().sum()
print(f"There are {missing_census_geoid_start} missing census_geoid_start.")

There are 0 missing vehicle types.
There are 0 missing months.
There are 0 missing census_geoid_start.


### Data Cleaning

In [8]:
clean_results_df = results_df.copy()

In [9]:
clean_results_df

Unnamed: 0,census_geoid_end,census_geoid_start,council_district_end,council_district_start,day_of_week,device_id,end_time,hour,modified_date,month,start_time,trip_distance,trip_duration,trip_id,vehicle_type,year
0,48453001100,48453001100,9,9,6,bb94af41-5750-4e37-b8b6-ae261653ce28,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:30:00.000,851,283,32aa0192-886b-4eee-9279-8d4d4abad430,scooter,2020
1,48453001100,48453001200,9,9,6,659ddac0-dbee-4fc3-9985-87237e5dca1a,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:45:00.000,687,221,6ea5ca1c-caa4-463a-a035-d41c71a3f80f,scooter,2020
2,48453001100,48453001100,9,9,6,ec69cacb-aba4-46ad-ba9d-ada30f8d3a63,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:45:00.000,688,194,ab3c6c91-bdc1-4229-b480-71294e3b4e88,scooter,2020
3,48453001100,48453001303,9,5,6,cc08e1ca-cac5-4e53-82e3-459ec65688f1,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:15:00.000,2430,786,a5c3d985-df7c-40df-96f8-006309dcaea2,scooter,2020
4,48453001100,48453001305,9,9,6,a304ce98-be5e-4ec3-8b08-f0cdfcfaff58,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:15:00.000,78,637,45dfa1c3-f29f-46c9-9ec8-fc1a83e713df,scooter,2020
5,48453001603,48453000203,10,9,6,018ea116-9d22-4c9e-9989-97c9e21fcecb,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:30:00.000,113,390,d65855b4-26d0-4ce6-bce6-673e00382f4f,scooter,2020
6,48453001100,48453000803,9,1,6,758fbc49-c59e-45a5-a85c-354fe2412f21,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:30:00.000,1116,312,32560e97-e231-4fc3-bea4-fd303053b5d1,scooter,2020
7,48453001305,48453001305,9,9,6,86cdc82f-b4f9-4158-8193-396c903c3898,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:45:00.000,0,61,d11fd6ea-35de-41fe-b39c-ed71c9928706,scooter,2020
8,48453001303,48453001305,5,9,6,5c1ded4c-1939-463d-925a-41f5bcc8cb5e,2020-01-11T09:00:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:00:00.000,903,374,ceaf9ce2-b45a-4773-b65f-1573304fc27b,scooter,2020
9,48453001100,48453001200,9,9,6,f9dc14da-5bac-465a-8844-3549a5cece77,2020-01-11T09:15:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:00:00.000,1979,982,3ade279e-0692-49a3-b735-d4eaa8ca064d,scooter,2020


In [10]:
clean_results_df = clean_results_df.rename(columns = {
    "trip_id": "Trip ID",
    "device_id": "Device ID",
    "modified_date": "Data Modified Date",
    "vehicle_type": "Vehicle Type",
    "trip_duration": "Trip Duration",
    "trip_distance": "Trip Distance",
    "start_time": "Trip Start Time",
    "end_time": "Trip End Time",
    "hour": "Hour",
    "day_of_week": "Day Of Week",
    "month": "Month",
    "year": "Year",
    "census_geoid_start": "GEOID Start",
    "census_geoid_end": "GEOID End",
    "council_district_start": "Start Council District",
    "council_district_end" : "Return Council District"
})

In [11]:
clean_results_df.head()

Unnamed: 0,GEOID End,GEOID Start,Return Council District,Start Council District,Day Of Week,Device ID,Trip End Time,Hour,Data Modified Date,Month,Trip Start Time,Trip Distance,Trip Duration,Trip ID,Vehicle Type,Year
0,48453001100,48453001100,9,9,6,bb94af41-5750-4e37-b8b6-ae261653ce28,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:30:00.000,851,283,32aa0192-886b-4eee-9279-8d4d4abad430,scooter,2020
1,48453001100,48453001200,9,9,6,659ddac0-dbee-4fc3-9985-87237e5dca1a,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:45:00.000,687,221,6ea5ca1c-caa4-463a-a035-d41c71a3f80f,scooter,2020
2,48453001100,48453001100,9,9,6,ec69cacb-aba4-46ad-ba9d-ada30f8d3a63,2020-01-11T08:45:00.000,8,2020-01-11T17:30:43.000,1,2020-01-11T08:45:00.000,688,194,ab3c6c91-bdc1-4229-b480-71294e3b4e88,scooter,2020
3,48453001100,48453001303,9,5,6,cc08e1ca-cac5-4e53-82e3-459ec65688f1,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:15:00.000,2430,786,a5c3d985-df7c-40df-96f8-006309dcaea2,scooter,2020
4,48453001100,48453001305,9,9,6,a304ce98-be5e-4ec3-8b08-f0cdfcfaff58,2020-01-11T09:30:00.000,9,2020-01-11T17:30:43.000,1,2020-01-11T09:15:00.000,78,637,45dfa1c3-f29f-46c9-9ec8-fc1a83e713df,scooter,2020


In [12]:
#Find the total number of scooter rides:
scooters = clean_results_df[clean_results_df["Vehicle Type"] == "scooter"]
total_scooters = len(clean_results_df) - scooters.shape[0]
total_scooters

155

In [None]:
#Find the total number of bicycle rides:
bicycles = clean_results_df[clean_results_df["Vehicle Type"] == "bicycle"]
total_bicycles = len(clean_results_df) - bicycles.shape[0]
total_bicycles

In [None]:
#Check to tally the total rides 
total_rides = total_bicycles + total_scooters
total_rides

In [None]:
# Drop all the null values
clean_results_df = clean_results_df.dropna(how='any')
clean_results_df.head()

In [None]:
# Find the number of unique devices
device_id_list =  clean_results_df["Device ID"].value_counts()
device_id_list

In [None]:
# Find the number of unique GEOIDs where the trips started
start_geoid = clean_results_df["GEOID Start"].value_counts()
start_geoid

In [None]:
# Find the number of unique GEOIDs where the trips ended
end_geoid = clean_results_df["GEOID End"].value_counts()
end_geoid

In [13]:
# Change the time and date format for columns - 'Data Modified Date', 'Trip Start time' and 'Trip End Time'
clean_results_df['Trip Start Time'] = pd.to_datetime(clean_results_df['Trip Start Time'])
clean_results_df['Trip End Time'] = pd.to_datetime(clean_results_df['Trip End Time'])
clean_results_df['Data Modified Date'] = pd.to_datetime(clean_results_df['Data Modified Date'])
clean_results_df.head()

Unnamed: 0,GEOID End,GEOID Start,Return Council District,Start Council District,Day Of Week,Device ID,Trip End Time,Hour,Data Modified Date,Month,Trip Start Time,Trip Distance,Trip Duration,Trip ID,Vehicle Type,Year
0,48453001100,48453001100,9,9,6,bb94af41-5750-4e37-b8b6-ae261653ce28,2020-01-11 09:30:00,9,2020-01-11 17:30:43,1,2020-01-11 09:30:00,851,283,32aa0192-886b-4eee-9279-8d4d4abad430,scooter,2020
1,48453001100,48453001200,9,9,6,659ddac0-dbee-4fc3-9985-87237e5dca1a,2020-01-11 08:45:00,8,2020-01-11 17:30:43,1,2020-01-11 08:45:00,687,221,6ea5ca1c-caa4-463a-a035-d41c71a3f80f,scooter,2020
2,48453001100,48453001100,9,9,6,ec69cacb-aba4-46ad-ba9d-ada30f8d3a63,2020-01-11 08:45:00,8,2020-01-11 17:30:43,1,2020-01-11 08:45:00,688,194,ab3c6c91-bdc1-4229-b480-71294e3b4e88,scooter,2020
3,48453001100,48453001303,9,5,6,cc08e1ca-cac5-4e53-82e3-459ec65688f1,2020-01-11 09:30:00,9,2020-01-11 17:30:43,1,2020-01-11 09:15:00,2430,786,a5c3d985-df7c-40df-96f8-006309dcaea2,scooter,2020
4,48453001100,48453001305,9,9,6,a304ce98-be5e-4ec3-8b08-f0cdfcfaff58,2020-01-11 09:30:00,9,2020-01-11 17:30:43,1,2020-01-11 09:15:00,78,637,45dfa1c3-f29f-46c9-9ec8-fc1a83e713df,scooter,2020


In [50]:
type(clean_results_df['GEOID End'][0])

str

In [51]:
type(zip_data['GEOID End'][0])

numpy.int64

In [97]:
csvpath = os.path.join("ZIP_TRACT.csv")


zip_data = pd.read_csv(csvpath)
zip_data

             

zip_data[["zip", "GEOID End"]] = zip_data[["zip", "GEOID End"]].astype(str)

zip_data


zip_data_merge = pd.merge(clean_results_df,zip_data, on= "GEOID End", how ="right" )

zip_data_merge.dropna()

merged_df = zip_data_merge.drop(columns=['res_ratio', 'bus_ratio', 'oth_ratio','tot_ratio']).dropna()

### Visualization of the Clean Data Set

In [85]:
client = Socrata("data.austintexas.gov", None)
results = client.get("i26j-ai4z", limit=900000)
results_df = pd.DataFrame.from_records(results)


#results_df.sort_values("sr_type_code",ascending = False )

results_df.sr_type_code.unique() 

shared_mobility = results_df.loc[results_df['sr_type_desc'] == "Shared Micromobility"]

shared_mobility




Unnamed: 0,:@computed_region_8spj_utxs,:@computed_region_a3it_2a2z,:@computed_region_e9j2_6w3z,:@computed_region_jcrc_4uuy,:@computed_region_m2th_e4b7,:@computed_region_q9nd_rr82,:@computed_region_rxpj_nzrk,sr_closed_date,sr_created_date,sr_department_desc,...,sr_location_x,sr_location_y,sr_location_zip_code,sr_method_received_desc,sr_number,sr_status_date,sr_status_desc,sr_type_code,sr_type_desc,sr_updated_date
25,9,2860,44,43,217,10,50,,2020-01-11T13:10:36.000,Transportation,...,3113386.99938810,10078558.00165750,78705,Phone,20-00013106,2020-01-11T13:10:36.000,Open,DOCKMOBI,Shared Micromobility,2020-01-11T13:10:36.000
30,9,2860,21,43,217,10,65,,2020-01-11T12:48:54.000,Transportation,...,3115680.99888024,10083371.00017130,78751,Phone,20-00013090,2020-01-11T12:48:54.000,Open,DOCKMOBI,Shared Micromobility,2020-01-11T12:48:54.000
35,9,2860,44,43,217,10,50,2020-01-11T13:45:35.000,2020-01-11T12:42:11.000,Transportation,...,3112953.75920598,10078781.93615310,78705,Phone,20-00013084,2020-01-11T13:45:35.000,Closed,DOCKMOBI,Shared Micromobility,2020-01-11T13:45:35.000
41,1,2857,69,7,309,8,1,,2020-01-11T12:29:34.000,Transportation,...,3125928.74844921,10073029.00185090,78702,Spot311 Interface,20-00013070,2020-01-11T12:29:34.000,Open,DOCKMOBI,Shared Micromobility,2020-01-11T12:29:34.000
49,3,2859,37,9,217,3,75,,2020-01-11T12:15:05.000,Transportation,...,3110306.74821084,10058816.00207640,78704,Phone,20-00013050,2020-01-11T12:15:05.000,Open,DOCKMOBI,Shared Micromobility,2020-01-11T12:15:05.000
62,8,3260,93,10,126,2,29,,2020-01-11T11:35:40.000,Transportation,...,3103985.33439226,10070156.05606280,78746,Spot311 Interface,20-00013013,2020-01-11T11:35:40.000,Open,DOCKMOBI,Shared Micromobility,2020-01-11T11:35:40.000
134,9,2860,19,43,217,10,50,,2020-01-11T07:30:03.000,Transportation,...,3116394.74827258,10081325.00020220,78705,Phone,20-00012833,2020-01-11T07:30:03.000,Open,DOCKMOBI,Shared Micromobility,2020-01-11T07:32:07.000
168,9,3640,21,6,217,10,65,2020-01-11T09:53:05.000,2020-01-10T21:56:11.000,Transportation,...,3116137.24766586,10084396.00219220,78751,Phone,20-00012691,2020-01-11T09:53:05.000,Closed,DOCKMOBI,Shared Micromobility,2020-01-11T09:53:05.000
268,9,2860,82,43,217,10,50,2020-01-11T09:58:46.000,2020-01-10T16:08:38.000,Transportation,...,3114306.76800564,10082836.71257000,78705,Phone,20-00012386,2020-01-11T09:58:46.000,Closed,DOCKMOBI,Shared Micromobility,2020-01-11T09:58:46.000
299,9,2860,82,43,217,10,50,,2020-01-10T15:30:11.000,Transportation,...,3114669.24902832,10082991.00054950,78705,Phone,20-00012299,2020-01-10T15:30:11.000,Open,DOCKMOBI,Shared Micromobility,2020-01-10T15:30:11.000


In [95]:
scooter_df = pd.DataFrame({
    "Complaint Number": shared_mobility["sr_number"],
    "Complaint Type": shared_mobility["sr_type_desc"],
    "Complaint Date": shared_mobility["sr_created_date"],
    "complaint_location": shared_mobility["sr_location"],
    "zip": shared_mobility["sr_location_zip_code"],
    "complaint_latitude": shared_mobility["sr_location_lat"],
    "complaint_longitudge": shared_mobility["sr_location_long"]
                                           })

scooter_df

Unnamed: 0,Complaint Number,Complaint Type,Complaint Date,complaint_location,zip,complaint_latitude,complaint_longitudge
25,20-00013106,Shared Micromobility,2020-01-11T13:10:36.000,"702 W 25TH ST, AUSTIN, TX 78705",78705,30.28957867,-97.74516395
30,20-00013090,Shared Micromobility,2020-01-11T12:48:54.000,"504 W 38TH ST, AUSTIN, TX 78751",78751,30.30266192,-97.73754189
35,20-00013084,Shared Micromobility,2020-01-11T12:42:11.000,"2511 PEARL ST, AUSTIN, TX 78705",78705,30.29022193,-97.74651975
41,20-00013070,Shared Micromobility,2020-01-11T12:29:34.000,"1171 PANDORA ST, AUSTIN, TX 78702",78702,30.273572,-97.70585187
49,20-00013050,Shared Micromobility,2020-01-11T12:15:05.000,"CUMBERLAND RD & S CONGRESS AVE, AUSTIN, TX",78704,30.23550751,-97.75636958
62,20-00013013,Shared Micromobility,2020-01-11T11:35:40.000,"2387 ANDREW ZILKER RD, AUSTIN, TX 78746",78746,30.26708079,-97.77555606
134,20-00012833,Shared Micromobility,2020-01-11T07:30:03.000,"101 E 33RD ST, AUSTIN, TX 78705",78705,30.29699197,-97.73543217
168,20-00012691,Shared Micromobility,2020-01-10T21:56:11.000,"4001 GUADALUPE ST, AUSTIN, TX 78751",78751,30.30545021,-97.73602069
268,20-00012386,Shared Micromobility,2020-01-10T16:08:38.000,"3313 KINGS LN, AUSTIN, TX 78705",78705,30.30128133,-97.7419348
299,20-00012299,Shared Micromobility,2020-01-10T15:30:11.000,"621 W 35TH ST, AUSTIN, TX 78705",78705,30.30168222,-97.74077511


In [98]:
combined_df = pd.merge(merged_df,scooter_df, on= "zip", how ="right" )

combined_df.dropna()


Unnamed: 0,GEOID End,GEOID Start,Return Council District,Start Council District,Day Of Week,Device ID,Trip End Time,Hour,Data Modified Date,Month,...,Trip ID,Vehicle Type,Year,zip,Complaint Number,Complaint Type,Complaint Date,complaint_location,complaint_latitude,complaint_longitudge
0,48453001100,48453001100,9,9,6,bb94af41-5750-4e37-b8b6-ae261653ce28,2020-01-11 09:30:00,9,2020-01-11 17:30:43,1,...,32aa0192-886b-4eee-9279-8d4d4abad430,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
1,48453001100,48453001200,9,9,6,659ddac0-dbee-4fc3-9985-87237e5dca1a,2020-01-11 08:45:00,8,2020-01-11 17:30:43,1,...,6ea5ca1c-caa4-463a-a035-d41c71a3f80f,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
2,48453001100,48453001100,9,9,6,ec69cacb-aba4-46ad-ba9d-ada30f8d3a63,2020-01-11 08:45:00,8,2020-01-11 17:30:43,1,...,ab3c6c91-bdc1-4229-b480-71294e3b4e88,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
3,48453001100,48453001303,9,5,6,cc08e1ca-cac5-4e53-82e3-459ec65688f1,2020-01-11 09:30:00,9,2020-01-11 17:30:43,1,...,a5c3d985-df7c-40df-96f8-006309dcaea2,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
4,48453001100,48453001305,9,9,6,a304ce98-be5e-4ec3-8b08-f0cdfcfaff58,2020-01-11 09:30:00,9,2020-01-11 17:30:43,1,...,45dfa1c3-f29f-46c9-9ec8-fc1a83e713df,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
5,48453001100,48453000803,9,1,6,758fbc49-c59e-45a5-a85c-354fe2412f21,2020-01-11 08:45:00,8,2020-01-11 17:30:43,1,...,32560e97-e231-4fc3-bea4-fd303053b5d1,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
6,48453001100,48453001200,9,9,6,f9dc14da-5bac-465a-8844-3549a5cece77,2020-01-11 09:15:00,9,2020-01-11 17:30:43,1,...,3ade279e-0692-49a3-b735-d4eaa8ca064d,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
7,48453001100,48453001100,9,9,6,c6190308-6c63-4455-a376-26a9fa564c62,2020-01-11 08:45:00,8,2020-01-11 17:30:43,1,...,9b04a627-5930-4260-a34d-1d25d18f8740,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
8,48453001100,48453001200,9,9,6,e91322a1-48c6-4d83-aee1-cb5020c87388,2020-01-11 09:15:00,9,2020-01-11 17:30:43,1,...,7fb33934-a537-48b7-9431-ca737c4f5499,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159
9,48453001100,48453001100,9,9,6,b56c1b0e-4931-4ab6-bd7b-476669e34273,2020-01-11 09:30:00,9,2020-01-11 17:30:43,1,...,714c0917-3922-4171-81ff-c448bb01c565,scooter,2020,78701,20-00012058,Shared Micromobility,2020-01-10T13:08:13.000,"E 14TH ST & BRAZOS ST, AUSTIN, TX",30.27559638,-97.73852159


In [None]:
# Count trips per day and sort by day:
daily_total = pd.DataFrame(clean_results_df['Day Of Week'].value_counts().sort_index())

# Map day of week for better labels. Data documentation indicates "0=Sunday and so on.""
days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
daily_total['Day'] = days

# Plot
daily_total.plot(kind='bar', x='Day', y='Day Of Week', title='Total Trip Counts by Day of week', figsize = (10,5), rot= 30, legend=False)
plt.ylabel('Number of Trips')
plt.savefig("Plots/trips_per_week.png")
plt.show()

In [None]:
# Count trips per hour and sort by hour:
hourly_total = pd.DataFrame(clean_results_df['Hour'].value_counts().sort_index())
#hours = ['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18', '19','20','21','22','23']
#hours = ['0','1','10','11','12','13','14','15','16','17','18','19','2','20','21','22','23','3','4','5','6','7','8','9']
#hourly_total['Hours'] = hours

hourly_total.reset_index().plot(kind='bar', x='index', y='Hour', title='Total Trip Counts by Hour', figsize = (10,5), legend=False)
plt.xlabel('Hours')
plt.ylabel('Number of Trips')
plt.savefig("Plots/trips_per_hour.png")
plt.show()
#now shows correct plot but xticks not sorted

In [None]:
hourly_total

In [None]:
monthly_total = pd.DataFrame(clean_results_df['Month'].value_counts()).sort_index()
import calendar
mn=[calendar.month_name[int(x)] for x in monthly_total.index.values.tolist()]

monthly_chart = monthly_total.plot.bar(title = "Total Trips per Month ",width = 0.75,figsize = (10,5),rot = 30, legend = False)
monthly_chart.set_xticklabels(mn)
monthly_chart.set_xlabel("Trip Months")
monthly_chart.set_ylabel("Total Trip Count")
plt.savefig("Plots/trips_per_month1.png")
plt.show()
#shows correct plot but xticks not sorted

In [None]:
month_list = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
monthly_total['Months'] = month_list
monthly_total.reset_index().plot(kind='bar', x='index', y='Month', title='Total Trip Counts by months', figsize = (10,5), legend=False)
plt.ylabel('Number of Trips')
plt.savefig("Plots/trips_per_month2.png")
plt.show()
#now shows correct plot but xticks not sorted

In [None]:
# Count how many trips started in each census GEOID tract
census_trip_start['Total Trips'] = pd.DataFrame(clean_results_df['GEOID Start'].value_counts())

# Count how many trips ended in each census tract
census_trip_end = pd.DataFrame(clean_results_df['GEOID End'].value_counts())

plt.bar(clean_results_df["GEOID Start"], census_trip_start ['Total Trips'])
plt.title("Total Count of trips starting per Census Tract")
plt.xlabel("GEOID ID")
plt.ylabel("Total Trips")

plt.show()