In [None]:
# https://data.bts.gov/Research-and-Statistics/Trips-by-Distance/w96p-f2qv
# mobility
#     1. Extract data from https://data.bts.gov/Research-and-Statistics/Trips-by-Distance/w96p-f2qv using the API
#     2. Create a DataFrame with only the columns we need -
#         level,
#         date,
#         state_code,
#         state_fips,
#         county,
#         county_fips,
#         pop_stay_at_home,
#         pop_not_stay_at_home
#     3. Extract data from https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv
#     4. Create a DataFrame with only the columns we need -
#         Date,
#         State,
#         County,
#         Cases,
#         Deaths
#     5. Merge the two DataFrames on Date, State and County names so that the resulting DataFrame has the following columns
#         Date,
#         State,
#         County,
#         Population Staying at Home,
#         Population Not Staying at Home,
#         Cases,
#         Deaths
#     6. Scatter plot and check the correlation between COVID cases and population staying at home
#     7. Map the data

In [5]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
from sodapy import Socrata
from config import my_app_token, key_id, key_secret

In [6]:
# Mobility data retrieval
# Maximum number of rows requested from the BTS "Trips by Distance" dataset
# (w96p-f2qv). The API returns at most this many rows, so rows beyond the
# limit are silently omitted once the dataset grows past it -- raise as needed.
MOBILITY_ROW_LIMIT = 2028190

# Authenticated Socrata client; the token and key pair are imported from config.
client = Socrata("data.bts.gov",
                 my_app_token,
                 username=key_id,
                 password=key_secret)

try:
    # Results returned as JSON from API / converted to Python list of
    # dictionaries by sodapy.
    results = client.get("w96p-f2qv", limit=MOBILITY_ROW_LIMIT)
finally:
    # Release the client's HTTP session even if the request fails.
    client.close()

# Hitting the limit exactly suggests the dataset has more rows than requested.
if len(results) >= MOBILITY_ROW_LIMIT:
    print(
        f"Warning: received {len(results)} rows, which reaches "
        f"MOBILITY_ROW_LIMIT={MOBILITY_ROW_LIMIT}; the mobility data may be truncated."
    )

# Convert to pandas DataFrame
mobility_df = pd.DataFrame.from_records(results)
mobility_df

Unnamed: 0,level,date,state_fips,state_code,pop_stay_at_home,pop_not_stay_at_home,trips,trips_1,trips_1_3,trips_3_5,trips_5_10,trips_10_25,trips_25_50,trips_50_100,trips_100_250,trips_250_500,trips_500,county_fips,county
0,State,2020-09-26T00:00:00.000,56,WY,152748.0,424989.0,1851837.0,481030.0,502362.0,232186.0,189575.0,170749.0,129506.0,91931.0,48006.0,4751.0,1741.0,,
1,State,2020-09-26T00:00:00.000,55,WI,1428896.0,4384672.0,17699745.0,3995356.0,4221439.0,2143288.0,2730283.0,2785950.0,1128658.0,463308.0,199544.0,25428.0,6491.0,,
2,State,2020-09-26T00:00:00.000,54,WV,469910.0,1335922.0,5505498.0,1164811.0,1421453.0,712576.0,877639.0,826495.0,315251.0,130587.0,48968.0,6218.0,1500.0,,
3,State,2020-09-26T00:00:00.000,53,WA,2064492.0,5471099.0,20838266.0,5002466.0,5476448.0,2577471.0,3139857.0,3045234.0,1024694.0,367036.0,148252.0,30052.0,26756.0,,
4,State,2020-09-26T00:00:00.000,51,VA,2119514.0,6398171.0,26541579.0,6052939.0,6645521.0,3383616.0,4162364.0,4082167.0,1416871.0,527643.0,222201.0,36443.0,11814.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2028185,County,2019-01-01T00:00:00.000,01,AL,9663,48000,142305,26725,33642,18506,22842,24470,10675,3396,1474,515,60,01009,Blount County
2028186,County,2019-01-01T00:00:00.000,01,AL,3861,18471,54827,11602,15874,7590,6436,7284,4125,1103,540,239,34,01007,Bibb County
2028187,County,2019-01-01T00:00:00.000,01,AL,4782,20023,67658,15524,16677,10550,11674,6416,3686,2450,589,66,26,01005,Barbour County
2028188,County,2019-01-01T00:00:00.000,01,AL,44415,172941,534520,120752,142931,68235,87430,78045,24495,7079,3188,1693,672,01003,Baldwin County


In [7]:
# County-level COVID-19 data: path to the CSV file under the Resources folder
covid_data_url = os.path.join("Resources", "us-counties.csv")

# Load the CSV into a DataFrame and preview its first five rows
covid_df = pd.read_csv(filepath_or_buffer=covid_data_url)
covid_df.head(n=5)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0
