# Apple & Google Mobility Data
# NYT COVID Cases & Death Data


In [1]:
# Dependencies and Setup
import json
import os
import pandas as pd
import urllib.request
import requests
from config import db_user, db_pwd
from sqlalchemy import create_engine

### Retrieving Apple Mobility Data 

In [2]:
# Fetch the index JSON for the current mobility data set, then assemble the
# CSV download URL from the CDN host, the index's `basePath`, and the en-us
# `csvPath` entry.
with urllib.request.urlopen('https://covid19-static.cdn-apple.com/covid19-mobility-data/current/v3/index.json') as url:
    raw_index = url.read()
    json_data = json.loads(raw_index.decode())
link = f"https://covid19-static.cdn-apple.com{json_data['basePath']}{json_data['regions']['en-us']['csvPath']}"
link

'https://covid19-static.cdn-apple.com/covid19-mobility-data/2017HotfixDev11/v3/en-us/applemobilitytrends-2020-09-21.csv'

### Retrieving the Apple Mobility Trends as .CSV

In [3]:
# Make sure the local download folder exists. `exist_ok=True` lets makedirs
# handle the already-exists case itself, so no separate existence check
# (and no check-then-create race) is needed.
data_dir = 'Data'
os.makedirs(data_dir, exist_ok=True)

# Download the CSV found at `link` to a fixed local filename. urlretrieve
# returns a (local_path, headers) tuple, which is shown as the cell output.
path = os.path.join(data_dir, 'applemobilitytrends.csv')
urllib.request.urlretrieve(link, path)

('Data/applemobilitytrends.csv', <http.client.HTTPMessage at 0x7f9dad912150>)

In [4]:
# Load the downloaded Apple mobility CSV into a DataFrame and preview it.
# low_memory=False has pandas process the file as a whole instead of in
# chunks, which avoids mixed-dtype inference on the wide date columns.
df = pd.read_csv(path, low_memory=False)
df.head()

Unnamed: 0,geo_type,region,transportation_type,alternative_name,sub-region,country,2020-01-13,2020-01-14,2020-01-15,2020-01-16,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
0,country/region,Albania,driving,,,,100.0,95.3,101.43,97.2,...,186.57,173.76,142.75,144.04,135.62,140.86,151.82,164.99,160.0,126.6
1,country/region,Albania,walking,,,,100.0,100.68,98.93,98.46,...,169.85,154.67,159.4,157.19,160.34,161.07,165.59,157.44,141.41,158.68
2,country/region,Argentina,driving,,,,100.0,97.07,102.45,111.21,...,69.7,42.75,55.7,57.94,61.09,66.01,76.2,75.11,46.99,59.23
3,country/region,Argentina,walking,,,,100.0,95.11,101.37,112.67,...,53.04,34.87,44.74,44.54,50.38,56.07,62.88,58.01,41.11,50.83
4,country/region,Australia,driving,AU,,,100.0,102.98,104.21,108.63,...,83.67,85.82,89.24,93.18,95.04,101.92,102.84,82.47,89.67,91.87


### Filter the data so that only the US is in the DataFrame

In [5]:
# Keep only the rows whose `country` column is 'United States'.
# `.copy()` makes df_us an independent frame rather than a slice of `df`, so
# assigning to its columns later does not raise SettingWithCopyWarning.
df_us = df[df['country'] == 'United States'].copy()
df_us.head()

Unnamed: 0,geo_type,region,transportation_type,alternative_name,sub-region,country,2020-01-13,2020-01-14,2020-01-15,2020-01-16,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
158,city,Akron,driving,,Ohio,United States,100.0,103.06,107.5,106.14,...,177.06,122.8,143.12,147.98,151.71,155.39,179.76,174.1,125.64,138.11
159,city,Akron,transit,,Ohio,United States,100.0,106.69,103.75,100.22,...,52.87,39.71,56.84,54.34,56.62,66.76,69.63,57.57,43.9,60.29
160,city,Akron,walking,,Ohio,United States,100.0,97.23,79.05,74.77,...,154.64,98.27,118.31,119.49,130.82,124.85,147.02,146.18,96.15,118.33
161,city,Albany,driving,,New York,United States,100.0,102.35,107.35,105.54,...,150.18,116.64,128.73,134.72,136.06,138.23,162.51,153.46,119.21,124.41
162,city,Albany,transit,,New York,United States,100.0,100.14,105.95,107.76,...,108.41,85.28,97.72,102.32,93.13,105.48,113.47,106.18,81.42,102.04


### Cleaning Data

In [6]:
# List the names of the columns in df_us that contain at least one NaN:
# flag each column, keep only the flagged ones, and return their labels.
df_us.isna().any().loc[lambda has_nan: has_nan].index.tolist()

['alternative_name', 'sub-region', '2020-05-11', '2020-05-12']

In [7]:
# Show the rows of df_us whose 'sub-region' value is missing
df_us.loc[df_us['sub-region'].isna()]

Unnamed: 0,geo_type,region,transportation_type,alternative_name,sub-region,country,2020-01-13,2020-01-14,2020-01-15,2020-01-16,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
928,city,Washington DC,driving,DC,,United States,100.0,105.82,109.02,110.37,...,134.24,102.86,103.80,106.31,109.00,110.13,131.88,137.82,101.79,100.10
929,city,Washington DC,transit,DC,,United States,100.0,100.78,103.92,105.02,...,43.41,37.23,39.57,39.80,42.40,40.14,48.69,50.39,39.44,41.31
930,city,Washington DC,walking,DC,,United States,100.0,99.07,109.61,104.16,...,124.91,94.32,85.28,85.62,90.80,85.25,116.64,140.92,96.98,83.39
955,sub-region,Alabama,driving,,,United States,100.0,102.90,103.51,109.71,...,184.16,133.76,143.01,129.98,121.82,155.50,187.07,180.35,130.68,141.03
956,sub-region,Alabama,walking,,,United States,100.0,99.93,100.28,104.78,...,205.59,129.79,156.13,145.24,136.80,168.35,202.52,200.07,131.38,156.41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020,sub-region,Wisconsin,driving,,,United States,100.0,109.36,102.32,108.86,...,183.47,149.35,146.53,151.30,155.16,163.76,196.34,201.57,150.25,140.96
2021,sub-region,Wisconsin,walking,,,United States,100.0,120.05,106.33,109.10,...,254.12,183.81,192.46,199.54,209.67,210.55,263.93,287.73,182.39,178.63
2022,sub-region,Wisconsin,transit,,,United States,100.0,103.83,100.25,100.18,...,84.29,73.46,80.09,81.36,86.52,92.00,97.85,94.38,80.48,84.45
2023,sub-region,Wyoming,driving,,,United States,100.0,106.23,105.81,109.13,...,349.39,297.56,284.27,279.84,283.00,300.57,338.43,325.17,280.34,261.06


In [8]:
# Fill missing 'sub-region' values from 'region', then rename the column to
# 'state'. assign() returns a new frame instead of writing into df_us, so the
# step no longer raises SettingWithCopyWarning when df_us is a slice of `df`.
df_us = (
    df_us
    .assign(**{'sub-region': df_us['sub-region'].fillna(df_us['region'])})
    .rename(columns={'sub-region': 'state'})
)
df_us.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,geo_type,region,transportation_type,alternative_name,state,country,2020-01-13,2020-01-14,2020-01-15,2020-01-16,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
158,city,Akron,driving,,Ohio,United States,100.0,103.06,107.5,106.14,...,177.06,122.8,143.12,147.98,151.71,155.39,179.76,174.1,125.64,138.11
159,city,Akron,transit,,Ohio,United States,100.0,106.69,103.75,100.22,...,52.87,39.71,56.84,54.34,56.62,66.76,69.63,57.57,43.9,60.29
160,city,Akron,walking,,Ohio,United States,100.0,97.23,79.05,74.77,...,154.64,98.27,118.31,119.49,130.82,124.85,147.02,146.18,96.15,118.33
161,city,Albany,driving,,New York,United States,100.0,102.35,107.35,105.54,...,150.18,116.64,128.73,134.72,136.06,138.23,162.51,153.46,119.21,124.41
162,city,Albany,transit,,New York,United States,100.0,100.14,105.95,107.76,...,108.41,85.28,97.72,102.32,93.13,105.48,113.47,106.18,81.42,102.04


In [9]:
# Remove unnecessary metadata columns, then drop every remaining column that
# still contains a NaN. dropna(axis='columns') removes whichever date columns
# have gaps (2020-05-11 and 2020-05-12 in the snapshot this notebook was run
# on) without hard-coding their names, so the cell does not raise KeyError
# when the downloaded file's gaps differ.
df_us = (
    df_us
    .drop(columns=['geo_type', 'region', 'alternative_name', 'country'])
    .dropna(axis='columns')
)
df_us.head()

Unnamed: 0,transportation_type,state,2020-01-13,2020-01-14,2020-01-15,2020-01-16,2020-01-17,2020-01-18,2020-01-19,2020-01-20,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
158,driving,Ohio,100.0,103.06,107.5,106.14,123.62,105.99,82.68,94.99,...,177.06,122.8,143.12,147.98,151.71,155.39,179.76,174.1,125.64,138.11
159,transit,Ohio,100.0,106.69,103.75,100.22,89.04,74.85,56.99,75.59,...,52.87,39.71,56.84,54.34,56.62,66.76,69.63,57.57,43.9,60.29
160,walking,Ohio,100.0,97.23,79.05,74.77,89.55,88.39,49.3,56.29,...,154.64,98.27,118.31,119.49,130.82,124.85,147.02,146.18,96.15,118.33
161,driving,New York,100.0,102.35,107.35,105.54,128.97,88.28,82.76,102.46,...,150.18,116.64,128.73,134.72,136.06,138.23,162.51,153.46,119.21,124.41
162,transit,New York,100.0,100.14,105.95,107.76,101.39,85.79,82.86,126.71,...,108.41,85.28,97.72,102.32,93.13,105.48,113.47,106.18,81.42,102.04


In [10]:
# Print a summary of df_us (row index range, column count, dtypes, memory use)
df_us.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3102 entries, 158 to 4690
Columns: 253 entries, transportation_type to 2020-09-21
dtypes: float64(251), object(2)
memory usage: 6.0+ MB


In [11]:
# Keep only the rows whose transportation type is 'driving'
apple_driving = df_us[df_us["transportation_type"].eq('driving')]
apple_driving.head()

Unnamed: 0,transportation_type,state,2020-01-13,2020-01-14,2020-01-15,2020-01-16,2020-01-17,2020-01-18,2020-01-19,2020-01-20,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
158,driving,Ohio,100.0,103.06,107.5,106.14,123.62,105.99,82.68,94.99,...,177.06,122.8,143.12,147.98,151.71,155.39,179.76,174.1,125.64,138.11
161,driving,New York,100.0,102.35,107.35,105.54,128.97,88.28,82.76,102.46,...,150.18,116.64,128.73,134.72,136.06,138.23,162.51,153.46,119.21,124.41
164,driving,New Mexico,100.0,101.06,104.03,97.01,118.28,116.32,80.57,94.51,...,143.36,104.82,122.11,124.12,126.45,128.28,149.06,141.44,104.68,115.04
167,driving,Pennsylvania,100.0,104.22,106.86,106.58,128.28,77.73,89.03,104.46,...,165.99,123.6,136.08,140.51,144.71,150.11,173.45,171.38,126.24,130.52
173,driving,Alaska,100.0,105.32,105.99,111.03,126.49,116.76,86.25,95.69,...,179.64,141.68,156.0,159.46,163.78,158.7,185.74,173.04,129.28,150.22


In [13]:
# Pull the driving rows whose state is 'Washington DC'
dc_driving = apple_driving[apple_driving["state"].eq('Washington DC')]
dc_driving.head()

Unnamed: 0,transportation_type,state,2020-01-13,2020-01-14,2020-01-15,2020-01-16,2020-01-17,2020-01-18,2020-01-19,2020-01-20,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
928,driving,Washington DC,100.0,105.82,109.02,110.37,123.98,99.19,92.14,95.47,...,134.24,102.86,103.8,106.31,109.0,110.13,131.88,137.82,101.79,100.1


### Create a dataframe with select columns

In [32]:
# Every remaining row is 'driving', so the transportation_type column is dropped
apple = apple_driving.drop('transportation_type', axis='columns')
apple

Unnamed: 0,state,2020-01-13,2020-01-14,2020-01-15,2020-01-16,2020-01-17,2020-01-18,2020-01-19,2020-01-20,2020-01-21,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
158,Ohio,100.0,103.06,107.50,106.14,123.62,105.99,82.68,94.99,97.36,...,177.06,122.80,143.12,147.98,151.71,155.39,179.76,174.10,125.64,138.11
161,New York,100.0,102.35,107.35,105.54,128.97,88.28,82.76,102.46,107.77,...,150.18,116.64,128.73,134.72,136.06,138.23,162.51,153.46,119.21,124.41
164,New Mexico,100.0,101.06,104.03,97.01,118.28,116.32,80.57,94.51,99.66,...,143.36,104.82,122.11,124.12,126.45,128.28,149.06,141.44,104.68,115.04
167,Pennsylvania,100.0,104.22,106.86,106.58,128.28,77.73,89.03,104.46,102.81,...,165.99,123.60,136.08,140.51,144.71,150.11,173.45,171.38,126.24,130.52
173,Alaska,100.0,105.32,105.99,111.03,126.49,116.76,86.25,95.69,105.11,...,179.64,141.68,156.00,159.46,163.78,158.70,185.74,173.04,129.28,150.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4683,Virginia,100.0,100.96,111.03,111.97,136.50,136.17,104.60,117.02,101.29,...,223.42,177.70,159.83,158.32,169.54,151.68,191.07,224.22,160.69,149.63
4684,Maine,100.0,103.05,103.98,82.24,129.29,121.27,95.19,116.93,100.59,...,282.76,237.66,196.92,187.27,194.10,201.92,239.75,268.46,226.68,180.00
4687,Texas,100.0,98.85,97.76,92.21,115.79,128.73,92.48,114.43,106.30,...,194.17,146.27,149.59,153.39,141.40,154.54,210.70,197.63,142.07,121.34
4688,California,100.0,102.98,103.10,114.08,118.83,121.49,93.29,101.86,103.01,...,157.74,131.88,144.92,149.45,147.16,155.48,178.39,170.59,142.61,141.10


In [35]:
# Transpose the frame so that the date columns become rows
apple_us = apple.T
apple_us.head()

Unnamed: 0,158,161,164,167,173,177,180,183,191,199,...,4676,4677,4680,4681,4682,4683,4684,4687,4688,4689
state,Ohio,New York,New Mexico,Pennsylvania,Alaska,Michigan,Maryland,California,Georgia,Texas,...,Texas,California,South Carolina,Pennsylvania,Nebraska,Virginia,Maine,Texas,California,Arizona
2020-01-13,100,100,100,100,100,100,100,100,100,100,...,100,100,100,100,100,100,100,100,100,100
2020-01-14,103.06,102.35,101.06,104.22,105.32,100.28,106.04,100.7,105.48,99.17,...,111.01,100.24,100.76,103.69,100.45,100.96,103.05,98.85,102.98,101.82
2020-01-15,107.5,107.35,104.03,106.86,105.99,101.92,108.09,100.63,105.94,103.96,...,103.92,107.65,103.61,106.16,94.25,111.03,103.98,97.76,103.1,106.67
2020-01-16,106.14,105.54,97.01,106.58,111.03,104.53,109.33,100.34,111.42,105.19,...,100.26,105.9,109.78,106.82,131.84,111.97,82.24,92.21,114.08,108.93


In [34]:
# Index the driving rows by their 'state' column (no transpose happens here).
# NOTE(review): this rebinds `apple_us`, replacing the transposed frame that
# the previous cell assigned to the same name — confirm which of the two
# shapes the following cells are meant to use.
apple_us = apple_driving.set_index('state')
apple_us.head()

Unnamed: 0_level_0,transportation_type,2020-01-13,2020-01-14,2020-01-15,2020-01-16,2020-01-17,2020-01-18,2020-01-19,2020-01-20,2020-01-21,...,2020-09-12,2020-09-13,2020-09-14,2020-09-15,2020-09-16,2020-09-17,2020-09-18,2020-09-19,2020-09-20,2020-09-21
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Ohio,driving,100.0,103.06,107.5,106.14,123.62,105.99,82.68,94.99,97.36,...,177.06,122.8,143.12,147.98,151.71,155.39,179.76,174.1,125.64,138.11
New York,driving,100.0,102.35,107.35,105.54,128.97,88.28,82.76,102.46,107.77,...,150.18,116.64,128.73,134.72,136.06,138.23,162.51,153.46,119.21,124.41
New Mexico,driving,100.0,101.06,104.03,97.01,118.28,116.32,80.57,94.51,99.66,...,143.36,104.82,122.11,124.12,126.45,128.28,149.06,141.44,104.68,115.04
Pennsylvania,driving,100.0,104.22,106.86,106.58,128.28,77.73,89.03,104.46,102.81,...,165.99,123.6,136.08,140.51,144.71,150.11,173.45,171.38,126.24,130.52
Alaska,driving,100.0,105.32,105.99,111.03,126.49,116.76,86.25,95.69,105.11,...,179.64,141.68,156.0,159.46,163.78,158.7,185.74,173.04,129.28,150.22


In [25]:
# Move the index of apple_us back into a regular column and store the result as `dc`
dc = apple_us.reset_index()

In [27]:
# dc["index"] = (dc["index"].astype("datetime64")).dt.strftime("%m-%d")
# dc.head()

In [None]:
# Filter the rows to the month of May using the "MM-DD" strings in 'index'.
# NOTE(review): `low_5` is not assigned until a later cell of this notebook,
# so this cell raises NameError on Restart & Run All where it currently sits —
# confirm whether it should be moved or should filter a different frame.
# between() is inclusive at both ends, so May 1 and May 31 are both kept
# (the previous `> "05-01"` / `<= "05-30"` bounds dropped both days).
start_date = "05-01"
end_date = "05-31"
mask = low_5['index'].between(start_date, end_date)
low_5_may_df = low_5.loc[mask]
low_5_may_df.head()

## Store Google CSV into DataFrame

In [20]:
# Read the Google mobility CSV from the Resources folder one level above
# the notebook's working directory, then preview the first rows.
csv_file = "../Resources/google_mob_US.csv"
google_data_df = pd.read_csv(csv_file)
google_data_df.head()

Unnamed: 0.1,Unnamed: 0,State,date,retail_and_recreation,grocery_and_pharmacy,parks,transit_stations,workplaces,residential
0,0,Alabama,2020-02-15,5.0,2.0,39.0,7.0,2.0,-1.0
1,1,Alabama,2020-02-16,0.0,-2.0,-7.0,3.0,-1.0,1.0
2,2,Alabama,2020-02-17,3.0,0.0,17.0,7.0,-17.0,4.0
3,3,Alabama,2020-02-18,-4.0,-3.0,-11.0,-1.0,1.0,2.0
4,4,Alabama,2020-02-19,4.0,1.0,6.0,4.0,1.0,0.0


In [21]:
# (rows, columns) of the raw Google mobility frame
google_data_df.shape

(456462, 9)

In [22]:
# Column labels of the raw Google mobility frame
google_data_df.columns

Index(['Unnamed: 0', 'State', 'date', 'retail_and_recreation',
       'grocery_and_pharmacy', 'parks', 'transit_stations', 'workplaces',
       'residential'],
      dtype='object')

In [23]:
# List the names of the Google columns that contain at least one NaN:
# flag each column, keep only the flagged ones, and return their labels.
google_data_df.isna().any().loc[lambda has_nan: has_nan].index.tolist()

['retail_and_recreation',
 'grocery_and_pharmacy',
 'parks',
 'transit_stations',
 'workplaces',
 'residential']

### Create a new dataframe with select columns

In [38]:
# Build an independent copy holding only the columns used downstream
google_df = google_data_df.loc[
    :,
    ["State", "date", "retail_and_recreation", "grocery_and_pharmacy", "parks"],
].copy()
google_df

Unnamed: 0,State,date,retail_and_recreation,grocery_and_pharmacy,parks
0,Alabama,02-15,5.0,2.0,39.0
1,Alabama,02-16,0.0,-2.0,-7.0
2,Alabama,02-17,3.0,0.0,17.0
3,Alabama,02-18,-4.0,-3.0,-11.0
4,Alabama,02-19,4.0,1.0,6.0
...,...,...,...,...,...
456457,Wyoming,07-29,,,
456458,Wyoming,07-30,,,
456459,Wyoming,07-31,,,
456460,Wyoming,08-03,,,


In [39]:
# Set the index to the 'date' column (not the state)
google_df = google_df.set_index("date")
google_df.head()

Unnamed: 0_level_0,State,retail_and_recreation,grocery_and_pharmacy,parks
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
02-15,Alabama,5.0,2.0,39.0
02-16,Alabama,0.0,-2.0,-7.0
02-17,Alabama,3.0,0.0,17.0
02-18,Alabama,-4.0,-3.0,-11.0
02-19,Alabama,4.0,1.0,6.0


In [42]:
# Move 'date' back from the index into a regular column. Rebinding to the
# returned frame (instead of inplace=True) keeps this step an ordinary
# expression that produces a new frame rather than mutating the existing one.
google_df = google_df.reset_index()
google_df

Unnamed: 0,date,State,retail_and_recreation,grocery_and_pharmacy,parks
0,02-15,Alabama,5.0,2.0,39.0
1,02-16,Alabama,0.0,-2.0,-7.0
2,02-17,Alabama,3.0,0.0,17.0
3,02-18,Alabama,-4.0,-3.0,-11.0
4,02-19,Alabama,4.0,1.0,6.0
...,...,...,...,...,...
456457,07-29,Wyoming,,,
456458,07-30,Wyoming,,,
456459,07-31,Wyoming,,,
456460,08-03,Wyoming,,,


In [41]:
# Filter by date
# google_df["date"] = (google_data_df["date"].astype("datetime64")).dt.strftime("%m-%d")
# google_df

## Store NYT COVID cases and deaths CSV into DataFrame

In [None]:
# Read the COVID states CSV from the Resources folder one level above the
# notebook's working directory, then preview the first rows.
# Note: this reuses (and overwrites) the `csv_file` name from the Google cell.
csv_file = "../Resources/COVID-states.csv"
covid_data_df = pd.read_csv(csv_file)
covid_data_df.head()

In [None]:
# Filter by date


### Combine the data into a single dataset

In [None]:
# # Combine the data into a single dataset
# mobility_data_df = pd.merge(apple_data_df, google_data_df, covid_df how ='inner', on = "state")

# # Display the data table for preview
# mobility_data_df

### Connect to local database

In [None]:
# # Create Engine for data
# engine = create_engine(f"postgresql://{db_user}:{db_pwd}@localhost/mobility_db")
# conn = engine.connect()

In [None]:
# Build the PostgreSQL URL from the credentials imported from `config`.
# The f prefix is required: without it the braces are kept literally and the
# engine is configured for user "{db_user}" with password "{db_pwd}".
rds_connection_string = f"{db_user}:{db_pwd}@localhost:5432/mobility_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

### Check for tables

In [None]:
# engine.table_names()

### Use pandas to load csv converted DataFrame into database

In [None]:
# Append the frame's rows (without its index) to the 'customer_name' table
# through `engine`.
# NOTE(review): `new_customer_data_df` is never assigned in this notebook and
# 'customer_name' does not match any dataset loaded here — this looks like
# leftover template code; confirm which DataFrame and table are intended.
new_customer_data_df.to_sql(name='customer_name', con=engine, if_exists='append', index=False)

### Confirm data has been added by querying the table
* NOTE: can also check using pgAdmin

In [None]:
# pd.read_sql_query('select * from ____', con=engine).head()

### Confirm data has been added by querying the customer_location table

In [None]:
# Query the table back through `engine` and preview the first rows.
# NOTE(review): '_______' is a placeholder, not a real table name — fill in
# the table written by the to_sql step before running this cell.
pd.read_sql_query('select * from _______', con=engine).head()

In [None]:
# Drop the 'total' column so it is not drawn as an extra series on the plot.
# NOTE(review): `df_low_5_states` is not assigned anywhere in this notebook —
# confirm where it is expected to come from.
df_low_5_states = df_low_5_states.drop('total', axis='columns')

In [None]:
# Transpose the frame so that the date columns become rows
df_low_5_states_t = df_low_5_states.transpose()

In [None]:
# Plot the mobility data for the lowest 5 states.
# DataFrame.plot.line returns the matplotlib Axes it drew on; styling that
# Axes directly avoids the `plt` name, which this notebook never imports
# (the pyplot calls raised NameError on a fresh kernel).
ax = df_low_5_states_t.plot.line(figsize=(15, 10))
ax.set_title('Mobility Trends in the Lowest 5 States', size=20)
ax.set_xlabel("Timepoints (days)", size=25)
ax.set_ylabel("Change from baseline", size=25)
# Horizontal reference line at y=100 spanning x=0..220
# (presumably the baseline level and the number of plotted days — TODO confirm)
ax.hlines(100, 0, 220, alpha=0.75)
ax.grid(linestyle="-", linewidth=1, alpha=0.3)
ax.legend(loc="upper left", fontsize="large")
# Save through the Axes' parent figure so the file contains this plot
ax.get_figure().savefig('Mobility Trends in the Lowest 5 States.png')

## Filtering the data for the month of May

#### Mobility Data for Top 5 States - Month of May

In [None]:
# Move the index of the transposed top-5 frame into a regular column.
# NOTE(review): `df_top_5_states_t` is not assigned anywhere in this
# notebook — confirm where it is expected to come from.
top_5 = df_top_5_states_t.reset_index()

In [None]:
# Reformat the 'index' column as zero-padded "MM-DD" strings.
# pd.to_datetime is used for the parse because the unit-less
# astype("datetime64") spelling is rejected by pandas 2.x.
top_5["index"] = pd.to_datetime(top_5["index"]).dt.strftime("%m-%d")
top_5.head()

In [None]:
# Filter the top-5 rows to the month of May using the "MM-DD" strings in
# 'index' (zero-padded, so string comparison follows calendar order).
# between() is inclusive at both ends, so May 1 and May 31 are both kept
# (the previous `> "05-01"` / `<= "05-30"` bounds dropped both days).
start_date = "05-01"
end_date = "05-31"
mask = top_5['index'].between(start_date, end_date)
top_5_may_df = top_5.loc[mask]
top_5_may_df.head()

#### Mobility Data for Lowest 5 States - Month of May

In [None]:
# Move the index of the transposed lowest-5 frame into a regular column
low_5 = df_low_5_states_t.reset_index()

In [None]:
# Reformat the 'index' column as zero-padded "MM-DD" strings.
# pd.to_datetime is used for the parse because the unit-less
# astype("datetime64") spelling is rejected by pandas 2.x.
low_5["index"] = pd.to_datetime(low_5["index"]).dt.strftime("%m-%d")
low_5.head()

In [None]:
# Filter the lowest-5 rows to the month of May using the "MM-DD" strings in
# 'index' (zero-padded, so string comparison follows calendar order).
# between() is inclusive at both ends, so May 1 and May 31 are both kept
# (the previous `> "05-01"` / `<= "05-30"` bounds dropped both days).
start_date = "05-01"
end_date = "05-31"
mask = low_5['index'].between(start_date, end_date)
low_5_may_df = low_5.loc[mask]
low_5_may_df.head()