In [1]:
# Import Dependencies and setup
import pandas as pd
# from sqlalchemy import create_engine
import requests
import json

##### Extraction

In [2]:
# Extract CSV into pandas df
df_ca = pd.read_csv('../resources/california-history.csv')
df_ca.head()

Unnamed: 0,date,state,death,deathConfirmed,deathIncrease,deathProbable,hospitalized,hospitalizedCumulative,hospitalizedCurrently,hospitalizedIncrease,...,totalTestResults,totalTestResultsIncrease,totalTestsAntibody,totalTestsAntigen,totalTestsPeopleAntibody,totalTestsPeopleAntigen,totalTestsPeopleViral,totalTestsPeopleViralIncrease,totalTestsViral,totalTestsViralIncrease
0,2021-03-07,CA,54124.0,,258,,,,4291.0,0,...,49646014,133186,,,,,,0,49646014,133186
1,2021-03-06,CA,53866.0,,418,,,,4513.0,0,...,49512828,218325,,,,,,0,49512828,218325
2,2021-03-05,CA,53448.0,,400,,,,4714.0,0,...,49294503,146818,,,,,,0,49294503,146818
3,2021-03-04,CA,53048.0,,273,,,,4967.0,0,...,49147685,119637,,,,,,0,49147685,119637
4,2021-03-03,CA,52775.0,,278,,,,5110.0,0,...,49028048,130858,,,,,,0,49028048,130858


##### Generate List of Data Fields

In [3]:
# Create json url query and variable for data request
base_url = "https://api.covidtracking.com/v1/states/ny/daily.json"
data_dict = requests.get(base_url).json()

print(json.dumps(data_dict, indent=4, sort_keys=True))
len(data_dict)

[
    {
        "checkTimeEt": "03/06 09:00",
        "commercialScore": 0,
        "dataQualityGrade": null,
        "date": 20210307,
        "dateChecked": "2021-03-06T14:00:00Z",
        "dateModified": "2021-03-06T14:00:00Z",
        "death": 39029,
        "deathConfirmed": null,
        "deathIncrease": 59,
        "deathProbable": null,
        "fips": "36",
        "grade": "",
        "hash": "8b1ad6376deb03844d4e5e3f615197da1b3a214e",
        "hospitalized": null,
        "hospitalizedCumulative": null,
        "hospitalizedCurrently": 4789,
        "hospitalizedDischarged": 150100,
        "hospitalizedIncrease": 0,
        "inIcuCumulative": null,
        "inIcuCurrently": 999,
        "lastUpdateEt": "3/6/2021 14:00",
        "negative": null,
        "negativeIncrease": 0,
        "negativeRegularScore": 0,
        "negativeScore": 0,
        "negativeTestsAntibody": null,
        "negativeTestsPeopleAntibody": null,
        "negativeTestsViral": null,
        "onVentila

371

In [4]:
# Set up dictionary of variables where data from api calls will be stored
ny_data = {'date':[], 'state':[], 'deaths':[], 'daily_hospitalization':[], 
           'icu_hospitalized':[], 'positiveCasesViral':[], 'positive_increase':[], 
           'test_results_total':[], 'test_increase':[]
          }

print('Retrieving NY Covid data')
print('-' * 30)

for data in data_dict:
    try:
        ny_data["date"].append(data["date"])
        ny_data["state"].append(data["state"])
        ny_data["deaths"].append(data["deathIncrease"])
        ny_data["daily_hospitalization"].append(data["hospitalizedIncrease"])
        ny_data["icu_hospitalized"].append(data["inIcuCurrently"])
        ny_data["positiveCasesViral"].append(data["PositiveCasesViral"])
        ny_data["positive_increase"].append(data["positiveIncrease"])
        ny_data["test_results_total"].append(data["totalTestResults"])
        ny_data["test_increase"].append(data["totalTestResultsIncrease"])
        
    except:
        print(f'Incomplete record for {data}. Skipping {data}.')

Retrieving NY Covid data
------------------------------
Incomplete record for {'date': 20210307, 'state': 'NY', 'positive': 1681169, 'probableCases': None, 'negative': None, 'pending': None, 'totalTestResultsSource': 'totalTestEncountersViral', 'totalTestResults': 39695100, 'hospitalizedCurrently': 4789, 'hospitalizedCumulative': None, 'inIcuCurrently': 999, 'inIcuCumulative': None, 'onVentilatorCurrently': 682, 'onVentilatorCumulative': None, 'recovered': None, 'lastUpdateEt': '3/6/2021 14:00', 'dateModified': '2021-03-06T14:00:00Z', 'checkTimeEt': '03/06 09:00', 'death': 39029, 'hospitalized': None, 'hospitalizedDischarged': 150100, 'dateChecked': '2021-03-06T14:00:00Z', 'totalTestsViral': None, 'positiveTestsViral': None, 'negativeTestsViral': None, 'positiveCasesViral': None, 'deathConfirmed': None, 'deathProbable': None, 'totalTestEncountersViral': 39695100, 'totalTestsPeopleViral': None, 'totalTestsAntibody': None, 'positiveTestsAntibody': None, 'negativeTestsAntibody': None, 'to

In [5]:
# Convert JSON to pandas dataframe and print to csv file
data_dict_pd = pd.DataFrame(data_dict)
data_dict_pd.to_csv('../resources/ny_data.csv')

data_dict_pd.head()

Unnamed: 0,date,state,positive,probableCases,negative,pending,totalTestResultsSource,totalTestResults,hospitalizedCurrently,hospitalizedCumulative,...,dataQualityGrade,deathIncrease,hospitalizedIncrease,hash,commercialScore,negativeRegularScore,negativeScore,positiveScore,score,grade
0,20210307,NY,1681169,,,,totalTestEncountersViral,39695100,4789.0,,...,,59,0,8b1ad6376deb03844d4e5e3f615197da1b3a214e,0,0,0,0,0,
1,20210306,NY,1674380,,,,totalTestEncountersViral,39467332,4954.0,,...,,79,0,c7ae8ab7600d0db452429e5692968b185e90d1de,0,0,0,0,0,
2,20210305,NY,1666733,,,,totalTestEncountersViral,39194200,5034.0,,...,,95,0,99bf166f33908a152c9badb3464d1899821323a5,0,0,0,0,0,
3,20210304,NY,1657777,,,,totalTestEncountersViral,38897265,5177.0,,...,,61,0,39d0cec8753202639bdd32abdd228cc6c891d862,0,0,0,0,0,
4,20210303,NY,1650184,,,,totalTestEncountersViral,38627176,5323.0,,...,,75,0,91cc3a4e0e57073b8997de61de8a6e385cf66599,0,0,0,0,0,


In [6]:
response = requests.get(base_url).json()

print(json.dumps(response, indent=4, sort_keys=True))

[
    {
        "checkTimeEt": "03/06 09:00",
        "commercialScore": 0,
        "dataQualityGrade": null,
        "date": 20210307,
        "dateChecked": "2021-03-06T14:00:00Z",
        "dateModified": "2021-03-06T14:00:00Z",
        "death": 39029,
        "deathConfirmed": null,
        "deathIncrease": 59,
        "deathProbable": null,
        "fips": "36",
        "grade": "",
        "hash": "8b1ad6376deb03844d4e5e3f615197da1b3a214e",
        "hospitalized": null,
        "hospitalizedCumulative": null,
        "hospitalizedCurrently": 4789,
        "hospitalizedDischarged": 150100,
        "hospitalizedIncrease": 0,
        "inIcuCumulative": null,
        "inIcuCurrently": 999,
        "lastUpdateEt": "3/6/2021 14:00",
        "negative": null,
        "negativeIncrease": 0,
        "negativeRegularScore": 0,
        "negativeScore": 0,
        "negativeTestsAntibody": null,
        "negativeTestsPeopleAntibody": null,
        "negativeTestsViral": null,
        "onVentila