In [6]:
import requests
import json
import pandas as pd

In [7]:
# owid json on GitHub url
owid_covid19_url = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.json"

In [8]:
# owid json json requests
owid_vaccinations_response = requests.get(owid_covid19_url).json()

In [9]:
### owid json on GitHub of vaccinations - API call and DataFrame creation

json_dict = {
    "country": [],
    "iso_code": [],
    "date": [],
    "daily_vaccinations": [],
    "daily_vaccinations_per_million": []
}

for index, x in enumerate(owid_vaccinations_response):
    results = owid_vaccinations_response[index]

    for index, data in enumerate(results["data"]):
        json_dict["country"].append(results["country"])
        json_dict["iso_code"].append(results["iso_code"])
            
        json_dict["date"].append(results["data"][index]["date"])
            
        try:
            json_dict["daily_vaccinations"].append(results["data"][index]["daily_vaccinations"])
        except(KeyError):
            json_dict["daily_vaccinations"].append("No Data")
                
        try:    
            json_dict["daily_vaccinations_per_million"].append(results["data"][index]["daily_vaccinations_per_million"])
        except(KeyError):
            json_dict["daily_vaccinations_per_million"].append("No Data")
            

raw_owid_vaccinations_data = pd.DataFrame(json_dict)
raw_owid_vaccinations_data.head()

Unnamed: 0,country,iso_code,date,daily_vaccinations,daily_vaccinations_per_million
0,Afghanistan,AFG,2021-02-22,No Data,No Data
1,Afghanistan,AFG,2021-02-23,1367,35
2,Afghanistan,AFG,2021-02-24,1367,35
3,Afghanistan,AFG,2021-02-25,1367,35
4,Afghanistan,AFG,2021-02-26,1367,35


In [10]:
# owid filtered DataFrame to just display April dates

april_owid_vaccinations_data = raw_owid_vaccinations_data.loc[raw_owid_vaccinations_data["date"].str.contains("2021-04"), :]
april_owid_vaccinations_data.head()

Unnamed: 0,country,iso_code,date,daily_vaccinations,daily_vaccinations_per_million
38,Afghanistan,AFG,2021-04-01,3000,77
39,Afghanistan,AFG,2021-04-02,3000,77
40,Afghanistan,AFG,2021-04-03,3000,77
41,Afghanistan,AFG,2021-04-04,3000,77
42,Afghanistan,AFG,2021-04-05,3000,77


In [11]:
#reads the api for confirmed covid cases
cases="https://covid-api.mmediagroup.fr/v1/history?country=all&status=confirmed"
response=requests.get(cases)
temp=response.json()

#reads the api for confirmed covid deaths
deaths="https://covid-api.mmediagroup.fr/v1/history?country=all&status=deaths"
response2=requests.get(deaths)
temp2=response2.json()

In [12]:
the_confirmed_dates=[]
the_confirmed=[]
the_country=[]

#finds the dates and confirmed cases for all the countries
for i in temp:
    the_confirmed_temp=temp[i]['All']['dates']
    for d in the_confirmed_temp:
        the_country.append(i)
        the_confirmed_dates.append(d)
        the_confirmed.append(the_confirmed_temp[d])


In [13]:
the_death_dates=[]
the_death=[]
the_country=[]

for i in temp2:
    the_death_temp=temp2[i]['All']['dates']
    for d in the_death_temp:
        the_country.append(i)
        the_death_dates.append(d)
        the_death.append(the_death_temp[d])

#the total deaths and total confirmed are updated daily but at slightly different times
#if one of the print out below has less rows, that is because it hasn't been updated yet that day
#which will give an error. It should update soon though
print (len(the_country))
print (len(the_confirmed_dates))
print (len(the_confirmed))
print (len(the_death))


90324
90324
90324
90324


In [14]:
#cleans the data and populates the data frame
confirmed_dict={'Location': the_country,'Date':the_confirmed_dates,"Total_Confirmed":the_confirmed,"Total_Deaths":the_death}   
confirmed_df=pd.DataFrame(confirmed_dict)

the_list=[confirmed_df['Location'],confirmed_df['Date'],confirmed_df['Total_Confirmed'],confirmed_df['Total_Deaths']]

daily_confirmed=[]
daily_deaths=[]

for row in range(0,len(the_list[0]),1):
    if row != len(the_list[0])-1:
        if the_list[0][row]==the_list[0][row+1]: #same country
            daily_confirmed.append(the_list[2][row] - the_list[2][row+1])
            daily_deaths.append(the_list[3][row] - the_list[3][row+1])
        else: #at a new country
            daily_confirmed.append(the_list[2][row])
            daily_deaths.append(the_list[3][row])
    else:
        daily_confirmed.append(the_list[2][row])
        daily_deaths.append(the_list[3][row])

        
                

In [15]:
#outputs all the dates for all the countries
print()
print()
print("         Covid Cases All Countries: 01/22/2020 to 04/30/2021")
print("---------------------------------------------------------------------")
confirmed_df["Daily_Confirmed"]=daily_confirmed
confirmed_df["Daily_Deaths"]=daily_deaths
confirmed_df



         Covid Cases All Countries: 01/22/2020 to 04/30/2021
---------------------------------------------------------------------


Unnamed: 0,Location,Date,Total_Confirmed,Total_Deaths,Daily_Confirmed,Daily_Deaths
0,Afghanistan,2021-05-03,60300,2642,178,5
1,Afghanistan,2021-05-02,60122,2637,183,6
2,Afghanistan,2021-05-01,59939,2631,194,6
3,Afghanistan,2021-04-30,59745,2625,169,7
4,Afghanistan,2021-04-29,59576,2618,206,7
...,...,...,...,...,...,...
90319,Global,2020-01-26,2118,56,685,14
90320,Global,2020-01-25,1433,42,492,16
90321,Global,2020-01-24,941,26,286,8
90322,Global,2020-01-23,655,18,98,1


In [16]:
#outputs just april of 2021
print()
print()
print("         Covid Cases All Countries: April 2021)")
print("---------------------------------------------------------------------")
      
april_df=confirmed_df.loc[confirmed_df['Date'].str.contains("2021-04")]
april_df.to_csv("output/april_covid_rates.csv", index=False)
april_df



         Covid Cases All Countries: April 2021)
---------------------------------------------------------------------


Unnamed: 0,Location,Date,Total_Confirmed,Total_Deaths,Daily_Confirmed,Daily_Deaths
3,Afghanistan,2021-04-30,59745,2625,169,7
4,Afghanistan,2021-04-29,59576,2618,206,7
5,Afghanistan,2021-04-28,59370,2611,145,13
6,Afghanistan,2021-04-27,59225,2598,210,6
7,Afghanistan,2021-04-26,59015,2592,172,10
...,...,...,...,...,...,...
89884,Global,2021-04-05,131682894,2859884,489460,7463
89885,Global,2021-04-04,131193434,2852421,552409,6918
89886,Global,2021-04-03,130641025,2845503,527683,8385
89887,Global,2021-04-02,130113342,2837118,633454,10249


In [17]:
#pulls csv files for April
covid_csv_2021=confirmed_df#this was merged from another notebook, so changhing variable name to that it still works
covid_csv=april_df#this was merged from another notebook, so changhing variable name to that it still works
vaccines_csv=raw_owid_vaccinations_data#this was merged from another notebook, so changhing variable name to that it still works

In [30]:
#cleans country names to match
vaccines_csv['country']=vaccines_csv['country'].replace({'United States': 'US'})
covid_csv_2021['Location']=covid_csv_2021['Location'].replace({'Congo (Brazzaville)': 'Congo', 'Congo (Kinshasa)': 'Congo'})
covid_csv['Location']=covid_csv['Location'].replace({'Congo (Brazzaville)': 'Congo', 'Congo (Kinshasa)': 'Congo'})

                                                             

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [19]:
#creates a new df with reduced columns for vaccines
vaccines_df=vaccines_csv[["country",'date','daily_vaccinations','daily_vaccinations_per_million']]

#renames matching columns
vaccine_df_renamed=vaccines_df.rename(columns={"date":"Date","country":"Location"})

#just grabs apil dates
apr_vaccines_df=vaccine_df_renamed.loc[vaccine_df_renamed['Date'].str.contains("2021-04")]
apr_vaccines_df.head()

Unnamed: 0,Location,Date,daily_vaccinations,daily_vaccinations_per_million
38,Afghanistan,2021-04-01,3000,77
39,Afghanistan,2021-04-02,3000,77
40,Afghanistan,2021-04-03,3000,77
41,Afghanistan,2021-04-04,3000,77
42,Afghanistan,2021-04-05,3000,77


In [20]:
#output covid df for visualization
covid_csv.head()

Unnamed: 0,Location,Date,Total_Confirmed,Total_Deaths,Daily_Confirmed,Daily_Deaths
3,Afghanistan,2021-04-30,59745,2625,169,7
4,Afghanistan,2021-04-29,59576,2618,206,7
5,Afghanistan,2021-04-28,59370,2611,145,13
6,Afghanistan,2021-04-27,59225,2598,210,6
7,Afghanistan,2021-04-26,59015,2592,172,10


In [21]:
#merges on two columns for April
merged_df=pd.merge(covid_csv,apr_vaccines_df, on=["Date","Location"], how="inner")

In [22]:
merged_df

Unnamed: 0,Location,Date,Total_Confirmed,Total_Deaths,Daily_Confirmed,Daily_Deaths,daily_vaccinations,daily_vaccinations_per_million
0,Afghanistan,2021-04-22,58312,2561,98,4,8000,206
1,Afghanistan,2021-04-21,58214,2557,177,8,8000,206
2,Afghanistan,2021-04-20,58037,2549,139,3,8000,206
3,Afghanistan,2021-04-19,57898,2546,105,7,8000,206
4,Afghanistan,2021-04-18,57793,2539,72,0,8000,206
...,...,...,...,...,...,...,...,...
4183,Zimbabwe,2021-04-05,36934,1525,11,0,9129,614
4184,Zimbabwe,2021-04-04,36923,1525,12,1,8537,574
4185,Zimbabwe,2021-04-03,36911,1524,8,0,8498,572
4186,Zimbabwe,2021-04-02,36903,1524,7,1,8156,549


In [8]:
#outputs csv for april
merged_df.to_csv("output/april_covid_vacc_merged.csv", index=False)

In [23]:
#merges on two columns going back to January 2021
merged_df_2021=pd.merge(covid_csv_2021,vaccine_df_renamed, on=["Date","Location"], how="inner")

In [24]:
merged_df_2021.head()

Unnamed: 0,Location,Date,Total_Confirmed,Total_Deaths,Daily_Confirmed,Daily_Deaths,daily_vaccinations,daily_vaccinations_per_million
0,Afghanistan,2021-04-22,58312,2561,98,4,8000,206
1,Afghanistan,2021-04-21,58214,2557,177,8,8000,206
2,Afghanistan,2021-04-20,58037,2549,139,3,8000,206
3,Afghanistan,2021-04-19,57898,2546,105,7,8000,206
4,Afghanistan,2021-04-18,57793,2539,72,0,8000,206


In [11]:
merged_df_2021.to_csv("output/2021_covid_vacc_merged.csv", index=False)

In [27]:
merged_df_2021[merged_df_2021['Location']=="US"]#just for fun

Unnamed: 0,Location,Date,Total_Confirmed,Total_Deaths,Daily_Confirmed,Daily_Deaths,daily_vaccinations,daily_vaccinations_per_million
12131,US,2021-05-03,32472201,577566,50560,483,2287393,6839
12132,US,2021-05-02,32421641,577083,29367,323,2418580,7232
12133,US,2021-05-01,32392274,576760,45303,394,2546144,7613
12134,US,2021-04-30,32346971,576366,57922,1038,2548207,7619
12135,US,2021-04-29,32289049,575328,58199,854,2630407,7865
...,...,...,...,...,...,...,...,...
12261,US,2020-12-24,18775557,334536,194204,2908,191001,571
12262,US,2020-12-23,18581353,331628,229618,3421,150606,450
12263,US,2020-12-22,18351735,328207,198011,3399,127432,381
12264,US,2020-12-21,18153724,324808,199049,1919,57909,173


In [56]:
gdp_df=pd.read_csv("gdppc2019.csv")
gdp_df_rename=gdp_df.rename(columns={'country':'Location'})
gdp_df_rename=gdp_df_rename[['Location','gdppc']]
gdp_df_rename.loc[gdp_df_rename.Location=='Venezuela, RB']

Unnamed: 0,Location,gdppc
257,"Venezuela, RB",


In [42]:
merged_2021_GDP=pd.merge(gdp_df_rename, merged_df_2021, on=["Location"], how="right")
#merged_df=pd.merge(covid_csv,apr_vaccines_df, on=["Date","Location"], how="inner")

In [52]:
merged_2021_GDP

Unnamed: 0,Location,gdppc,Date,Total_Confirmed,Total_Deaths,Daily_Confirmed,Daily_Deaths,daily_vaccinations,daily_vaccinations_per_million
0,Afghanistan,507.103432,2021-04-22,58312,2561,98,4,8000,206
1,Afghanistan,507.103432,2021-04-21,58214,2557,177,8,8000,206
2,Afghanistan,507.103432,2021-04-20,58037,2549,139,3,8000,206
3,Afghanistan,507.103432,2021-04-19,57898,2546,105,7,8000,206
4,Afghanistan,507.103432,2021-04-18,57793,2539,72,0,8000,206
...,...,...,...,...,...,...,...,...,...
12920,Venezuela,,2021-02-21,136068,1316,465,4,31,1
12921,Venezuela,,2021-02-20,135603,1312,489,4,31,1
12922,Venezuela,,2021-02-19,135114,1308,333,5,31,1
12923,Venezuela,,2021-02-18,134781,1303,462,6,31,1


In [58]:
len(gdp_df_rename.Location.unique())

264