In [1]:
import pandas as pd
import csv
%load_ext blackcellmagic

In [2]:
# All five time-series CSVs live in the same directory of the JHU CSSE
# COVID-19 GitHub repository; only the file name differs.
jhu_time_series_base = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series"
)

# US files.
us_confirmed = f"{jhu_time_series_base}/time_series_covid19_confirmed_US.csv"
us_deaths = f"{jhu_time_series_base}/time_series_covid19_deaths_US.csv"

# Global files.
global_cases = f"{jhu_time_series_base}/time_series_covid19_confirmed_global.csv"
global_deaths = f"{jhu_time_series_base}/time_series_covid19_deaths_global.csv"
global_recovered = f"{jhu_time_series_base}/time_series_covid19_recovered_global.csv"

In [3]:
# Load every source CSV into its own raw DataFrame. The URLs are read in
# the same order as they are declared, one request per file.
(
    us_confirmed_df,
    us_deaths_df,
    global_cases_df,
    global_deaths_df,
    global_recovered_df,
) = [
    pd.read_csv(source_url)
    for source_url in (
        us_confirmed,
        us_deaths,
        global_cases,
        global_deaths,
        global_recovered,
    )
]

## US DATA

In [4]:
# Location reference table: one row per UID with its descriptive columns
# taken from the US deaths file (the frame this cell reads "Population"
# from), with a few columns renamed to friendlier names.
states_data = us_deaths_df.loc[
    :,
    ["UID", "iso2", "Admin2", "Province_State", "Population", "Lat", "Long_"],
].rename(
    columns={
        "iso2": "Country_Abbrv",
        "Admin2": "County",
        "Long_": "Long",
    }
)
states_data

Unnamed: 0,UID,Country_Abbrv,County,Province_State,Population,Lat,Long
0,16,AS,,American Samoa,55641,-14.2710,-170.1320
1,316,GU,,Guam,164229,13.4443,144.7937
2,580,MP,,Northern Mariana Islands,55144,15.0979,145.6739
3,630,PR,,Puerto Rico,2933408,18.2208,-66.5901
4,850,VI,,Virgin Islands,107268,18.3358,-64.8963
...,...,...,...,...,...,...,...
3248,84090053,US,Unassigned,Washington,0,0.0000,0.0000
3249,84090054,US,Unassigned,West Virginia,0,0.0000,0.0000
3250,84090055,US,Unassigned,Wisconsin,0,0.0000,0.0000
3251,84090056,US,Unassigned,Wyoming,0,0.0000,0.0000


In [5]:
# Reshape US confirmed cases from wide (one column per date) to long
# (one row per UID and date). The descriptive columns are dropped first so
# that only "UID" and the per-date columns reach the melt.
us_cases_clean = us_confirmed_df.drop(
    columns=[
        "iso2",
        "iso3",
        "code3",
        "FIPS",
        "Admin2",
        "Province_State",
        "Country_Region",
        "Lat",
        "Long_",
        "Combined_Key",
    ]
).melt(id_vars="UID", var_name="Date", value_name="Confirmed_Cases")

us_cases_clean

Unnamed: 0,UID,Date,Confirmed_Cases
0,16,1/22/20,0
1,316,1/22/20,0
2,580,1/22/20,0
3,630,1/22/20,0
4,850,1/22/20,0
...,...,...,...
243970,84090053,4/5/20,533
243971,84090054,4/5/20,0
243972,84090055,4/5/20,0
243973,84090056,4/5/20,0


In [6]:
# Reshape US deaths from wide (one column per date) to long (one row per
# UID and date). Besides the descriptive columns, "Population" is also
# removed here so it is not melted as if it were a date column.
us_deaths_clean = us_deaths_df.drop(
    columns=[
        "iso2",
        "iso3",
        "code3",
        "FIPS",
        "Admin2",
        "Province_State",
        "Country_Region",
        "Lat",
        "Long_",
        "Combined_Key",
        "Population",
    ]
).melt(id_vars="UID", var_name="Date", value_name="Deaths")

us_deaths_clean

Unnamed: 0,UID,Date,Deaths
0,16,1/22/20,0
1,316,1/22/20,0
2,580,1/22/20,0
3,630,1/22/20,0
4,850,1/22/20,0
...,...,...,...
243970,84090053,4/5/20,0
243971,84090054,4/5/20,0
243972,84090055,4/5/20,0
243973,84090056,4/5/20,0


In [7]:
# Attach the death counts to the confirmed-case rows. A left join keeps
# every (UID, Date) row of the cases table even if deaths has no match.
us_join_keys = ["UID", "Date"]
us_covid_data = us_cases_clean.merge(
    us_deaths_clean,
    how="left",
    on=us_join_keys,
)
us_covid_data

Unnamed: 0,UID,Date,Confirmed_Cases,Deaths
0,16,1/22/20,0,0
1,316,1/22/20,0,0
2,580,1/22/20,0,0
3,630,1/22/20,0,0
4,850,1/22/20,0,0
...,...,...,...,...
243970,84090053,4/5/20,533,0
243971,84090054,4/5/20,0,0
243972,84090055,4/5/20,0,0
243973,84090056,4/5/20,0,0


## GLOBAL DATA

In [8]:
# Reshape global confirmed cases to long format: the four location columns
# are kept as identifiers and every remaining column becomes a "Date" row.
global_cases_clean = global_cases_df.melt(
    id_vars=["Province/State", "Country/Region", "Lat", "Long"],
    var_name="Date",
    value_name="Confirmed_Cases",
)

global_cases_clean

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed_Cases
0,,Afghanistan,33.000000,65.000000,1/22/20,0
1,,Albania,41.153300,20.168300,1/22/20,0
2,,Algeria,28.033900,1.659600,1/22/20,0
3,,Andorra,42.506300,1.521800,1/22/20,0
4,,Angola,-11.202700,17.873900,1/22/20,0
...,...,...,...,...,...,...
19645,,Malawi,-13.254308,34.301525,4/5/20,4
19646,Falkland Islands (Islas Malvinas),United Kingdom,-51.796300,-59.523600,4/5/20,2
19647,Saint Pierre and Miquelon,France,46.885200,-56.315900,4/5/20,1
19648,,South Sudan,6.877000,31.307000,4/5/20,1


In [9]:
# Reshape global deaths to long format: the four location columns are kept
# as identifiers and every remaining column becomes a "Date" row.
global_deaths_clean = global_deaths_df.melt(
    id_vars=["Province/State", "Country/Region", "Lat", "Long"],
    var_name="Date",
    value_name="Deaths",
)

global_deaths_clean

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Deaths
0,,Afghanistan,33.000000,65.000000,1/22/20,0
1,,Albania,41.153300,20.168300,1/22/20,0
2,,Algeria,28.033900,1.659600,1/22/20,0
3,,Andorra,42.506300,1.521800,1/22/20,0
4,,Angola,-11.202700,17.873900,1/22/20,0
...,...,...,...,...,...,...
19645,,Malawi,-13.254308,34.301525,4/5/20,0
19646,Falkland Islands (Islas Malvinas),United Kingdom,-51.796300,-59.523600,4/5/20,0
19647,Saint Pierre and Miquelon,France,46.885200,-56.315900,4/5/20,0
19648,,South Sudan,6.877000,31.307000,4/5/20,0


In [10]:
# Reshape global recoveries to long format: the four location columns are
# kept as identifiers and every remaining column becomes a "Date" row.
global_recovered_clean = global_recovered_df.melt(
    id_vars=["Province/State", "Country/Region", "Lat", "Long"],
    var_name="Date",
    value_name="Recovered",
)

global_recovered_clean

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Recovered
0,,Afghanistan,33.000000,65.000000,1/22/20,0
1,,Albania,41.153300,20.168300,1/22/20,0
2,,Algeria,28.033900,1.659600,1/22/20,0
3,,Andorra,42.506300,1.521800,1/22/20,0
4,,Angola,-11.202700,17.873900,1/22/20,0
...,...,...,...,...,...,...
18595,,Malawi,-13.254308,34.301525,4/5/20,0
18596,Falkland Islands (Islas Malvinas),United Kingdom,-51.796300,-59.523600,4/5/20,0
18597,Saint Pierre and Miquelon,France,46.885200,-56.315900,4/5/20,0
18598,,South Sudan,6.877000,31.307000,4/5/20,0


In [11]:
# Combine global cases, deaths and recoveries into one long table.
#
# Rows are matched on the location name and the date only. "Lat"/"Long" are
# floating-point columns and are intentionally NOT part of the join key: an
# exact-equality join on floats turns any coordinate difference between the
# source files into a silent non-match (NaN) even though the location and
# date are the same.
# NOTE(review): assumes (Province/State, Country/Region, Date) is unique in
# the deaths and recovered frames - confirm against the upstream files.
global_join_keys = ["Province/State", "Country/Region", "Date"]
coordinate_cols = ["Lat", "Long"]

global_covid_data = global_cases_clean.merge(
    # Coordinates are taken from the cases frame only, so the right-hand
    # frames drop theirs to avoid duplicated Lat_x/Lat_y style columns.
    global_deaths_clean.drop(columns=coordinate_cols),
    how="left",
    on=global_join_keys,
).merge(
    global_recovered_clean.drop(columns=coordinate_cols),
    how="left",  # locations absent from the recovered frame stay as NaN
    on=global_join_keys,
)
global_covid_data

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed_Cases,Deaths,Recovered
0,,Afghanistan,33.000000,65.000000,1/22/20,0,0,0.0
1,,Albania,41.153300,20.168300,1/22/20,0,0,0.0
2,,Algeria,28.033900,1.659600,1/22/20,0,0,0.0
3,,Andorra,42.506300,1.521800,1/22/20,0,0,0.0
4,,Angola,-11.202700,17.873900,1/22/20,0,0,0.0
...,...,...,...,...,...,...,...,...
19645,,Malawi,-13.254308,34.301525,4/5/20,4,0,0.0
19646,Falkland Islands (Islas Malvinas),United Kingdom,-51.796300,-59.523600,4/5/20,2,0,0.0
19647,Saint Pierre and Miquelon,France,46.885200,-56.315900,4/5/20,1,0,0.0
19648,,South Sudan,6.877000,31.307000,4/5/20,1,0,0.0
