In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pycountry
import requests

pd.options.display.max_rows = 200
NCOV19_API = "https://covid19-us-api.herokuapp.com/"

## Getting TimeSeries JHU COVID19 data

In [2]:
metric_type = "confirmed"
TIME_URL = f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{metric_type}_global.csv"

df = pd.read_csv(TIME_URL)
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,22,22,22,24,24,40,40,74,84,94
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,55,59,64,70,76,89,104,123,146,174
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,60,74,87,90,139,201,230,264,302,367
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,39,39,53,75,88,113,133,164,188,224
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,0,0,0,1,2,2,3,3,3,4


### Total Countries infected

In [4]:
df.describe(include=["O"])

Unnamed: 0,Province/State,Country/Region
count,76,248
unique,76,175
top,Newfoundland and Labrador,China
freq,1,33


There are total of 175 countries, let's veryify it.

In [8]:
countries = list(set(df["Country/Region"].to_list()))

print(f"Total Countries, {len(countries)}")

Total Countries, 175


## Convert Countries to `alpha-2` code to standardize

In [None]:
def create_country_map(countries):
    for country_dict

## Make functions to parse `df` and get country stats by `metric_type` - `confirmed` vs. `deaths`

In [14]:
def create_country_map(countries: list):
    country_dict = {}
    for country in countries:
        try:
            country_dict[pycountry.countries.search_fuzzy(country)[0].alpha_2] = country
        except:
            pass
    return country_dict

country_dict = create_country_map(countries)
country_dict

{'TR': 'Turkey',
 'PY': 'Paraguay',
 'GW': 'Guinea-Bissau',
 'CZ': 'Czechia',
 'KG': 'Kyrgyzstan',
 'EG': 'Egypt',
 'SZ': 'Eswatini',
 'GQ': 'Equatorial Guinea',
 'PL': 'Poland',
 'BH': 'Bahrain',
 'BZ': 'Belize',
 'PE': 'Peru',
 'CA': 'Canada',
 'BT': 'Bhutan',
 'NI': 'Nicaragua',
 'PK': 'Pakistan',
 'HN': 'Honduras',
 'PG': 'Papua New Guinea',
 'LC': 'Saint Lucia',
 'UZ': 'Uzbekistan',
 'US': 'US',
 'BS': 'Bahamas',
 'ER': 'Eritrea',
 'ET': 'Ethiopia',
 'MN': 'Mongolia',
 'NG': 'Nigeria',
 'SK': 'Slovakia',
 'ZM': 'Zambia',
 'GB': 'United Kingdom',
 'RS': 'Kosovo',
 'EC': 'Ecuador',
 'JO': 'Jordan',
 'ES': 'Spain',
 'HR': 'Croatia',
 'AO': 'Angola',
 'MT': 'Malta',
 'DM': 'Dominica',
 'LT': 'Lithuania',
 'LR': 'Liberia',
 'BG': 'Bulgaria',
 'JP': 'Japan',
 'LY': 'Libya',
 'KW': 'Kuwait',
 'NP': 'Nepal',
 'UY': 'Uruguay',
 'KN': 'Saint Kitts and Nevis',
 'AE': 'United Arab Emirates',
 'GD': 'Grenada',
 'GH': 'Ghana',
 'IL': 'Israel',
 'SI': 'Slovenia',
 'MX': 'Mexico',
 'AG': 'Antigua

In [49]:
def parse_df(metric_type):
    if metric_type.startswith("confirmed"):
        metric_type = "confirmed"
    elif metric_type.startswith("death"):
        metric_type = "deaths"
    else:
        raise ValueError(f"{metric_type} metric type not supported")
        
    url = f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{metric_type}_global.csv" 
    df = pd.read_csv(url)
    return df


def get_country_stats(country_alpha, metric_type):
    
    country_alpha = country_alpha.upper()
    metric_type = metric_type.lower()
    
    df = parse_df(metric_type=metric_type)
    
    if country_alpha not in country_dict:
        raise ValueError(f"{country_alpha} not found in our dictionary.")
    country = country_dict[country_alpha]

    df = df[df["Country/Region"] == country]
    df = df.drop(columns=["Lat", "Long", "Country/Region", "Province/State"])
    df = df.sum(axis=0).to_frame().reset_index()
    df = df.rename(columns={0: metric_type.title()})
    df = df.reset_index(drop=True)
    df = df.rename(columns={"index": "Date"})
    return df



def get_combined_stats(country_alpha):
    df1 = get_country_stats(country_alpha, metric_type="confirmed")
    df2 = get_country_stats(country_alpha, metric_type="deaths")
    
    merge = pd.merge(df1, df2, on="Date")
    
    return merge


df = get_combined_stats("US")

In [50]:
fig = px.line(df, x="Date", y="Confirmed")
# fig.update_traces(line_color="#FEC400")
# fig.update_layout(
#         template="plotly_dark",
#         title="U.S. Confirmed Cases",
#         xaxis_title="Confirmed Cases",
#         yaxis_title=None,
#         paper_bgcolor="rgba(0,0,0,0)",
#         plot_bgcolor="rgba(0,0,0,0)",
#         xaxis_showgrid=False,
#         yaxis_showgrid=False,
#     )
fig.show()

In [52]:
fig = px.line(df, x="Date", y="Deaths")
fig.show()