### Note on project

In [1]:
#necessary imports for data collection and visualization
import requests,json
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.io as pio

#import for country code
import country_converter as coco
import pycountry

#additional imports
import warnings
# Suppress every warning for the rest of the session; note that this also hides
# deprecation notices raised by the libraries used in later cells.
warnings.filterwarnings("ignore")
# Initialise plotly's offline notebook mode before any figure is rendered.
init_notebook_mode(connected=True)
from IPython.display import HTML


## Obtain Data

In [None]:
#Get daily number of new reported cases of covid19 by country worldwide.
#The request is bounded by a timeout and the HTTP status is checked first, so a
#stalled or failed download stops here with a clear error instead of surfacing
#later as a JSON decoding problem.
ecdc_reply = requests.get(
    "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/",
    timeout=60,
)
ecdc_reply.raise_for_status()
response = json.loads(ecdc_reply.text)

In [None]:
#show the Python type of the decoded payload
print(type(response))

In [None]:
#list the top-level keys of the decoded payload
print(response.keys())

In [None]:
#number of entries stored under the 'records' key
print(len(response['records']))

In [None]:
#turn the 'records' entry of the payload into the working dataframe
records = response['records']
data = pd.DataFrame(records)

In [None]:
#preview the first 10 rows of the dataframe
data.head(10)

## Data Cleaning

In [None]:
#convert dateRep to pandas datetimes.
#The day-first order is stated explicitly so that ambiguous values such as
#01/02/2020 are never read month-first; the deprecated infer_datetime_format
#flag is no longer needed.
#NOTE(review): the feed is understood to publish dateRep as day/month/year - confirm against data.head()
data['dateRep'] = pd.to_datetime(data['dateRep'], dayfirst=True)

In [None]:
#confirm the dtype of dateRep after the conversion
data['dateRep'].dtype

In [None]:
#list the distinct month numbers that occur in dateRep
data['dateRep'].dt.month.unique()

In [None]:
#replace the datetimes with text in day-abbreviatedMonth-year form (%d-%b-%Y);
#from this cell onwards dateRep holds strings again, not datetimes
data['dateRep'] = data['dateRep'].dt.strftime('%d-%b-%Y')

In [None]:
#preview the first reformatted dateRep values
data['dateRep'].head()

In [None]:
#(rows, columns) of the dataframe
data.shape

In [None]:
#column names, non-null counts and dtypes of the dataframe
data.info()

In [None]:
#descriptive statistics of the numeric columns, flipped so each column is a row
data.describe().T

In [None]:
#count the distinct values in the territory-name, geo-id, country-code and continent columns
data[['countriesAndTerritories', 'geoId', 'countryterritoryCode', 'continentExp']].nunique()

## Exploratory Data analysis

***

### Global reported number of cases and deaths till date

In [None]:
#Worldwide totals of cases and deaths, plus the mortality rate derived from them
global_totals = data[['cases', 'deaths']].sum()
global_df = global_totals.to_frame().T
global_df['Mortality rate (per 100)'] = (100 * global_df["deaths"] / global_df["cases"]).round(2)
global_df = global_df.rename(index={0: 'Global'})
#totals are shown as whole numbers, the rate with two decimals
(
    global_df.style
    .background_gradient(cmap='RdYlBu', axis=1)
    .format("{:.2f}")
    .format("{:.0f}", subset=["cases", "deaths"])
)

### Covid 19 **cases** in each continent

In [None]:
#Total confirmed cases per continent, drawn as an annotated bar chart.
#Only the 'cases' column is aggregated here, so the axis label and title speak
#of cases alone and make no claim about the unit of the raw counts.
cases_and_death_in_each_continent = data.groupby('continentExp')[['cases']].sum()
ax = cases_and_death_in_each_continent[['cases']].plot(kind='bar', figsize = (10, 8), color = 'darkblue')
for p in ax.patches:
    #write each bar's value just above its top-left corner
    ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
plt.xlabel('Continent', fontsize = 16)
plt.ylabel('Total number of confirmed cases', fontsize = 12)
plt.title('Total confirmed cases in each continent', fontsize = 20)

### Covid 19 **deaths** in each continent

In [None]:
#Total cases and deaths per continent.
#The two columns are selected with a list; selecting them with a bare tuple
#('cases', 'deaths') is not accepted by current pandas.
death_and_cases_per_continent = data.groupby('continentExp')[['cases', 'deaths']].sum()

In [None]:
#deaths per 100 confirmed cases for each continent, rounded to two decimals
death_and_cases_per_continent['Mortality Rate (per 100)'] = (
    death_and_cases_per_continent['deaths']
    .mul(100)
    .div(death_and_cases_per_continent['cases'])
    .round(2)
)

In [None]:
#continents ordered by total cases, with one colour gradient per column
(
    death_and_cases_per_continent
    .sort_values('cases', ascending=False)
    .style
    .background_gradient(cmap='Blues', subset=['cases'])
    .background_gradient(cmap='Reds', subset=["deaths"])
    .background_gradient(cmap='YlOrBr', subset=["Mortality Rate (per 100)"])
)

In [None]:
#Total deaths per continent, drawn as an annotated bar chart
plt.figure(figsize=(10, 8))
deaths_by_continent = data.groupby('continentExp')['deaths'].sum()
ax2 = deaths_by_continent.plot(kind='bar', color='red')
for bar in ax2.patches:
    #label each bar with its height, placed just above the bar's left edge
    bar_height = bar.get_height()
    ax2.annotate(str(bar_height), (bar.get_x() * 1.005, bar_height * 1.005))
plt.xlabel('Continent', fontsize=16)
plt.ylabel('Total death', fontsize=16)
plt.title('Total deaths in each continent', fontsize=20)

##### The America continent has the highest total number of deaths

### Trend of covid19 deaths in each continent

In [None]:
#split the 'dd-Mon-yyyy' text in dateRep into its three pieces:
#day and year become integers, month stays as the abbreviated name
date_parts = data['dateRep'].str.split('-', expand=True)
data['day'] = date_parts[0].astype('int64')
data['month'] = date_parts[1]
data['year'] = date_parts[2].astype('int64')

In [None]:
#dtype of every column after the date columns were rebuilt
data.dtypes

In [None]:
#monthly death totals: one row per month, one column per continent, gaps filled with 0
deaths_per_month_per_continent = data.pivot_table(
    values='deaths',
    index='month',
    columns='continentExp',
    aggfunc='sum',
    fill_value=0,
)

In [None]:
#put the month rows into calendar order (January through September only;
#any other month label present in the pivot is left out by this reindex)
month_order = ["Jan", "Feb", "Mar", 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep']
deaths_per_month_per_continent = deaths_per_month_per_continent.reindex(month_order)

In [None]:
#reorder the continent columns from highest to lowest overall death total
continent_order = deaths_per_month_per_continent.sum().sort_values(ascending=False).index
deaths_per_month_per_continent = deaths_per_month_per_continent[continent_order]

In [None]:
#display the month-by-continent table with a background gradient, values shown to three decimals
deaths_per_month_per_continent.style.background_gradient(cmap='Set1').format("{:.3f}")

In [None]:
#one line chart per continent showing its monthly death totals
for continent, monthly_deaths in deaths_per_month_per_continent.items():
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot(monthly_deaths.index, monthly_deaths, marker='v', color='r')
    ax.set_xlabel('Month', fontsize=14)
    ax.set_ylabel('Number of Deaths', fontsize=14)
    ax.set_title(f'Covid 19 death in {continent}', fontsize=18)

### Country wide reported cases

In [None]:
#total cases and deaths per country.
#The two columns are selected with a list; selecting them with a bare tuple
#('cases', 'deaths') is not accepted by current pandas.
cases_and_deaths_in_each_country = data.groupby('countriesAndTerritories')[['cases', 'deaths']].sum()

In [None]:
#deaths per 100 confirmed cases for each country, rounded to two decimals
cases_and_deaths_in_each_country['Mortality Rate (per 100)'] = (
    cases_and_deaths_in_each_country['deaths']
    .mul(100)
    .div(cases_and_deaths_in_each_country['cases'])
    .round(2)
)

In [None]:
#countries ordered by total cases, with one colour gradient per column
(
    cases_and_deaths_in_each_country
    .sort_values('cases', ascending=False)
    .style
    .background_gradient(cmap='Blues', subset=['cases'])
    .background_gradient(cmap='Reds', subset=["deaths"])
    .background_gradient(cmap='YlOrBr', subset=["Mortality Rate (per 100)"])
)

### Top 15 countries with confirmed cases and confirmed death

In [None]:
#Top 15 countries by total confirmed cases, as a horizontal bar chart.
#The frame is sorted once, so the bar labels and the bar lengths come from the
#same 15 rows and the lengths are the case totals (not the death totals).
top15_by_cases = cases_and_deaths_in_each_country.sort_values('cases', ascending=False).head(15)
#a single axes is created; the grid is kept behind the bars
f, ax_cases = plt.subplots(figsize=(10, 8))
ax_cases.set_axisbelow(True)
ax_cases.barh(top15_by_cases.index, top15_by_cases['cases'], color="darkblue")
ax_cases.invert_yaxis()  #largest total at the top
ax_cases.tick_params(size=5, labelsize=13)
ax_cases.set_xlabel("Confirmed Cases (in million)", fontsize=18)
ax_cases.set_title("Top 15 Countries (Confirmed Cases)", fontsize=20)
ax_cases.grid(alpha=0.3)

In [None]:
#Top 15 countries by total confirmed deaths, as a horizontal bar chart.
#The frame is sorted once and reused for both the labels and the bar lengths.
top15_by_deaths = cases_and_deaths_in_each_country.sort_values('deaths', ascending=False).head(15)
#a single axes is created (instead of two stacked on top of each other);
#the grid is kept behind the bars
f, ax_deaths = plt.subplots(figsize=(10, 8))
ax_deaths.set_axisbelow(True)
ax_deaths.barh(top15_by_deaths.index, top15_by_deaths['deaths'], color="red")
ax_deaths.invert_yaxis()  #largest total at the top
ax_deaths.tick_params(size=5, labelsize=13)
#the bars hold raw counts, so the label does not claim a unit of thousands
ax_deaths.set_xlabel("Confirmed deaths", fontsize=18)
ax_deaths.set_title("Top 15 Countries (Confirmed deaths)", fontsize=20)
ax_deaths.grid(alpha=0.3)

### Correlation analysis for countries

In [None]:
#pairwise correlation between the per-country columns, colour-coded and shown to three decimals
(
    cases_and_deaths_in_each_country
    .corr()
    .style
    .background_gradient(cmap='RdBu_r')
    .format("{:.3f}")
)

### Correlation analysis for continent

In [None]:
#pairwise correlation between the per-continent columns, colour-coded and shown to three decimals
death_and_cases_per_continent.corr().style.background_gradient(cmap='RdBu_r').format("{:.3f}")

### Confirmed Covid19 cases and deaths in each country (log scale)

In [None]:
#move the country names out of the index into a regular column, then call that column 'Country'
country_totals_flat = cases_and_deaths_in_each_country.reset_index()
cases_and_deaths_in_each_country = country_totals_flat.rename(
    columns={'countriesAndTerritories': 'Country'}
)

In [None]:
#Provisional country code: the first three characters of the country name, upper-cased.
#NOTE(review): a later cell assigns Country_code again from cc.convert(..., to = 'ISO3'),
#so the values produced here are overwritten - consider dropping this cell.
cases_and_deaths_in_each_country['Country_code'] = cases_and_deaths_in_each_country['Country'].apply(lambda x: x[:3].upper())

In [None]:
#converter object used below to translate country names into codes
cc = coco.CountryConverter()

In [None]:
#look up the ISO3 code of every country name, one name at a time
def _to_iso3(country_name):
    return cc.convert(names=country_name, to='ISO3')

cases_and_deaths_in_each_country['Country_code'] = cases_and_deaths_in_each_country['Country'].map(_to_iso3)

In [None]:
#display the per-country table, now including the Country and Country_code columns
cases_and_deaths_in_each_country

In [None]:
#World map of total confirmed cases per country, coloured on a log10 scale.
#log10 is undefined for totals <= 0, so those totals are masked to NaN up front
#instead of turning into -inf/NaN behind suppressed warnings.
case_totals = cases_and_deaths_in_each_country["cases"]
fig1 = px.choropleth(cases_and_deaths_in_each_country,
                    locations=cases_and_deaths_in_each_country['Country_code'],  # code identifying each country on the map
                    color=np.log10(case_totals.where(case_totals > 0)),  # log10 of the case total drives the colour
                    hover_name="Country",  # country name as the hover title
                    hover_data=['cases'],  # raw case total added to the hover box
                    color_continuous_scale='Blues')
fig1.update_geos(fitbounds='locations', visible=False)
fig1.update_layout(title_text='Confirmed cases of covid 19 (log scale)')
fig1.update_coloraxes(colorbar_title='Confirmed cases (log scale)')


In [None]:
#World map of total confirmed deaths per country, coloured on a log10 scale.
#log10 is undefined for totals <= 0 (e.g. countries with no recorded deaths),
#so those totals are masked to NaN up front instead of turning into -inf/NaN
#behind suppressed warnings.
death_totals = cases_and_deaths_in_each_country["deaths"]
fig2 = px.choropleth(cases_and_deaths_in_each_country,
                    locations=cases_and_deaths_in_each_country['Country_code'],  # code identifying each country on the map
                    color=np.log10(death_totals.where(death_totals > 0)),  # log10 of the death total drives the colour
                    hover_name="Country",  # country name as the hover title
                    hover_data=['deaths'],  # raw death total added to the hover box
                    color_continuous_scale='Reds')
fig2.update_geos(fitbounds="locations", visible=False)
fig2.update_layout(title_text='Confirmed covid 19 deaths (log scale)')
fig2.update_coloraxes(colorbar_title='Confirmed deaths (log scale)')