In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns # for plotting
from scipy.stats import zscore # for standardizing data

In [26]:
# Function to fetch data about the cases and deaths of a country
def countryData(country, path='data.csv'):
    """Return the case and death history of a single country.

    Reads the CSV at ``path``, keeps the rows whose first column equals
    ``country`` and adds the absolute row-to-row change of the ``Cases``
    and ``Deaths`` columns (presumably cumulative totals -- confirm
    against the data set).

    Parameters
    ----------
    country : str
        Value matched against the first column of the CSV.
    path : str, optional
        Location of the CSV file. Defaults to ``'data.csv'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Entity``, ``Date`` (converted with ``pd.to_datetime``),
        ``Cases``, ``Deaths``, ``Daily Cases`` and ``Daily Deaths``.
        The first row of the two daily columns is NaN because it has no
        predecessor. The frame is empty when no row matches ``country``.

    Raises
    ------
    KeyError
        If the CSV lacks one of the four displayed columns.
    """
    df = pd.read_csv(path)
    displayColumns = ['Entity', 'Date', 'Cases', 'Deaths']

    # The country is matched against the first column by position.
    # An explicit copy makes the result independent of `df`, so the column
    # assignments below are not chained assignments on a slice (which
    # triggers pandas' SettingWithCopyWarning).
    df_display = df.loc[df.iloc[:, 0] == country, displayColumns].copy()

    # Absolute value: a decrease between consecutive rows is reported as a
    # positive daily figure rather than a negative one.
    df_display['Daily Cases'] = df_display['Cases'].diff().abs()
    df_display['Daily Deaths'] = df_display['Deaths'].diff().abs()
    df_display['Date'] = pd.to_datetime(df_display['Date'])
    return df_display

In [27]:
# Function to plot the data of a country
def plotData(df):
    """Plot the daily cases and daily deaths of one country over time.

    Both series share the ``Date`` x-axis; daily cases use the left
    y-axis (blue) and daily deaths a twinned right y-axis (red). The
    country name shown in the title is taken from the first ``Entity``
    value of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Date``, ``Daily Cases``, ``Daily Deaths`` and
        ``Entity`` columns.
    """
    _, cases_axis = plt.subplots()

    def draw_series(axis, column, colour):
        # One line per axis, with the y-label and y-ticks tinted to match.
        axis.plot(df['Date'], df[column], color=colour)
        axis.set_ylabel(column, color=colour)
        axis.tick_params(axis='y', labelcolor=colour)

    draw_series(cases_axis, 'Daily Cases', 'blue')
    cases_axis.set_xlabel('Date')

    # Second y-axis on the same x-axis, created after the first line is drawn.
    deaths_axis = cases_axis.twinx()
    draw_series(deaths_axis, 'Daily Deaths', 'red')

    country_name = df['Entity'].iloc[0]
    plt.title('Daily Cases and Deaths in ' + country_name)
    plt.show()

In [28]:
# Function to get the cases and deaths of all countries per continent
def countriesPerContinentData(path='data.csv'):
    """Return the maximum cases and deaths of every country, by continent.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to ``'data.csv'``.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(n_countries, 4)`` whose columns are
        country, continent, maximum ``Cases`` and maximum ``Deaths``.
        Rows are ordered by continent; within a continent they keep the
        country order produced by the groupby.
    """
    df = pd.read_csv(path)

    # One grouped aggregation yields both maxima with aligned rows. This
    # replaces two passes over Series.iteritems(), which pandas 2.0 removed.
    totals = (
        df.groupby(['Entity', 'Continent'])[['Cases', 'Deaths']]
        .max()
        .reset_index()
    )
    data = totals.to_numpy(dtype=object)

    # A stable sort keeps the within-continent order deterministic.
    order = np.argsort(data[:, 1], kind='stable')
    return data[order]

In [29]:
# Function to create a histogram of the cases and deaths of all countries per continent
def _plotCountryBars(countries, values, quantity, continent):
    """Draw one bar chart of ``values`` per country for ``continent``."""
    _, ax = plt.subplots()
    ax.bar(countries, values)
    # Rotate the labels after the bars exist. Calling set_xticklabels before
    # plotting pins the labels onto the default numeric ticks instead of the
    # categorical bar positions.
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_title(quantity + ' per country in ' + continent)
    ax.set_xlabel('Country')
    ax.set_ylabel(quantity)
    plt.show()


def plotCountriesPerContinentData(data, continent):
    """Plot per-country bar charts of cases and of deaths for a continent.

    Parameters
    ----------
    data : numpy.ndarray
        Two-dimensional array whose columns are country, continent,
        cases and deaths (indexed 0 to 3).
    continent : str
        Continent name; rows are kept where column 1 equals this value.

    Raises
    ------
    ValueError
        If no row of ``data`` belongs to ``continent``, or (from
        ``astype(int)``) if a selected cases/deaths value is NaN.
    """
    cont = data[data[:, 1] == continent]  # Get the data of the continent
    if len(cont) == 0:
        raise ValueError('No data found for continent: ' + str(continent))

    countries = cont[:, 0]
    # Same chart twice: column 2 holds the cases, column 3 the deaths.
    for quantity, column in (('Cases', 2), ('Deaths', 3)):
        print('Histogram of ' + quantity.lower() + ' per country in ' + continent)
        _plotCountryBars(countries, cont[:, column].astype(int), quantity, continent)

In [30]:
def correlationData(path="data.csv"):
    """Show a heatmap of the pairwise correlation between numeric columns.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to ``"data.csv"``.
    """
    df = pd.read_csv(path)
    # Restrict to numeric/boolean columns explicitly: since pandas 2.0,
    # DataFrame.corr() no longer drops text columns on its own and raises
    # when it meets one.
    numeric = df.select_dtypes(include=['number', 'bool'])
    corr = numeric.corr()  # Calculate the correlation between the columns
    plt.figure(figsize=(12, 8))
    sns.heatmap(corr, annot=True, cmap=plt.cm.Reds)
    plt.show()

In [31]:
def main():
    """Run the whole analysis: country plots, continent charts, correlation."""
    # Daily cases and deaths, one figure per country
    for country in ('Greece', 'Italy'):
        plotData(countryData(country))

    # Per-country totals, loaded once and charted for each continent
    continentData = countriesPerContinentData()
    for continent in ('Europe', 'North America'):
        plotCountriesPerContinentData(continentData, continent)

    # Correlation between the columns
    correlationData()

In [None]:
# Run the full analysis defined in main(): country plots, continent charts and the correlation heatmap.
main()