In [74]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [58]:
data = 'WHO-COVID-19-global-data.csv'

In [47]:
def get_country_df(country):
    df = pd.read_csv(data)
    df.columns = [i.strip() for i in df.columns]
    df.rename(columns={'Date_reported' : 'Date'}, inplace = True)
    df['Date'] = pd.to_datetime(df['Date'])
    country_groups = df.groupby('Country')
    country_df = country_groups.get_group(country)
    return country_df

In [366]:
def get_cases(country, how = 'New_cases', start_date = None, end_date = None):
    country_df = get_country_df(country)
    country_df.set_index('Date', inplace = True)

    if start_date is None:
        filt = country_df[how] >= 1

        country_df = country_df[filt]
    
        return country_df.loc[:, [how]]
    
    else:
        return country_df.loc[start_date:end_date, [how]]

In [365]:
def get_deaths(country, how = 'New_deaths', start_date = None, end_date = None):
    country_df = get_country_df(country)
    country_df.set_index('Date', inplace = True)

    if start_date is None:

        filt = country_df[how] >= 1

        country_df = country_df[filt]

        return country_df.loc[:, [how]]

    else:
        return country_df.loc[start_date:end_date, [how]]

In [372]:
def get_country_dictionary(countries, plot, plot_type = 'line', start_date = None, end_date = None):
    country_dictionary = {}
    
    for i in countries:
        if plot.lower() == "new cases":
            if plot_type != 'bar':
                country_dictionary[i] = get_cases(i)
            else:
                country_dictionary[i] = get_cases(i, start_date=start_date, end_date=end_date)
        
        elif plot.lower() == 'total cases':
            if plot_type != 'bar':
                country_dictionary[i] = get_cases(i, 'Cumulative_cases')
            else:
                country_dictionary[i] = get_cases(i, 'Cumulative_cases', start_date=start_date, end_date=end_date)
            
        elif plot.lower() == 'new deaths':
            if plot_type != 'bar':
                country_dictionary[i] = get_deaths(i)
            
            else:
                country_dictionary[i] = get_deaths(i, start_date=start_date, end_date=end_date)
            
        elif plot.lower() == 'total deaths':
            if plot_type != 'bar':
                country_dictionary[i] = get_deaths(i, 'Cumulative_deaths') 
            else:
                country_dictionary[i] = get_deaths(i, 'Cumulative_deaths', start_date=start_date, end_date=end_date)
                
    if plot_type == 'bar':    
        country_dictionary = {
            key: country_dictionary[key] 
            for key in sorted(country_dictionary, key = lambda k: len(country_dictionary[k]))
        }
        
    return country_dictionary

In [374]:
def plot_line(countries, plot = 'new cases', plot_type = 'line'):
    
    if not isinstance(countries, list) and not isinstance(countries, list):
        raise TypeError("Please pass in a list/tuple of Country/Countries")
        return
    
    # country comes in as a list
     
    %matplotlib notebook    
    plt.style.use('seaborn-colorblind')
    plt.figure(figsize=(10,5))
    plt.xticks(rotation = 45)
    
    plt.ylabel(plot.upper())
    plt.xlabel('Month')
    
    words = [(i[0].upper() + i[1:]) for i in plot.strip().split(' ')]
    to_add = '' if plot.lower().strip().split(' ')[0] != 'new' else '/Day'
    plt.title(words[0] + ' COVID-19 ' + words[1] + to_add) 
   
    
    country_dictionary = get_country_dictionary(countries, plot, plot_type)
    
    
    keys = list(country_dictionary.keys())
    
    print(country_dictionary.keys())
    
    i = len(countries) // 2 - 2
    width = 0.75
    x_labels = None
    
    
    if plot_type == 'bar':
        longest = country_dictionary[keys[0]]
        
        start_date, end_date = (longest.index[0], longest.index[-1])
        
        country_dictionary = get_country_dictionary(countries, plot, plot_type, start_date, end_date)
        
        x_labels = np.arange(len(country_dictionary[keys[0]]))
        
        print([len(country_dictionary[i]) for i in keys])
    
    
    for country in countries:   
        dates = country_dictionary[country].index

        if plot.lower() == "new cases":
            y = country_dictionary[country]['New_cases']
        
        elif plot.lower() == 'total cases':
            y = country_dictionary[country]['Cumulative_cases']
            
        elif plot.lower() == 'new deaths':
            y = country_dictionary[country]['New_deaths']
            
        elif plot.lower() == 'total deaths':
            y = country_dictionary[country]['Cumulative_deaths']
        
        if plot_type == 'line':
            plt.plot(dates, y, label = country, alpha = 0.75)
        
        else:
            plt.bar(x_labels  + i * width , y, label = country, width = width, alpha = 0.75)
            
            i += 1
        
    plt.legend()


In [375]:
# plot_line(['India', 'New Zealand', 'United States of America'], 'total deaths')
plot_line(['India', 'Brazil', 'United States of America'], plot_type = 'bar')

<IPython.core.display.Javascript object>

dict_keys(['Brazil', 'India', 'United States of America'])
[210, 210, 210]


In [187]:
india = get_country_df('India')

filt = india['New_cases'] >= 1

ifilt = india[filt]

print(ifilt.loc[ifilt.index[0], 'Date'] - ifilt.loc[ifilt.index[-1], 'Date'])


-236 days +00:00:00


In [282]:
d = {"one": [(1,3),(1,4)], "two": [(1,2),(1,2),(1,3)], "three": [(1,1)]}

d = {key:d[key] for key in sorted(d, key = lambda k: len(d[k])) }

# d = sorted(d)
print(d.keys())
d

dict_keys(['three', 'one', 'two'])


{'three': [(1, 1)], 'one': [(1, 3), (1, 4)], 'two': [(1, 2), (1, 2), (1, 3)]}

In [354]:
india = get_country_df("India")
s, e = (india['Date'].iloc[0],india['Date'].iloc[-1])
india.set_index('Date', inplace = True)
india.loc[s:e, ['Date','New_cases']]

KeyError: 'Passing list-likes to .loc or [] with any missing labels is no longer supported, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'