# Plotting Country Data

## Preparation file - not the final product. See the Demo plot 1 and Demo plot 2 notebooks.

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import datetime as dt
import requests
import matplotlib
import matplotlib.pyplot as pp
import mplcursors
import ipywidgets as widgets
from ipywidgets import IntRangeSlider, SelectMultiple
from IPython.display import display


%matplotlib inline

In [2]:
# Read the cleaned country-level CSV into `data`; the cell's output lists its
# column names.
data = pd.read_csv(
    filepath_or_buffer="Cleaned_data/final_country_data_by_quarter_elec_mob_strin.csv"
)
data.columns

Index(['CountryName', 'Year', 'Month', 'Quarter', 'max_StIn', 'min_StIn',
       'max_StInDis', 'min_StInDis', 'ConfirmedCases', 'ConfirmedDeaths',
       'avg_residential_mobility_from_baseline',
       'avg_workplace_mobility_from_baseline', 'TWh', 'Percent_Month_2019',
       'Date', 'fake day', 'Date_Est', 'Deaths_by_100,000',
       'Cases_by_100,000'],
      dtype='object')

In [3]:
# Rename the two per-100,000 columns to comma-free names.
# NOTE(review): presumably the comma in the original names is what caused the
# trouble (it prevents attribute access and is awkward in query strings) - confirm.
#
# Only these two columns are renamed. Reassigning the complete list of column
# names would silently attach the wrong label to a column if the CSV's column
# order or count ever changed; a targeted rename leaves every other column
# untouched and is safe to re-run.
data = data.rename(
    columns={
        'Deaths_by_100,000': 'Deaths_by_100k',
        'Cases_by_100,000': 'Cases_by_100k',
    }
)
data

Unnamed: 0,CountryName,Year,Month,Quarter,max_StIn,min_StIn,max_StInDis,min_StInDis,ConfirmedCases,ConfirmedDeaths,avg_residential_mobility_from_baseline,avg_workplace_mobility_from_baseline,TWh,Percent_Month_2019,Date,fake day,Date_Est,Deaths_by_100k,Cases_by_100k
0,Austria,2020,April,1,85.19,85.19,85.19,85.19,12640.0,243.0,21.250000,-55.625000,4.556000,86.847122,2020 April 4,4,2020-04-04,2.737411,142.390447
1,Austria,2020,April,2,85.19,81.48,85.19,81.48,14370.0,393.0,20.500000,-57.125000,4.556000,86.847122,2020 April 11,11,2020-04-11,4.427171,161.879013
2,Austria,2020,April,3,81.48,78.70,81.48,78.70,14924.0,494.0,16.714286,-46.142857,4.556000,86.847122,2020 April 18,18,2020-04-18,5.564943,168.119860
3,Austria,2020,April,4,78.70,78.70,78.70,78.70,15364.0,580.0,14.571429,-42.428571,4.556000,86.847122,2020 April 25,25,2020-04-25,6.533739,173.076490
4,Austria,2020,August,1,31.48,31.48,31.48,31.48,21819.0,720.0,0.750000,-27.500000,,,2020 August 4,4,2020-08-04,8.110848,245.792497
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3592,United Kingdom,2019,August,4,,,,,,,,,22.742809,,2019 August 25,25,2019-08-25,,
3593,United Kingdom,2019,September,4,,,,,,,,,22.870830,,2019 September 25,25,2019-09-25,,
3594,United Kingdom,2019,October,4,,,,,,,,,25.767437,,2019 October 25,25,2019-10-25,,
3595,United Kingdom,2019,November,4,,,,,,,,,27.566669,,2019 November 25,25,2019-11-25,,


## Not Using - Plotting by country and feature

This probably looks too clunky. We could, however, modify the list in the plot_all_countries function to include only a handful of the countries that we find most interesting.

In [4]:
#input country name and feature (e.g. max_StInDis)

def plotcountry(country, feature):
    """Draw one country's series of `feature` against `Date_Est` with pyplot.

    Rows of the notebook-level `data` frame whose `CountryName` equals
    `country` are ordered by `Date_Est` (ascending) and plotted as a single
    line labelled with the country name. Nothing is returned.
    """
    is_country = data['CountryName'] == country
    country_rows = data.loc[is_country].sort_values('Date_Est')
    pp.plot(country_rows['Date_Est'], country_rows[feature], label=country)
    

In [5]:
def plot_all_countries(feature):
    """Plot `feature` over time for every listed European country on one figure.

    A new 12 x 2.5 inch figure is created, `plotcountry` is called once per
    country so that each country contributes one labelled line, and a legend
    is added.

    Parameters
    ----------
    feature : str
        Name of the column in `data` to plot (e.g. 'max_StInDis').
    """
    # Countries to include.
    # NOTE(review): spelled 'Czech Republic' (previously 'Czechia') so that it
    # agrees with the country list used for the dropdown later in this
    # notebook and with the 'Slovak Republic' naming style; a spelling that
    # does not occur in `data.CountryName` yields an empty line. Confirm
    # against the values actually present in the CSV.
    Europe = ['Belgium', 'Bulgaria', 'Czech Republic', 'Denmark', 'Germany', 'Estonia', 'Ireland', 'Greece', 'Spain', 'France',
              'Croatia', 'Italy', 'Cyprus', 'Latvia', 'Lithuania', 'Luxembourg', 'Hungary', 'Netherlands', 'Austria',
              'Poland', 'Portugal', 'Romania','Slovenia', 'Slovak Republic', 'Finland', 'Sweden', 'United Kingdom']

    pp.figure(figsize=(12,2.5))

    # One line per country, all drawn on the figure created above.
    for country in Europe:
        plotcountry(country, feature)

    pp.legend()
        

# Potential Plot for Final Presentation

The plot needs some visual improvements (e.g. adding a title, changing the axis label sizes). The plot is defined in plot_features and then used in the interactive cell below. Once we figure out a color scheme, we can change it so that inputting feature1_color and feature2_color is no longer an option and the colors are chosen automatically in the two subplots.
It might also be nice to add some smoothing so that the plot looks less clunky, but this is not absolutely necessary.

### It might be best to pre-set which features users can look at, to avoid muddling the message or making it too unclear. Country would then be the only drop-down option.

In [6]:
def plot_features(country):
    """Plot stringency, electricity use and workplace mobility over time for one country.

    Rows of the notebook-level `data` frame for `country` with `Date_Est`
    after 2020-01-01 are drawn on a single figure:

    * left y-axis (0 to 120): `max_StInDis` (red) and `Percent_Month_2019` (blue);
    * right y-axis (0 down to -100, i.e. inverted): `avg_workplace_mobility_from_baseline`
      (green).

    The figure is shown with `pp.show()`; nothing is returned.

    Parameters
    ----------
    country : str
        Value to match against `data.CountryName`.
    """
    # Keep only the requested country and the date range of interest.
    country_rows = data[data.CountryName == country]
    recent = country_rows[country_rows['Date_Est'] > "2020-01-01"]

    # Date_Est is read from the CSV as text. Converting it to real datetimes
    # gives matplotlib a true time axis (correct spacing between points and
    # automatically chosen date ticks) instead of one categorical tick per row.
    # A new frame is built rather than calling set_index(inplace=True) on a
    # filtered slice of `data`.
    df = (
        recent
        .assign(Date_Est=pd.to_datetime(recent['Date_Est']))
        .sort_values('Date_Est')
        .set_index('Date_Est')
    )

    #create figure
    fig, ax1 = pp.subplots(figsize = (10, 7))

    #overall figure title. Use F-string so that this changes based on the country entered
    fig.suptitle(f'{country}: Changes in Stringency, Mobility, and Electricity Consumption During Covid-19', fontsize=16)

    #left axis: both left-axis series share the x axis, which is labelled once here
    ax1.set_xlabel('Date', size = 13)
    ax1.set_ylabel ('Electricity Usage and Stringency Index', size = 13)
    #plot the stringency index
    lns1 = ax1.plot(df['max_StInDis'], color='red', label = 'Stringency Index')
    #plot the electricity usage on the same x-y axes
    lns2 = ax1.plot(df['Percent_Month_2019'], color = 'blue', label = 'Electricity usage as a percentage of same month in previous year')
    #set y axis limits
    ax1.set_ylim(0,120)
    #rotate and right-align the date labels. With a real date axis the tick
    #density is chosen automatically, so no labels need to be hidden by hand.
    fig.autofmt_xdate()

    #create twin axis so we can plot mobility which has a different y-axis scale
    ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

    #plot change in mobility and appropriate labels etc.
    color = 'green'
    lns3 = ax2.plot(df['avg_workplace_mobility_from_baseline'], color=color, label = 'Change in Workplace Mobility From Baseline')
    ax2.set_ylabel('Change in Workplace Mobility From Baseline', size = 13)
    ax2.tick_params(axis='y', labelcolor=color)
    # Limits given as (0, -100) put 0 at the bottom and -100 at the top, so the
    # (negative) change in mobility rises visually as mobility drops. This
    # explicit pair already inverts the axis; no separate invert call is needed.
    ax2.set_ylim(0,-100)

    fig.tight_layout()  # otherwise the right y-label is slightly clipped

    # combine all three lines into one legend
    lns = lns1+lns2+lns3
    labs = [l.get_label() for l in lns]
    ax1.legend(lns, labs, loc=0)

    pp.show()
    
    


In [7]:
# Interactive version of plot_features: pick a country from a dropdown and the
# figure is redrawn for that country.

Europe = ['Belgium', 'Bulgaria', 'Czech Republic', 'Denmark', 'Germany', 'Estonia', 'Ireland', 'Greece', 'Spain', 'France', 
              'Croatia', 'Italy', 'Cyprus', 'Latvia', 'Lithuania', 'Luxembourg', 'Hungary', 'Netherlands', 'Austria', 
              'Poland', 'Portugal', 'Romania','Slovenia', 'Slovak Republic', 'Finland', 'Sweden', 'United Kingdom']

country_select = widgets.Dropdown(options = Europe, description = 'Country')


# Hook the dropdown up to the plotting function. `interact` is reached through
# the `widgets` namespace imported at the top of the notebook, so this cell
# needs no import of its own.
widgets.interact(
    plot_features,
    country = country_select,
)

interactive(children=(Dropdown(description='Country', options=('Belgium', 'Bulgaria', 'Czech Republic', 'Denma…

<function __main__.plot_features(country)>

# Pretty Plot

In [8]:
# Collect the distinct Date_Est values, in ascending order, that lie strictly
# between 2020-02-17 and 2020-07-01, and offer them as the slider's options.
df1 = data.sort_values('Date_Est')
df2 = df1.loc[df1['Date_Est'].gt("2020-02-17") & df1['Date_Est'].lt("2020-07-01")]
df3 = df2.loc[:, ['Date_Est']].drop_duplicates()
date_list = df3['Date_Est'].tolist()

# Slider that steps through the dates gathered above.
date_slider = widgets.SelectionSlider(
    description='Date',
    options=date_list,
    readout=True,
    orientation='horizontal',
    continuous_update=True,
    disabled=False,
)

In [9]:
def plotyear(date):
    """Scatter stringency against relative electricity use for all countries on one date.

    Rows of the notebook-level `data` frame whose `Date_Est` equals `date`
    are drawn as equally sized circles with `max_StInDis` on the x-axis
    (0 to 100) and `Percent_Month_2019` on the y-axis (50 to 120). Each circle
    is coloured by the negated `avg_workplace_mobility_from_baseline` using
    the 'Purples_r' colormap scaled between 55 and 100, and is labelled with
    its country name. The figure is shown with `pp.show()`; nothing is returned.

    Parameters
    ----------
    date
        Value to match against `data.Date_Est` (one of the date slider's options).
    """
    # Sorting by TWh (descending) fixes the order in which the circles are drawn.
    df = data[data.Date_Est == date].sort_values('TWh',ascending = False)
    area = 700
    # Negate so that a larger drop in workplace mobility gives a larger colour value.
    color = (-1) * df.avg_workplace_mobility_from_baseline

    # Column labels are passed as plain strings (the documented form), so the
    # axes are labelled with the bare column names. The colormap is given by
    # name because matplotlib.cm.get_cmap is deprecated and no longer exists
    # in recent matplotlib releases; pandas resolves the name itself.
    ax = df.plot.scatter('max_StInDis', 'Percent_Month_2019',
                         s=area, c=color,
                         colormap='Purples_r', vmin=55, vmax=100,
                         linewidths=1,
                         edgecolors='black',
                         sharex=False,
                         figsize=(15,8),
                         ylim=(50, 120), xlim=(0, 100))

    # Label only the points that have both coordinates; rows with a missing
    # coordinate are not drawn by the scatter, and annotating them would only
    # produce warnings about non-finite text positions.
    labelled = df.dropna(subset=['max_StInDis', 'Percent_Month_2019'])
    for x, y, label in zip(labelled.max_StInDis, labelled.Percent_Month_2019, labelled.CountryName):
        ax.annotate(label, # this is the text
                    (x,y), # this is the point to label
                    textcoords="offset points", # how to position the text
                    xytext=(0,20), # distance from text to points (x,y)
                    ha='center') # horizontal alignment can be left, right or center

    pp.show()

In [10]:
# Hook the date slider up to the scatter plot. `interact` is reached through the
# `widgets` namespace imported at the top of the notebook rather than being
# re-imported in this cell.
widgets.interact(
    plotyear,
    date = date_slider
)

interactive(children=(SelectionSlider(description='Date', options=('2020-02-18', '2020-02-25', '2020-03-04', '…

<function __main__.plotyear(date)>