In [20]:
# Gijs van Alkemade i6247237
# Introduction
# My research question is: Where in the world was the biggest drop in life expectancy since the start of the COVID pandemic?
# Therefore I will isolate the data relevant to my question, check what the minimum and maximum life expectancy was in the
# past few years and portray this in different charts, in a way that lets you choose a country.

In [21]:
import ipywidgets as widgets
from ipywidgets import interact
from ipywidgets import fixed
import altair as alt

In [22]:
# Load the Our World in Data COVID table and keep only the columns used below.
import pandas as pd

covid = pd.read_csv(
    "https://covid.ourworldindata.org/data/owid-covid-data.csv",
    parse_dates=['date'],  # parse 'date' as datetimes instead of plain strings
)

# Columns needed for the research question, plus two for context.
selected_columns = ['date', 'location', 'median_age', 'life_expectancy', 'total_cases']
mydata = covid.loc[:, selected_columns]
mydata.head()

Unnamed: 0,date,location,median_age,life_expectancy,total_cases
0,2020-02-24,Afghanistan,18.6,64.83,5.0
1,2020-02-25,Afghanistan,18.6,64.83,5.0
2,2020-02-26,Afghanistan,18.6,64.83,5.0
3,2020-02-27,Afghanistan,18.6,64.83,5.0
4,2020-02-28,Afghanistan,18.6,64.83,5.0


In [23]:
# Cut life expectancy into five quantile bins and list the distinct bins, so
# that implausible bin edges (outliers, typos) would stand out.
life_expectancy_bins = pd.qcut(mydata['life_expectancy'], q=5)
mydata['life_expectancy_binned'] = life_expectancy_bins
mydata['life_expectancy_binned'].unique()

[(53.279, 66.6], NaN, (76.68, 80.56], (80.56, 86.75], (73.0, 76.68], (66.6, 73.0]]
Categories (5, interval[float64, right]): [(53.279, 66.6] < (66.6, 73.0] < (73.0, 76.68] <
                                           (76.68, 80.56] < (80.56, 86.75]]

In [24]:
# Same sanity check for median age: five quantile bins, then show which bins occur.
median_age_bins = pd.qcut(mydata['median_age'], q=5)
mydata['median_age_binned'] = median_age_bins
mydata['median_age_binned'].unique()

[(15.099, 20.3], NaN, (32.6, 41.0], (27.6, 32.6], (41.0, 48.2], (20.3, 27.6]]
Categories (5, interval[float64, right]): [(15.099, 20.3] < (20.3, 27.6] < (27.6, 32.6] <
                                           (32.6, 41.0] < (41.0, 48.2]]

In [25]:
# The binned columns were only needed for the sanity check above; remove them again.
helper_columns = ['life_expectancy_binned', 'median_age_binned']
mydata = mydata.drop(columns=helper_columns)
mydata.head()

Unnamed: 0,date,location,median_age,life_expectancy,total_cases
0,2020-02-24,Afghanistan,18.6,64.83,5.0
1,2020-02-25,Afghanistan,18.6,64.83,5.0
2,2020-02-26,Afghanistan,18.6,64.83,5.0
3,2020-02-27,Afghanistan,18.6,64.83,5.0
4,2020-02-28,Afghanistan,18.6,64.83,5.0


In [26]:
# Number of missing values per column, before any filling.
mydata.isna().sum()

date                   0
location               0
median_age         27311
life_expectancy    10666
total_cases         2875
dtype: int64

In [27]:
# Replace missing values in the three measurement columns with the marker 'unknown'.
# NOTE(review): the marker is a string, so these columns no longer hold purely
# numeric values afterwards; numeric consumers have to convert them back.
for column in ('life_expectancy', 'median_age', 'total_cases'):
  mydata[column] = mydata[column].fillna('unknown')

In [28]:
# Re-count missing values per column to confirm that the fill left none behind.
mydata.isna().sum(axis=0)

date               0
location           0
median_age         0
life_expectancy    0
total_cases        0
dtype: int64

In [29]:
# Collect every location whose recorded life expectancy is not constant over time.
#
# The column is converted to numbers first, so that non-numeric placeholders and
# missing values become NaN and are ignored by min/max instead of being compared
# with real numbers. A single groupby pass replaces filtering the whole frame
# once per location; sort=False keeps the locations in order of first appearance.
life_expectancy_numeric = pd.to_numeric(mydata['life_expectancy'], errors='coerce')
life_expectancy_range = (
    life_expectancy_numeric
    .groupby(mydata['location'], sort=False)
    .agg(['min', 'max'])
)

countries = []
for location, bounds in life_expectancy_range.iterrows():
  # Locations without any usable value have NaN bounds; NaN < NaN is False,
  # so they are skipped rather than reported as a change.
  if bounds['min'] < bounds['max']:
    print (location, bounds['min'], bounds['max'])
    countries.append (location)


In [30]:
def select_country_data(location, df):
  """Return the rows of `df` whose 'location' column equals `location`.

  Parameters:
    location: value to match against the 'location' column.
    df: DataFrame that contains a 'location' column.

  Returns:
    A DataFrame with only the matching rows (empty when nothing matches);
    all columns and the original row index are kept.
  """
  is_requested_location = df['location'] == location
  return df.loc[is_requested_location]

In [31]:
# Text box for browsing one location's rows; `fixed` keeps the DataFrame out of
# the widget controls. The trailing semicolon suppresses the cell's return value.
interact(
    select_country_data,
    location="Netherlands",
    df=fixed(mydata),
);

interactive(children=(Text(value='Netherlands', description='location'), Output()), _dom_classes=('widget-inte…

In [34]:
def _country_line_chart(data, field, title, domain=None):
  """Return a black, zoomable line chart of the numeric column `field` over 'date'."""
  if domain is None:
    y = alt.Y(field + ':Q')
  else:
    y = alt.Y(field + ':Q', scale=alt.Scale(domain=domain))
  return alt.Chart(data).mark_line(color = 'black').encode(
      x = alt.X('date:T'),
      y = y,
      tooltip = alt.Tooltip(field + ':Q')
  ).interactive().properties(title=title)


def visualise_country_data(location, df):
  """Display four charts for one location and print a footnote.

  Shown are: total cases over time, median age over time, life expectancy over
  time, and the minimum (red) and maximum (blue) life expectancy as squares.

  Parameters:
    location: value of the 'location' column to visualise.
    df: DataFrame with the columns 'date', 'location', 'total_cases',
      'median_age' and 'life_expectancy'.

  Returns:
    None; the charts are rendered with `display` and the footnote is printed.
  """
  # Work on a copy so the caller's frame is never modified.
  dataviz = select_country_data(location, df).copy()

  # The measurement columns may contain non-numeric placeholders for missing
  # data. Coerce them to numbers (placeholders become NaN) so the encodings
  # below can be declared quantitative rather than inferred from mixed values.
  for column in ('total_cases', 'median_age', 'life_expectancy'):
    dataviz[column] = pd.to_numeric(dataviz[column], errors='coerce')

  chart = _country_line_chart(dataviz, 'total_cases', 'Total Cases')
  chart2 = _country_line_chart(dataviz, 'median_age', 'Median Age', domain=(0, 100))
  chart3 = _country_line_chart(dataviz, 'life_expectancy', 'Life Expectancy', domain=(0, 100))

  # The tooltip uses the same aggregate as the y channel: a raw field in any
  # channel would become a group-by key, so min/max would be computed per
  # distinct value instead of per location.
  chart4 = alt.Chart(dataviz).mark_square(color = 'blue').encode(
      x = 'location:N',
      y = alt.Y('max(life_expectancy):Q', scale=alt.Scale(domain=(0.00, 100.00))),
      tooltip = 'max(life_expectancy):Q'
  )

  chart5 = alt.Chart(dataviz).mark_square(color = 'red').encode(
      x = 'location:N',
      y = alt.Y('min(life_expectancy):Q', scale=alt.Scale(domain=(0.00, 100.00))),
      tooltip = 'min(life_expectancy):Q'
  )

  # Pan/zoom and the title are applied once, to the layered chart, so both
  # layers share a single interaction.
  chart6 = (chart4 + chart5).interactive().properties(
      title='Difference between minimum and maximum life expectancy*'
  )

  display(chart | chart2, chart3 | chart6)

  print('*One square means no difference, two squares means difference, no squares means no available data.')

# Dropdown listing every location in the data set, in sorted order.
loc_dropdown = widgets.Dropdown(
    options=sorted(mydata.location.unique()),
)

# Connect the dropdown to the chart function; the DataFrame is passed along
# unchanged via `fixed`.
interout = widgets.interactive_output(
    visualise_country_data,
    {'location' : loc_dropdown, 'df': fixed(mydata)},
)

# Layout: chart output on top, the location selector underneath.
widgets.VBox([interout, widgets.VBox([loc_dropdown])])


VBox(children=(Output(), VBox(children=(Dropdown(options=('Afghanistan', 'Africa', 'Albania', 'Algeria', 'Ando…

In [None]:
# Conclusion
# This dataset does not show any differences in life expectancy in any country since the beginning of COVID, in contrast to what
# news articles seem to portray. First I selected the data I would need, or that might be interesting to see while answering the question.
# Then I checked for outliers and typos by creating new binned columns of the data, and using value_counts to see if there were erroneous
# dots or zeros. After that I checked for unknown (missing) data and changed those values accordingly.
# I used a for loop to check whether there were countries in the location list where there was a difference between the minimum and the
# maximum life expectancy, after which I planned to isolate those countries and show their life_expectancy shifts. However, it soon
# became clear that there are no countries which had any fluctuation in their life expectancy, according to this data set (or at least
# no countries with available data to show).
# It might not be a very exciting outcome, but negative outcomes also need to be reported. To make this interactive, I wrote code
# so that you can choose a country from the location list of the COVID data set, after which the program will show the life_expectancy
# in a line chart, the minimum and maximum life_expectancy in a point chart, and for good measure the median age and COVID cases.