In [1]:
import numpy as np
import pandas as pd
import altair as alt

In [2]:
# Read each yearly World Happiness CSV and tag every row with its survey year.
# Only the 2017 file is read with index_col=False; the others use the default.
data_2015 = pd.read_csv('2015.csv').assign(Year=2015)
data_2016 = pd.read_csv('2016.csv').assign(Year=2016)
data_2017 = pd.read_csv('2017.csv', index_col=False).assign(Year=2017)
data_2018 = pd.read_csv('2018.csv').assign(Year=2018)
data_2019 = pd.read_csv('2019.csv').assign(Year=2019)

In [3]:
# Stack the five yearly frames (newest first) under a fresh 0..n-1 index.
yearly_frames = [data_2019, data_2018, data_2017, data_2016, data_2015]
df = pd.concat(yearly_frames, ignore_index=True)

# Keep only the columns used by the analysis, in display order.
selected_columns = [
    'Year',
    'Overall rank',
    'Country or region',
    'Continent',
    'Score',
    'GDP per capita',
    'Social support',
    'Healthy life expectancy',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
]
df = df[selected_columns]
df.head()

Unnamed: 0,Year,Overall rank,Country or region,Continent,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,2019,1,Finland,Europe,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2019,2,Denmark,Europe,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,2019,3,Norway,Europe,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,2019,4,Iceland,Europe,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,2019,5,Netherlands,Europe,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [4]:
# Short snake_case aliases for the report's verbose column headers.
column_aliases = {
    'Overall rank': 'rank',
    'Country or region': 'country',
    'Continent': 'continent',
    'Score': 'score',
    'GDP per capita': 'gdp_per_capita',
    'Social support': 'social_support',
    'Healthy life expectancy': 'life_expectancy',
    'Freedom to make life choices': 'freedom_to_choice',
    'Generosity': 'generosity',
    'Perceptions of corruption': 'perception_of_corruption',
}

# Rename, then discard the overall rank: the happiness score is already in the data.
df = df.rename(columns=column_aliases).drop(columns=['rank'])
df.head()

Unnamed: 0,Year,country,continent,score,gdp_per_capita,social_support,life_expectancy,freedom_to_choice,generosity,perception_of_corruption
0,2019,Finland,Europe,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2019,Denmark,Europe,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,2019,Norway,Europe,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,2019,Iceland,Europe,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,2019,Netherlands,Europe,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [5]:
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how='any')
df.head()

Unnamed: 0,Year,country,continent,score,gdp_per_capita,social_support,life_expectancy,freedom_to_choice,generosity,perception_of_corruption
0,2019,Finland,Europe,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2019,Denmark,Europe,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,2019,Norway,Europe,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,2019,Iceland,Europe,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,2019,Netherlands,Europe,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [6]:
# Text-mark chart whose colour scale pins Europe and Africa to fixed colours.
# NOTE(review): no later cell visible in this notebook references legend_chart —
# confirm it is still needed.
continent_palette = alt.Scale(
    domain=['Europe', 'Africa'],
    range=['#4287f5', '#d84a05'],
)
legend_chart = (
    alt.Chart(df)
    .mark_text(align='left')
    .encode(color=alt.Color('continent:N', scale=continent_palette))
)

In [7]:
# Exclude rows whose continent is labelled 'Unknown', then list what remains.
has_known_continent = df['continent'].ne('Unknown')
df = df[has_known_continent]
df['continent'].unique()

array(['Europe', 'Oceania', 'North America', 'Asia', 'South America',
       'Africa'], dtype=object)

In [8]:
# Distinct continent names in ascending (alphabetical) order.
continents = np.sort(df['continent'].unique())
continents

array(['Africa', 'Asia', 'Europe', 'North America', 'Oceania',
       'South America'], dtype=object)

In [9]:
# Dropdown-bound point selection on the 'continent' field, initialised to 'Africa'.
continent_dropdown = alt.binding_select(options=continents)
select_continent = alt.selection_point(
    value='Africa',
    bind=continent_dropdown,
    fields=['continent'],
    name='Select',
)

In [10]:
# Histogram of healthy life expectancy over every row of df, in neutral grey.
base_chart = (
    alt.Chart(df)
    .mark_bar(color='lightgrey')
    .encode(
        x=alt.X(
            "life_expectancy:Q",
            bin=alt.Bin(maxbins=10),
            title="Healthy Life Expectancy",
            axis=alt.Axis(titleFontSize=16),
        ),
        y=alt.Y(
            "count():Q",
            title="Frequency of Life Expectancy",
            axis=alt.Axis(titleFontSize=16),
        ),
    )
)

# Text properties shared by both rotated value labels; only dx (and colour) differ.
rotated_label_style = dict(size=15, align='right', baseline='bottom', angle=270, dy=0)

# Grey rule at the mean over all rows, plus its two-decimal label.
average_line = (
    alt.Chart(df)
    .mark_rule(stroke='grey', strokeWidth=2)
    .encode(x=alt.X("average(life_expectancy):Q"))
)
average_text = average_line.mark_text(dx=20, **rotated_label_style).encode(
    text=alt.Text('average(life_expectancy):Q', format='.2f')
)

# One row per continent holding that continent's mean life expectancy.
average_life_expectancy = df.groupby('continent')['life_expectancy'].mean()
new_df = average_life_expectancy.rename('average_life_expectancy').reset_index()

# Blue rule at the selected continent's mean; the filter keeps only the
# continent chosen in the dropdown.
continent_line = (
    alt.Chart(new_df)
    .mark_rule(stroke='#1f77b4', strokeWidth=2)
    .add_params(select_continent)
    .encode(
        x=alt.X("average_life_expectancy:Q"),
        opacity=alt.condition(select_continent, alt.value(0.75), alt.value(0.05)),
    )
    .transform_filter(select_continent)
)

# Matching blue label, derived from the rule so it inherits its data and filter.
continent_text1 = (
    continent_line
    .mark_text(color='#1f77b4', dx=-50, **rotated_label_style)
    .add_params(select_continent)
    .encode(
        text=alt.Text('average_life_expectancy:Q', format='.2f'),
        opacity=alt.condition(select_continent, alt.value(0.75), alt.value(0.05)),
    )
)

# Stack histogram, overall mean and selected-continent mean into one view.
plot1_title = {
    "text": ['Healthy Life Expectancy Distribution'],
    "subtitle": "highlighting Continent's Average Life Expectancy",
    "fontSize": 24,
    "offset": 15,
}
plot1 = alt.layer(
    base_chart,
    average_line,
    average_text,
    continent_line,
    continent_text1,
).properties(title=plot1_title, width=350, height=300)

plot1

In [11]:
# Dropdown-bound point selection on the 'continent' field, initialised to 'Africa'.
# NOTE(review): this repeats the select_continent definition from the earlier
# cell with the same name 'Select' and the same arguments — confirm whether the
# re-definition is needed.
continent_dropdown = alt.binding_select(options=continents)
select_continent = alt.selection_point(
    value='Africa',
    bind=continent_dropdown,
    fields=['continent'],
    name='Select',
)

In [12]:
# Grey line: per-year mean of healthy life expectancy over every row of df.
base_chart2 = (
    alt.Chart(df)
    .mark_line()
    .encode(
        x=alt.X(
            'Year:O',
            title='Year',
            axis=alt.Axis(labelAngle=0, titleFontSize=16),
        ),
        y=alt.Y(
            'average(life_expectancy):Q',
            title='Average Healthy Life Expectancy',
            scale=alt.Scale(domain=[0, 1.1]),
            axis=alt.Axis(titleFontSize=16),
        ),
        color=alt.value('grey'),
    )
)

# Both line labels share an offset/size and are placed on the 2019 point only.
line_label_style = dict(dx=10, dy=-10, size=15)
only_2019 = 'datum.Year == 2019'

base_text = (
    base_chart2
    .mark_text(**line_label_style)
    .encode(text=alt.value("Overall"))
    .transform_filter(only_2019)
)

# Line for the selected continent: rows are filtered by the dropdown selection
# first, then averaged per (Year, continent) into 'continent_mean'.
continent_time = (
    base_chart2
    .add_params(select_continent)
    .encode(
        x=alt.X(
            'Year:O',
            title='Year',
            axis=alt.Axis(labelAngle=0, titleFontSize=16),
        ),
        y=alt.Y(
            'continent_mean:Q',
            title='Average Healthy Life Expectancy',
            scale=alt.Scale(domain=[0, 1.1]),
            axis=alt.Axis(titleFontSize=16),
        ),
        color=alt.Color('continent:N', legend=None),
        opacity=alt.condition(select_continent, alt.value(0.75), alt.value(0.05)),
    )
    .transform_filter(select_continent)
    .transform_aggregate(
        continent_mean='average(life_expectancy)',
        groupby=['Year', 'continent'],
    )
)

# Continent name printed next to the 2019 end of the continent line.
continent_text = (
    continent_time
    .mark_text(**line_label_style)
    .encode(
        text=alt.Text('continent:N'),
        opacity=alt.condition(select_continent, alt.value(0.95), alt.value(0.05)),
    )
    .transform_filter(only_2019)
)

plot2_title = {
    "text": ['Average Life Expectancy Over Time'],
    "subtitle": "highlighting Continent's average over time.",
    "fontSize": 24,
    "offset": 15,
}
plot2 = alt.layer(
    base_chart2,
    base_text,
    continent_time,
    continent_text,
).properties(width=350, height=300, title=plot2_title)
plot2

In [13]:
def _life_expectancy_panel(data, x_field, selection):
    """Build one scatter panel of ``x_field`` against healthy life expectancy.

    The panel layers, bottom to top:
      1. every row of ``data`` as light-grey circles,
      2. a black regression line of ``life_expectancy`` on ``x_field``,
      3. the rows matching ``selection`` as blue circles.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``x_field``, ``life_expectancy``, ``country`` and
        ``gdp_per_capita`` columns (the fields referenced by the encodings).
    x_field : str
        Name of the quantitative column plotted on the x axis and used as the
        independent variable of the regression.
    selection : altair selection parameter
        Selection used both to filter and to style the highlighted points.

    Returns
    -------
    alt.LayerChart
        A 350x300 layered chart for this single x field.
    """
    scatter = alt.Chart(data).mark_circle(opacity=0.4, color='lightgrey').encode(
        x=alt.X(x_field, type='quantitative', axis=alt.Axis(titleFontSize=16)),
        y=alt.Y('life_expectancy:Q', title="Healthy Life Expectancy", axis=alt.Axis(titleFontSize=16)),
        tooltip=['country:N', 'gdp_per_capita:Q', 'life_expectancy:Q'],
    )

    # Derived from the scatter so the line shares its axis configuration;
    # the regression is fitted on this panel's own x field.
    regression = scatter.transform_regression(x_field, 'life_expectancy').mark_line(color='black').encode(
        color=alt.value('black'),
    )

    # Only rows of the selected continent survive the filter and are drawn in blue.
    highlighted = scatter.add_params(selection).mark_circle(opacity=0.7, color='#1f77b4').encode(
        opacity=alt.condition(selection, alt.value(0.75), alt.value(0.05))
    ).transform_filter(selection)

    return alt.layer(scatter, regression, highlighted).properties(
        width=350,
        height=300
    )


# One explicit panel per factor. Previously a single layer stack was repeated
# over both columns, which drew the grey scatter twice per panel and placed
# each regression layer in a panel whose x field it does not contain.
plot3 = alt.hconcat(
    _life_expectancy_panel(df, 'gdp_per_capita', select_continent),
    _life_expectancy_panel(df, 'social_support', select_continent),
).properties(
    title = {
        "text" : ['GDP Per Capita and Social Support vs. Healthy Life Expectancy'],
        "subtitle" : "Regression line highlighting the relationship.",
        "fontSize" : 24,
        "dx":100,
        "offset":20,
    }
)

plot3

In [14]:
# Top row: distribution (plot1) next to the time series (plot2), each with its
# own colour scale. The two plots are direct children of this hconcat so that
# spacing=5 applies between them; when wrapped in an extra single-child
# hconcat the spacing had nothing to separate.
top_row = alt.hconcat(plot1, plot2, spacing=5).resolve_scale(color='independent')

# Bottom row: the two scatter panels (plot3).
chart = alt.vconcat(top_row, plot3)

final_plot = chart.properties(
   title = {
        "text" : ['Healthy Life Expectancy Dashboard'],
        "subtitle": 'Highlighting Each Continent.',
        "dx":230,
        "fontSize":30,
        "offset":20,
    }
)

final_plot

In [15]:
# Write the assembled dashboard chart to an HTML file (path is relative to the working directory).
final_plot.save('life_expectancy_dashboard.html')

# Healthy Life Expectancy Analysis Across Various Continents
## About DataSet
This analysis uses the World Happiness Report datasets from 2015 to 2019, which were sourced from a reputable international organization, the "Sustainable Development Solutions Network".
The World Happiness Report is a comprehensive survey that assesses the state of global happiness, ranking countries based on their levels of happiness and well-being.

The dataset includes several key variables such as:

Overall rank: Overall rank of each country or region.<br>
country or region: Country or Region of the world.<br>
Continent: Continent of the country.<br>
Score: The happiness score measured for that country or region.<br>
GDP per capita of that country.<br>
Social support provided in that country.<br>
Healthy life expectancy of that country.<br>
Freedom to make life choices in that country.<br>
Generosity values of that country.<br>
Perceptions of corruption in that country.<br>

The investigation in this analysis involves healthy life expectancy of various continents and possible factors that are influencing the life expectancy.

## DATA Visualization:

### Healthy Life Expectancy Dashboard (Highlighting Each Continent)
Introducing the Healthy Life Expectancy Dashboard, a captivating and visually immersive journey through the factors influencing well-being, with a special focus on each of our world's diverse continents. Weaving together three compelling narratives, this dashboard unravels the profound disparities in happiness and health across the globe, offering insights that resonate with people on a personal level.

### Plot 1: Healthy Life Expectancy Distribution (highlighting Continent's Average Life Expectancy)

The journey begins with Plot 1, which presents a clear visualization of the distribution of healthy life expectancy values. A bar chart is used, making it easy for viewers to grasp the distribution of this quantitative variable. Exploring the bars, we can see that the majority of countries fall within the range of 0.6 to 0.8 in terms of healthy life expectancy.
These bars are thoughtfully colored in a neutral light grey, allowing the key data points to take center stage. The overall average healthy life expectancy is prominently marked with a vertical line, and a text label is added for clarity, making it easy to compare each continent with the overall average. Additionally, when you select a specific continent, its average life expectancy is accentuated by a distinct blue vertical line, maintaining a consistent visual language.

### Plot 2: Average Life Expectancy Over Time (highlighting Continent's average over time)
Plot 2 adds depth to the narrative by visualizing the average healthy life expectancy in each continent over the five-year period from 2015 to 2019. This line chart effectively conveys the trends and variations in life expectancy. The x-axis displays the years, treated as ordinal data, while the y-axis represents the average healthy life expectancy, a vital quantitative variable. The lines connecting the data points for each continent make it simple to discern changes in life expectancy. To provide context and allow for comparisons, an overall average line is introduced, serving as a reference point for viewers to gauge how life expectancy in different continents compares to the global average. The overall average life expectancy over years is displayed in a neutral grey color, while the selected continent's data is represented by the color encoding (blue color as used in the previous plot to maintain consistency). A text label is also added based on the selected points so that the plot can be understood by people with color vision problems.

### Plot 3: GDP Per Capita and Social Support vs. Healthy Life Expectancy (with a Regression Line Highlighting the Relationship)
Plot 3 delves deeper into the complex relationship between two pivotal factors, GDP per capita and social support, concerning healthy life expectancy. This plot provides a side-by-side comparison of these factors with respect to life expectancy. Points on the chart, represented by a subtle light grey background, showcase the distribution of data points. Selected data points are distinctly colored with blue, adhering to the established color scheme for all plots. A regression line is added in both plots to further highlight the relationship. The continents with high GDP Per Capita and Social Support tend to have higher healthy life expectancy values.


The visualization elegantly conveys the stark differences in life expectancy, presents trends over time, and explores the intricate relationship between socio-economic factors and well-being. These disparities are humanized, making it clear why a few continents are consistently outperformed by others (for example, Europe over Africa), shedding light on a vital aspect of global happiness and well-being.
