In [131]:
import numpy as np
import pandas as pd

from glob import glob

# Carregar dados

Dataset disponível em:
https://www.kaggle.com/datasets/unsdsn/world-happiness/data

In [132]:
files=glob('../dados/archive/*.csv')
files

['../dados/archive/2019.csv',
 '../dados/archive/2018.csv',
 '../dados/archive/2016.csv',
 '../dados/archive/2017.csv',
 '../dados/archive/2015.csv']

In [133]:
#Carregando os dados para um dicionário
reports={}
for path in files:
    ano=path.split('/')[-1].split('.')[0] #retorna o ano contido no caminho do arquivo
    reports[ano]=pd.read_csv(path)

# Descrição dos dados

O dataset contém reports sobre o nível de felicidade em 155 países entre os anos de 2015 e 2019. O nível de felicidade foi medido em uma pesquisa em que pessoas eram solicitadas a dar uma nota para suas pŕoprias vidas, na qual 10 seria o melhor cenário possível e 0 o pior. Os reports contam com o score de felicidade obtido por cada país, além de seis indicadores que podem ser usados para explicar o índice de felicidade. Os indicadores são os seguintes:

- GDP Per capita é o PIB per capita do pais;

- Social support é a média das repostas para a seguinte  pergunta: Em caso de dificuldades, você conta com familiares ou amigos que possam te ajudar? 

- Healthy life expectancy é a expectativa de vida;	

- Freedom to make life choices é a média das repostas para a seguinte pergunta: Você está safisfeito com a liberdade que possui para fazer escolhas na sua vida?;
 
- Generosity é o resíduo da regressão entre GDP per capita e a resposta média para a seguinte pergunta: Você doou dinheiro para caridade no mês passado?;

- Perceptions of corruption é a média das respostas para duas perguntas. A primeira delas é se a corrupção está disseminada no governo e a segunda se a corrupção está disseminada no mundo empresarial

Para o cálculo da média das respostas foi atribuído valor 1 para respostas potivivas e valor 0 para respostas negativas.

Explicação detalhada pode ser encontrada em : https://worldhappiness.report/faq/

- Dystopia é um pai imaginário que teria o menor índice de felicidade. Esse país foi criado para servir com parâmetros de comparação com os demais, ou seja, como certo país está em comparação com o país na pior situação possível. Dystopia tem o pior resultado para cada um dos seis indicadores considerados;

- Residuals indicam a extensão com que os incicadores conseguem ou não explicar os indices de felicidade.

# Análise Preliminar

In [134]:
for ano in reports.keys():
    print(ano+f' : {reports[ano].shape}')
    print(reports[ano].columns)
    print('')

#O número de colunas e de linhas não é igual em todos os reports   

2019 : (156, 9)
Index(['Overall rank', 'Country or region', 'Score', 'GDP per capita',
       'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption'],
      dtype='object')

2018 : (156, 9)
Index(['Overall rank', 'Country or region', 'Score', 'GDP per capita',
       'Social support', 'Healthy life expectancy',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption'],
      dtype='object')

2016 : (157, 13)
Index(['Country', 'Region', 'Happiness Rank', 'Happiness Score',
       'Lower Confidence Interval', 'Upper Confidence Interval',
       'Economy (GDP per Capita)', 'Family', 'Health (Life Expectancy)',
       'Freedom', 'Trust (Government Corruption)', 'Generosity',
       'Dystopia Residual'],
      dtype='object')

2017 : (155, 12)
Index(['Country', 'Happiness.Rank', 'Happiness.Score', 'Whisker.high',
       'Whisker.low', 'Economy..GDP.per.Capita.', 'Family',
       'Hea

- Nos reports de 2015,2016 e 2017 o indicador social support foi chamado de family;
- Reports de 2015, 2016 e 2017 contam com a coluna  'Dystopia Residual';
- Reports de 2016 e 2017 contam com o intervalo de confiança do indicado de felicidade;
- Nos reports de 2015 a 2017 a percepção de corrupção está relacionada somente ao setor governamental (Trust (Government Corruption)), enquanto nos de 2018 e 2019 está relacionada tanto ao setor governamental quanto ao setor empresarial.


In [135]:
#Alterando o nome das colunas para que tenham nomes iguais em reports diferentes

reports['2019'].rename(columns={'Country or region':'Country'},inplace=True)
reports['2018'].rename(columns={'Country or region':'Country'},inplace=True)
reports['2016'].rename(columns={'Happiness Rank':'Overall rank','Happiness Score':'Score','Economy (GDP per Capita)':'GDP per capita',
                                'Family':'Social support','Health (Life Expectancy)':'Healthy life expectancy','Freedom':'Freedom to make life choices'},inplace=True)
reports['2017'].rename(columns={ 'Happiness.Rank':'Overall rank','Happiness.Score':'Score','Economy..GDP.per.Capita.':'GDP per capita',
                                'Family':'Social support','Health..Life.Expectancy.':'Healthy life expectancy',
                                'Freedom':'Freedom to make life choices','Trust..Government.Corruption.':'Trust (Government Corruption)'},inplace=True)
reports['2015'].rename(columns={'Happiness Rank':'Overall rank','Happiness.Score':'Score','Economy (GDP per Capita)':'GDP per capita',
                                'Family':'Social support','Health (Life Expectancy)':'Healthy life expectancy',
                                'Freedom':'Freedom to make life choices','Trust (Government Corruption)':'Trust (Government Corruption)'},inplace=True)

In [136]:
#Adicionando uma coluna com o ano em cada um dos reports
for ano in reports.keys():
    reports[ano]['Year']=int(ano)

In [137]:
#Concatenando os dados dos reports anuais para facilitar a análise da evolução anual dos indicadores

reports_2015_a_2019=pd.concat([reports[ano] for ano in reports.keys()],axis=0)

In [138]:
reports_2015_a_2019

Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Year,Region,Lower Confidence Interval,Upper Confidence Interval,Trust (Government Corruption),Dystopia Residual,Whisker.high,Whisker.low,Dystopia.Residual,Happiness Score,Standard Error
0,1,Finland,7.769,1.34000,1.58700,0.98600,0.59600,0.15300,0.393,2019,,,,,,,,,,
1,2,Denmark,7.600,1.38300,1.57300,0.99600,0.59200,0.25200,0.410,2019,,,,,,,,,,
2,3,Norway,7.554,1.48800,1.58200,1.02800,0.60300,0.27100,0.341,2019,,,,,,,,,,
3,4,Iceland,7.494,1.38000,1.62400,1.02600,0.59100,0.35400,0.118,2019,,,,,,,,,,
4,5,Netherlands,7.488,1.39600,1.52200,0.99900,0.55700,0.32200,0.298,2019,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,154,Rwanda,,0.22208,0.77370,0.42864,0.59201,0.22628,,2015,Sub-Saharan Africa,,,0.55191,0.67042,,,,3.465,0.03464
154,155,Benin,,0.28665,0.35386,0.31910,0.48450,0.18260,,2015,Sub-Saharan Africa,,,0.08010,1.63328,,,,3.340,0.03656
155,156,Syria,,0.66320,0.47489,0.72193,0.15684,0.47179,,2015,Middle East and Northern Africa,,,0.18906,0.32858,,,,3.006,0.05015
156,157,Burundi,,0.01530,0.41587,0.22396,0.11850,0.19727,,2015,Sub-Saharan Africa,,,0.10062,1.83302,,,,2.905,0.08658


In [139]:
#Selecionando as lonunas de interesse
selected_columns=reports['2019'].columns.to_list()
selected_columns=selected_columns+['Trust (Government Corruption)']
reports_2015_a_2019=reports_2015_a_2019.loc[:,selected_columns]

In [140]:
reports_2015_a_2019

Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Year,Trust (Government Corruption)
0,1,Finland,7.769,1.34000,1.58700,0.98600,0.59600,0.15300,0.393,2019,
1,2,Denmark,7.600,1.38300,1.57300,0.99600,0.59200,0.25200,0.410,2019,
2,3,Norway,7.554,1.48800,1.58200,1.02800,0.60300,0.27100,0.341,2019,
3,4,Iceland,7.494,1.38000,1.62400,1.02600,0.59100,0.35400,0.118,2019,
4,5,Netherlands,7.488,1.39600,1.52200,0.99900,0.55700,0.32200,0.298,2019,
...,...,...,...,...,...,...,...,...,...,...,...
153,154,Rwanda,,0.22208,0.77370,0.42864,0.59201,0.22628,,2015,0.55191
154,155,Benin,,0.28665,0.35386,0.31910,0.48450,0.18260,,2015,0.08010
155,156,Syria,,0.66320,0.47489,0.72193,0.15684,0.47179,,2015,0.18906
156,157,Burundi,,0.01530,0.41587,0.22396,0.11850,0.19727,,2015,0.10062


In [141]:
#Organizando para que os os indicadores anuais para cada pais apareçam na sequência
reports_2015_a_2019.sort_values(by=['Country','Year'],inplace=True)
reports_2015_a_2019

Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Year,Trust (Government Corruption)
152,153,Afghanistan,,0.319820,0.302850,0.303350,0.234140,0.365100,,2015,0.097190
153,154,Afghanistan,3.360,0.382270,0.110370,0.173440,0.164300,0.312680,,2016,0.071120
140,141,Afghanistan,3.794,0.401477,0.581543,0.180747,0.106180,0.311871,,2017,0.061158
144,145,Afghanistan,3.632,0.332000,0.537000,0.255000,0.085000,0.191000,0.036,2018,
153,154,Afghanistan,3.203,0.350000,0.517000,0.361000,0.000000,0.158000,0.025,2019,
...,...,...,...,...,...,...,...,...,...,...,...
114,115,Zimbabwe,,0.271000,1.032760,0.334750,0.258610,0.189870,,2015,0.080790
130,131,Zimbabwe,4.193,0.350410,0.714780,0.159500,0.254290,0.185030,,2016,0.085820
137,138,Zimbabwe,3.875,0.375847,1.083096,0.196764,0.336384,0.189143,,2017,0.095375
143,144,Zimbabwe,3.692,0.357000,1.094000,0.248000,0.406000,0.132000,0.099,2018,


Avaliar a posição dos países no ranking de felicidade ao longo dos anos de 2015 a 2019

In [29]:
reports['2019']

Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.340,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.600,1.383,1.573,0.996,0.592,0.252,0.410
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.380,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298
...,...,...,...,...,...,...,...,...,...
151,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411
152,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147
153,154,Afghanistan,3.203,0.350,0.517,0.361,0.000,0.158,0.025
154,155,Central African Republic,3.083,0.026,0.000,0.105,0.225,0.235,0.035
