In [None]:
'''
La derniere étape de notre projet est d'utiliser les modèles de machine learning choisi pour essayer de prédire 
les données de 2022 puis de comparer les écarts avec le reel.
'''

In [31]:
# Étape 1 : Preprocessing et feature engineering de la base de donnée 2022 pour la rendre exploitable.

# Import des modules et fichiers.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

df_2022_full=pd.read_csv('../data/world-hapiness-report-2022.csv',sep=";")

# Suppression de toutes les lignes non relatives à 2022.
df_2022=df_2022_full[(df_2022_full['year']==2022)]

# Ajout des continents.
df_continents=pd.read_csv('../data/pays&continents.csv',sep=';')
df_2022=df_2022.merge(right=df_continents,on='Country name',how='outer')
df_2022.insert(1,'Regional indicator',df_2022.pop('Regional indicator'))

# Suppression des valeurs manquantes.
df_2022=df_2022.dropna()

# Suppression de la colonne des années.
df_2022=df_2022.drop(['year'],axis=1)

# Copie du jeux de données.
df_2022_continents = df_2022.copy()

# Séparation des variables explicatives et de la variable nom de pays de la variable cible.
X_test2022_continents=df_2022_continents.drop(['Life Ladder', 'Country name'],axis=1)
y_test2022=df_2022_continents['Life Ladder']

#On encode les continents de X_2022
X_test2022_continents=pd.get_dummies(X_test2022_continents,columns=['Regional indicator'])

# Normalisation des données.
scaler=MinMaxScaler()
cols=['Log GDP per capita','Social support','Healthy life expectancy at birth','Freedom to make life choices',
      'Generosity','Perceptions of corruption','Positive affect','Negative affect']
X_test2022_continents[cols]=scaler.fit_transform(X_test2022_continents[cols])

# Suppression de la colonne "Regional indicator_Central and Eastern Europe" pour correspondre à l'encodage des données
# fait par le OneHotEncoding.
X_test2022_continents=X_test2022_continents.drop(['Regional indicator_Central and Eastern Europe'],axis=1)

# X_test2022_continents et y_test2022 sont alors respectivements les jeux d'entraînement et de test.

In [32]:
# Étape 2 : Récupération des jeux d'entraînement et de test générés dans les notebooks précédents (données de 2006 
# à 2021).

# Import des données.
X_train=pd.read_csv('../data/X_train.csv')
X_test=pd.read_csv('../data/X_test.csv')
y_train=pd.read_csv('../data/y_train.csv')
y_test=pd.read_csv('../data/y_test.csv')

# Concaténation.
X_train2021=pd.concat([X_train,X_test],axis=0)
y_train2021=pd.concat([y_train,y_test],axis=0)

# Transofrmation du jeu y en tableau python.
y_train2021=y_train2021['Life Ladder'].values

# Normalisation des données.
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
cols=['Log GDP per capita','Social support','Healthy life expectancy at birth','Freedom to make life choices',
      'Generosity', 'Perceptions of corruption','Positive affect','Negative affect']
X_train2021[cols]=scaler.fit_transform(X_train2021[cols])

In [33]:
# Étape 3: Faire tourner le modele de Random forest pour la regression et le tester.

from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(max_depth=30,max_features='sqrt',min_samples_leaf=1,min_samples_split=2,n_estimators=300)

rf_model.fit(X_train2021,y_train2021)

print("Score sur jeu d'entrainement 2021 :",rf_model.score(X_train2021,y_train2021))
print("Score sur jeu de test 2022 :",rf_model.score(X_test2022_continents,y_test2022))

Score sur jeu d'entrainement 2021 : 0.9857096217930055
Score sur jeu de test 2022 : 0.7983550674260365


In [34]:
# Réalisation d'une prédiction du Life Ladder de 2022 par pays grâce au modèle Random forest.
y_predic2022_continents = rf_model.predict(X_test2022_continents)

# Arrondissement de la prédictions à 2 décimales.
import numpy as np
y_predic2022_continents=np.round(y_predic2022_continents,2)

# Ajout d'une colonne avec les prédictions au jeu de données df_2022.
df_2022_continents['Predictions']=y_predic2022_continents
df_2022_continents=df_2022_continents.drop(cols,axis=1)

# Ajout d'une colonne pour représenter les variations en pourcentages.
df_2022_continents['Ecart en %']=np.round((df_2022_continents['Predictions']-df_2022_continents['Life Ladder'])/df_2022_continents['Life Ladder']*100,1)

In [35]:
df_2022_continents.head(10)

Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
1,Albania,Central and Eastern Europe,5.21,4.93,-5.4
2,Argentina,Latin America and Caribbean,6.26,6.16,-1.6
3,Armenia,Commonwealth of Independent States,5.38,5.01,-6.9
4,Australia,North America and ANZ,7.04,7.04,0.0
5,Austria,Western Europe,7.0,6.73,-3.9
6,Bangladesh,South Asia,3.41,4.09,19.9
7,Belgium,Western Europe,6.86,7.0,2.0
8,Benin,Sub-Saharan Africa,4.22,3.94,-6.6
9,Bolivia,Latin America and Caribbean,5.93,5.35,-9.8
10,Botswana,Sub-Saharan Africa,3.44,4.57,32.8


In [36]:
df_2022_continents.drop('Country name',axis=1).groupby('Regional indicator').agg('mean').sort_values('Ecart en %',ascending=True)

# On constate que le modèle prédit plutot bien avec toutefois des écarts plus importants pour les valeurs extremes.

Unnamed: 0_level_0,Life Ladder,Predictions,Ecart en %
Regional indicator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Latin America and Caribbean,6.211111,5.788889,-6.655556
Southeast Asia,5.53,5.115,-6.6
Commonwealth of Independent States,5.5925,5.215,-6.475
East Asia,5.973333,5.593333,-6.466667
Central and Eastern Europe,6.044545,5.770909,-4.218182
North America and ANZ,6.9075,6.8025,-1.575
Western Europe,6.893333,6.794444,-1.5
South Asia,4.27,4.176667,1.266667
Sub-Saharan Africa,4.289643,4.276071,2.178571
Middle East and North Africa,4.701667,4.841667,10.2


In [37]:
display(df_2022_continents.sort_values('Life Ladder',ascending=True).head(10))
display(df_2022_continents.sort_values('Life Ladder',ascending=True).tail(10))

Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
61,Lebanon,Middle East and North Africa,2.35,3.97,68.9
93,Sierra Leone,Sub-Saharan Africa,2.56,3.51,37.1
21,Congo (Kinshasa),Sub-Saharan Africa,3.21,4.15,29.3
113,Zimbabwe,Sub-Saharan Africa,3.3,4.09,23.9
66,Malawi,Sub-Saharan Africa,3.36,3.75,11.6
6,Bangladesh,South Asia,3.41,4.09,19.9
10,Botswana,Sub-Saharan Africa,3.44,4.57,32.8
19,Comoros,Sub-Saharan Africa,3.55,3.9,9.9
101,Tanzania,Sub-Saharan Africa,3.62,4.14,14.4
32,Ethiopia,Sub-Saharan Africa,3.63,4.35,19.8


Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
63,Lithuania,Central and Eastern Europe,7.04,5.92,-15.9
22,Costa Rica,Latin America and Caribbean,7.08,6.67,-5.8
64,Luxembourg,Western Europe,7.23,6.91,-4.4
83,Norway,Western Europe,7.3,7.43,1.8
78,Netherlands,Western Europe,7.39,7.06,-4.5
98,Sweden,Western Europe,7.43,7.46,0.4
45,Iceland,Western Europe,7.45,7.4,-0.7
26,Denmark,Western Europe,7.55,7.44,-1.5
50,Israel,Middle East and North Africa,7.66,6.8,-11.2
33,Finland,Western Europe,7.73,7.5,-3.0


In [38]:
display(df_2022_continents.sort_values('Ecart en %',ascending=False).head(10))
display(df_2022_continents.sort_values('Ecart en %',ascending=True).head(10))

Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
61,Lebanon,Middle East and North Africa,2.35,3.97,68.9
93,Sierra Leone,Sub-Saharan Africa,2.56,3.51,37.1
10,Botswana,Sub-Saharan Africa,3.44,4.57,32.8
21,Congo (Kinshasa),Sub-Saharan Africa,3.21,4.15,29.3
113,Zimbabwe,Sub-Saharan Africa,3.3,4.09,23.9
6,Bangladesh,South Asia,3.41,4.09,19.9
32,Ethiopia,Sub-Saharan Africa,3.63,4.35,19.8
101,Tanzania,Sub-Saharan Africa,3.62,4.14,14.4
39,Ghana,Sub-Saharan Africa,4.19,4.7,12.2
66,Malawi,Sub-Saharan Africa,3.36,3.75,11.6


Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
20,Congo (Brazzaville),Sub-Saharan Africa,5.81,4.35,-25.1
42,Guinea,Sub-Saharan Africa,5.32,4.15,-22.0
77,Nepal,South Asia,5.47,4.31,-21.2
30,El Salvador,Latin America and Caribbean,6.49,5.44,-16.2
80,Nicaragua,Latin America and Caribbean,6.39,5.36,-16.1
63,Lithuania,Central and Eastern Europe,7.04,5.92,-15.9
112,Vietnam,Southeast Asia,6.27,5.3,-15.5
71,Mexico,Latin America and Caribbean,7.04,5.95,-15.5
73,Mongolia,East Asia,5.79,4.91,-15.2
92,Senegal,Sub-Saharan Africa,4.91,4.17,-15.1


In [39]:
'''
On pourrait se demander si lorsqu'il y a des valeurs un peu plus extremes, le continent n'entraîne pas le modele 
vers une réponse plus cohérente en fonction du continents. 
On réalise donc un essai sur les mêmes données amputées de la variable continent.
'''

"\nOn pourrait se demander si lorsqu'il y a des valeurs un peu plus extremes, le continent n'entraîne pas le modele \nvers une réponse plus cohérente en fonction du continents. \nOn réalise donc un essai sur les mêmes données amputées de la variable continent.\n"

In [40]:
# Séparation des variables explicatives de la variable cible et suppression des noms de pays.
X_test2022_no_continents=df_2022.drop(['Life Ladder','Country name','Regional indicator'],axis=1)
y_test2022_no_continents=df_2022['Life Ladder']

#On normalise les variables explicatives
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
cols=['Log GDP per capita','Social support','Healthy life expectancy at birth','Freedom to make life choices','Generosity',
      'Perceptions of corruption','Positive affect','Negative affect']
X_test2022_no_continents[cols]=scaler.fit_transform(X_test2022_no_continents[cols])

#X_test2022_no_continents et y_test2022_no_continents sont alors respectivements nos variables explicatives et notre variable cible de notre jeu de test

In [41]:
# Suppression variables des continents du jeu de données 2021.
X_train2021_no_continents=X_train2021.drop(["Regional indicator_Commonwealth of Independent States"
                              ,"Regional indicator_East Asia"
                              ,"Regional indicator_Latin America and Caribbean"
                              ,"Regional indicator_Middle East and North Africa"
                              ,"Regional indicator_North America and ANZ"
                              ,"Regional indicator_South Asia"
                              ,"Regional indicator_Southeast Asia"
                              ,"Regional indicator_Sub-Saharan Africa"
                              ,"Regional indicator_Western Europe"],axis=1)

#On normalise les variables explicatives
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
cols=['Log GDP per capita','Social support','Healthy life expectancy at birth','Freedom to make life choices','Generosity',
      'Perceptions of corruption','Positive affect','Negative affect']
X_train2021_no_continents[cols]=scaler.fit_transform(X_train2021_no_continents[cols])

In [42]:
from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(max_depth=30,max_features='sqrt',min_samples_leaf=1,min_samples_split=2,n_estimators=300)

rf_model.fit(X_train2021_no_continents,y_train2021)

print("Score sur jeu d'entrainement 2021 :",rf_model.score(X_train2021_no_continents,y_train2021))
print("Score sur jeu de test 2022 :",rf_model.score(X_test2022_no_continents,y_test2022))

Score sur jeu d'entrainement 2021 : 0.9836695140001757
Score sur jeu de test 2022 : 0.7396604788190977


In [43]:
# Réalisation d'une prédiction du Life Ladder de 2022 par pays grâce au modèle Random forest.
y_predic2022_no_continents = rf_model.predict(X_test2022_no_continents)

# Arrondissement de la prédictions à 2 décimales.
import numpy as np
y_predic2022_no_continents=np.round(y_predic2022_no_continents,2)

# Copie du jeu de donnée 2022 et suppression des continents.
df_2022_no_continents = df_2022.copy()

# Ajout d'une colonne avec les prédictions au jeu de données df_2022.
df_2022_no_continents['Predictions']=y_predic2022_no_continents
df_2022_no_continents=df_2022_no_continents.drop(cols,axis=1)

# Ajout d'une colonne pour représenter les variations en pourcentages.
df_2022_no_continents['Ecart en %']=np.round((df_2022_no_continents['Predictions']-df_2022_no_continents['Life Ladder'])/df_2022_no_continents['Life Ladder']*100,1)

In [44]:
df_2022_no_continents

Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
1,Albania,Central and Eastern Europe,5.21,4.88,-6.3
2,Argentina,Latin America and Caribbean,6.26,6.04,-3.5
3,Armenia,Commonwealth of Independent States,5.38,4.97,-7.6
4,Australia,North America and ANZ,7.04,6.91,-1.8
5,Austria,Western Europe,7.00,6.66,-4.9
...,...,...,...,...,...
108,United States,North America and ANZ,6.69,6.07,-9.3
109,Uruguay,Latin America and Caribbean,6.67,6.37,-4.5
110,Uzbekistan,Commonwealth of Independent States,6.02,5.18,-14.0
112,Vietnam,Southeast Asia,6.27,5.21,-16.9


In [45]:
df_2022_no_continents.drop('Country name',axis=1).groupby('Regional indicator').agg('mean').sort_values('Ecart en %',ascending=True)

#Grâce à ce df on s'aperçoit que notre modele prédit plutot bien, il a un peu plus de mal a prédire les valeurs extremes

Unnamed: 0_level_0,Life Ladder,Predictions,Ecart en %
Regional indicator,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Latin America and Caribbean,6.211111,5.55,-10.538889
Commonwealth of Independent States,5.5925,5.0825,-8.85
Southeast Asia,5.53,5.07,-7.4
East Asia,5.973333,5.6,-6.4
Central and Eastern Europe,6.044545,5.77,-4.227273
North America and ANZ,6.9075,6.675,-3.425
Western Europe,6.893333,6.778333,-1.666667
South Asia,4.27,4.156667,0.933333
Sub-Saharan Africa,4.289643,4.282857,2.217857
Middle East and North Africa,4.701667,4.798333,9.6


In [46]:
display(df_2022_no_continents.sort_values('Life Ladder',ascending=True).head(10))
display(df_2022_no_continents.sort_values('Life Ladder',ascending=True).tail(10))

Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
61,Lebanon,Middle East and North Africa,2.35,3.98,69.4
93,Sierra Leone,Sub-Saharan Africa,2.56,3.45,34.8
21,Congo (Kinshasa),Sub-Saharan Africa,3.21,4.09,27.4
113,Zimbabwe,Sub-Saharan Africa,3.3,4.08,23.6
66,Malawi,Sub-Saharan Africa,3.36,3.76,11.9
6,Bangladesh,South Asia,3.41,4.09,19.9
10,Botswana,Sub-Saharan Africa,3.44,4.62,34.3
19,Comoros,Sub-Saharan Africa,3.55,3.95,11.3
101,Tanzania,Sub-Saharan Africa,3.62,4.19,15.7
32,Ethiopia,Sub-Saharan Africa,3.63,4.24,16.8


Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
63,Lithuania,Central and Eastern Europe,7.04,5.93,-15.8
22,Costa Rica,Latin America and Caribbean,7.08,6.62,-6.5
64,Luxembourg,Western Europe,7.23,6.87,-5.0
83,Norway,Western Europe,7.3,7.42,1.6
78,Netherlands,Western Europe,7.39,7.07,-4.3
98,Sweden,Western Europe,7.43,7.35,-1.1
45,Iceland,Western Europe,7.45,7.39,-0.8
26,Denmark,Western Europe,7.55,7.31,-3.2
50,Israel,Middle East and North Africa,7.66,6.58,-14.1
33,Finland,Western Europe,7.73,7.43,-3.9


In [47]:
display(df_2022_no_continents.sort_values('Ecart en %',ascending=False).head(10))
display(df_2022_no_continents.sort_values('Ecart en %',ascending=True).head(10))

Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
61,Lebanon,Middle East and North Africa,2.35,3.98,69.4
93,Sierra Leone,Sub-Saharan Africa,2.56,3.45,34.8
10,Botswana,Sub-Saharan Africa,3.44,4.62,34.3
21,Congo (Kinshasa),Sub-Saharan Africa,3.21,4.09,27.4
113,Zimbabwe,Sub-Saharan Africa,3.3,4.08,23.6
6,Bangladesh,South Asia,3.41,4.09,19.9
32,Ethiopia,Sub-Saharan Africa,3.63,4.24,16.8
101,Tanzania,Sub-Saharan Africa,3.62,4.19,15.7
39,Ghana,Sub-Saharan Africa,4.19,4.76,13.6
66,Malawi,Sub-Saharan Africa,3.36,3.76,11.9


Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
20,Congo (Brazzaville),Sub-Saharan Africa,5.81,4.41,-24.1
71,Mexico,Latin America and Caribbean,7.04,5.36,-23.9
80,Nicaragua,Latin America and Caribbean,6.39,4.91,-23.2
30,El Salvador,Latin America and Caribbean,6.49,5.01,-22.8
77,Nepal,South Asia,5.47,4.23,-22.7
42,Guinea,Sub-Saharan Africa,5.32,4.21,-20.9
43,Honduras,Latin America and Caribbean,5.93,4.75,-19.9
41,Guatemala,Latin America and Caribbean,6.15,4.97,-19.2
37,Georgia,Commonwealth of Independent States,5.29,4.28,-19.1
9,Bolivia,Latin America and Caribbean,5.93,4.91,-17.2


In [48]:
# Pour terminer, on aggrège les deux jeux de données.
df_2022 = df_2022_continents
df_2022

Unnamed: 0,Country name,Regional indicator,Life Ladder,Predictions,Ecart en %
1,Albania,Central and Eastern Europe,5.21,4.93,-5.4
2,Argentina,Latin America and Caribbean,6.26,6.16,-1.6
3,Armenia,Commonwealth of Independent States,5.38,5.01,-6.9
4,Australia,North America and ANZ,7.04,7.04,0.0
5,Austria,Western Europe,7.00,6.73,-3.9
...,...,...,...,...,...
108,United States,North America and ANZ,6.69,6.19,-7.5
109,Uruguay,Latin America and Caribbean,6.67,6.33,-5.1
110,Uzbekistan,Commonwealth of Independent States,6.02,5.36,-11.0
112,Vietnam,Southeast Asia,6.27,5.30,-15.5


In [49]:
# Changement de nom des colonnes
new_names =  {'Predictions' : 'Prédictions avec continents',
              'Ecart en %'  : 'Écart en % avec continents'}

df_2022 = df_2022.rename(new_names, axis = 1)
df_2022

Unnamed: 0,Country name,Regional indicator,Life Ladder,Prédictions avec continents,Écart en % avec continents
1,Albania,Central and Eastern Europe,5.21,4.93,-5.4
2,Argentina,Latin America and Caribbean,6.26,6.16,-1.6
3,Armenia,Commonwealth of Independent States,5.38,5.01,-6.9
4,Australia,North America and ANZ,7.04,7.04,0.0
5,Austria,Western Europe,7.00,6.73,-3.9
...,...,...,...,...,...
108,United States,North America and ANZ,6.69,6.19,-7.5
109,Uruguay,Latin America and Caribbean,6.67,6.33,-5.1
110,Uzbekistan,Commonwealth of Independent States,6.02,5.36,-11.0
112,Vietnam,Southeast Asia,6.27,5.30,-15.5


In [50]:
df_2022['Prédictions sans continents'] = df_2022_no_continents['Predictions']
df_2022['Écart en % sans continents'] = df_2022_no_continents['Ecart en %']
df_2022

Unnamed: 0,Country name,Regional indicator,Life Ladder,Prédictions avec continents,Écart en % avec continents,Prédictions sans continents,Écart en % sans continents
1,Albania,Central and Eastern Europe,5.21,4.93,-5.4,4.88,-6.3
2,Argentina,Latin America and Caribbean,6.26,6.16,-1.6,6.04,-3.5
3,Armenia,Commonwealth of Independent States,5.38,5.01,-6.9,4.97,-7.6
4,Australia,North America and ANZ,7.04,7.04,0.0,6.91,-1.8
5,Austria,Western Europe,7.00,6.73,-3.9,6.66,-4.9
...,...,...,...,...,...,...,...
108,United States,North America and ANZ,6.69,6.19,-7.5,6.07,-9.3
109,Uruguay,Latin America and Caribbean,6.67,6.33,-5.1,6.37,-4.5
110,Uzbekistan,Commonwealth of Independent States,6.02,5.36,-11.0,5.18,-14.0
112,Vietnam,Southeast Asia,6.27,5.30,-15.5,5.21,-16.9


In [51]:
# Ajout d'une colonne pour représenter les différences de pourcentages.
df_2022['Différences valeurs écarts'] = df_2022['Prédictions sans continents']-df_2022['Prédictions avec continents']
df_2022

Unnamed: 0,Country name,Regional indicator,Life Ladder,Prédictions avec continents,Écart en % avec continents,Prédictions sans continents,Écart en % sans continents,Différences valeurs écarts
1,Albania,Central and Eastern Europe,5.21,4.93,-5.4,4.88,-6.3,-0.05
2,Argentina,Latin America and Caribbean,6.26,6.16,-1.6,6.04,-3.5,-0.12
3,Armenia,Commonwealth of Independent States,5.38,5.01,-6.9,4.97,-7.6,-0.04
4,Australia,North America and ANZ,7.04,7.04,0.0,6.91,-1.8,-0.13
5,Austria,Western Europe,7.00,6.73,-3.9,6.66,-4.9,-0.07
...,...,...,...,...,...,...,...,...
108,United States,North America and ANZ,6.69,6.19,-7.5,6.07,-9.3,-0.12
109,Uruguay,Latin America and Caribbean,6.67,6.33,-5.1,6.37,-4.5,0.04
110,Uzbekistan,Commonwealth of Independent States,6.02,5.36,-11.0,5.18,-14.0,-0.18
112,Vietnam,Southeast Asia,6.27,5.30,-15.5,5.21,-16.9,-0.09


In [52]:
# Ajout d'une colonne pour représenter les différences de pourcentages.
df_2022['% variations écarts'] = df_2022['Écart en % sans continents']-df_2022['Écart en % avec continents']
df_2022

Unnamed: 0,Country name,Regional indicator,Life Ladder,Prédictions avec continents,Écart en % avec continents,Prédictions sans continents,Écart en % sans continents,Différences valeurs écarts,% variations écarts
1,Albania,Central and Eastern Europe,5.21,4.93,-5.4,4.88,-6.3,-0.05,-0.9
2,Argentina,Latin America and Caribbean,6.26,6.16,-1.6,6.04,-3.5,-0.12,-1.9
3,Armenia,Commonwealth of Independent States,5.38,5.01,-6.9,4.97,-7.6,-0.04,-0.7
4,Australia,North America and ANZ,7.04,7.04,0.0,6.91,-1.8,-0.13,-1.8
5,Austria,Western Europe,7.00,6.73,-3.9,6.66,-4.9,-0.07,-1.0
...,...,...,...,...,...,...,...,...,...
108,United States,North America and ANZ,6.69,6.19,-7.5,6.07,-9.3,-0.12,-1.8
109,Uruguay,Latin America and Caribbean,6.67,6.33,-5.1,6.37,-4.5,0.04,0.6
110,Uzbekistan,Commonwealth of Independent States,6.02,5.36,-11.0,5.18,-14.0,-0.18,-3.0
112,Vietnam,Southeast Asia,6.27,5.30,-15.5,5.21,-16.9,-0.09,-1.4


In [53]:
df_2022.describe()

Unnamed: 0,Life Ladder,Prédictions avec continents,Écart en % avec continents,Prédictions sans continents,Écart en % sans continents,Différences valeurs écarts,% variations écarts
count,103.0,103.0,103.0,103.0,103.0,103.0,103.0
mean,5.591359,5.412233,-1.661165,5.349515,-2.68932,-0.062718,-1.028155
std,1.229449,1.059031,12.666323,1.048795,13.30882,0.149432,2.533633
min,2.35,3.51,-25.1,3.45,-24.1,-0.59,-9.9
25%,4.675,4.495,-7.7,4.405,-9.0,-0.11,-1.8
50%,5.89,5.44,-3.3,5.23,-4.3,-0.04,-0.7
75%,6.465,6.175,1.05,6.14,1.45,0.015,0.4
max,7.73,7.5,68.9,7.43,69.4,0.3,5.2


In [54]:
# On constate que les prédictions sont moins précises si l'on enlève les données relatives aux continents.