In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from chart_studio import plotly
import cufflinks as cf
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import dash
from dash import Dash, dcc, html, Output, Input

from plotly.offline import download_plotlyjs,init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

![Development](https://pbs.twimg.com/profile_images/378800000681839340/6030142c07aacf486bccb3337314cf6c_400x400.jpeg)

* [Introduction](#section-one)
* [Human Development Groups](#section-two)
* [The ten richest and the ten poorest countries](#section-three)
* [Comparison](#section-four)
* [Conclusion](#section-five)

<a id="section-one"></a>
# Introduction

In [None]:
# Load the Human Development Index CSV into the DataFrame used by the rest of the notebook.
dfdevelopment = pd.read_csv(
    "../input/human-development-index-dataset/Human Development Index - Full.csv"
)

In [None]:
# Show the first five rows to get a first impression of the data.
dfdevelopment.head(n=5)

In [None]:
# Show every row that has a missing value in at least one column.
dfdevelopment.loc[dfdevelopment.isna().any(axis="columns")]

There are 171 rows with missing values, and the dataset has 880 columns. Cleaning all of this up front would cost a lot of time without improving the results, so instead I will check for each graph whether it contains values I can't use.

<a id="section-two"></a>
# Human Development Groups

The dataset already divides the countries into different levels of development.

In [None]:
# Summarise the groups found in the "Human Development Groups" column.
# Missing values are dropped first: NaN marks a country without a
# classification, not a group of its own, so it must not be counted or listed.
v = dfdevelopment["Human Development Groups"].dropna().unique()

print(f"The dataset divided the countries into {len(v)} groups in terms of Human development.")
# Join whatever labels are present instead of indexing v[0]..v[4], which
# raises IndexError when the column holds fewer than five distinct values.
print(f"The different groups are: {', '.join(map(str, v))}")


In [None]:
# Select the rows whose "Human Development Groups" value is missing and
# print the names of the countries concerned.
group_is_missing = dfdevelopment["Human Development Groups"].isna()
check_nan = dfdevelopment.loc[group_is_missing]
print("There is no information available for the following countries on this subject.")
print(check_nan["Country"])

We are missing information for four countries. In this case I don't consider that a problem:
Plotly will handle this and simply leave those countries blank on the choropleth.
Now I am going to plot this information on the world map with Plotly.

In [None]:
# World map coloured by human development group, located via ISO3 codes.
# Hovering a country shows its name plus three 2021 indicators.
hover_columns = [
    "Life Expectancy at Birth (2021)",
    'Gross National Income Per Capita (2021)',
    'Mean Years of Schooling (2021)',
]
group_colors = {"Very High": "green", "High": 'yellow', "Medium": 'orange', "Low": 'red'}

fig1 = px.choropleth(
    dfdevelopment,
    locations="ISO3",
    color="Human Development Groups",
    color_discrete_map=group_colors,
    hover_name="Country",
    hover_data=hover_columns,
    projection="natural earth",
    title="Human development 2021.",
)

fig1.show()

The least developed countries are almost all in Africa and the Middle East.

<a id="section-three"></a>
# The ten richest and the ten poorest countries

The gross national income (GNI) per capita is an indicator of income developed by the World Bank. It is the dollar value of a country's final income in a year, divided by its population, and it should reflect the average before-tax income of that country's citizens.

Let's find out which ten countries have the highest GNI per capita, and which ten countries have the lowest GNI per capita.

In [None]:
# Before ranking by GNI, list the countries that have no GNI value for 2021.
gni_is_missing = dfdevelopment["Gross National Income Per Capita (2021)"].isna()
check_nan = dfdevelopment.loc[gni_is_missing]
check_nan

There are two countries with a lot of missing values, including the column "Gross National Income Per Capita (2021)". This would get in the way of the next step: after sorting, these countries would probably end up at the top or the bottom of the list and so be counted among the countries with the highest or the lowest GNI. I am going to delete these rows.

In [None]:
# Remove the countries that have no GNI value for 2021 so they cannot end up
# among the ten richest or ten poorest after sorting.
# Selecting them by missing value rather than by hard-coded index labels keeps
# the cell independent of the CSV's row order and safe to re-run: dropping
# fixed labels a second time raises KeyError because they are already gone.
dfdevelopment = dfdevelopment.dropna(subset=["Gross National Income Per Capita (2021)"])

In [None]:
# Order all countries by their 2021 GNI per capita, lowest first.
gni_2021 = dfdevelopment.sort_values(by='Gross National Income Per Capita (2021)', ascending=True)
# The ten countries with the lowest GNI are the first ten rows of the sorted frame.
low_gni = gni_2021.iloc[:10]
# The ten countries with the highest GNI are the last ten rows.
high_gni = gni_2021.iloc[-10:]

In [None]:
# Two bar charts of GNI per capita: one for the ten lowest-GNI countries and
# one for the ten highest-GNI countries.
gni_column = 'Gross National Income Per Capita (2021)'
bar_labels = {gni_column: "GNI per capita($)", "Country": "Country"}

fig2 = px.bar(
    low_gni,
    x="Country",
    y=gni_column,
    labels=dict(bar_labels),  # fresh dict per figure, as in the original calls
    title="Countries with the lowest GNI in 2021.",
)
fig2.show()

fig3 = px.bar(
    high_gni,
    x="Country",
    y=gni_column,
    labels=dict(bar_labels),
    title="Countries with the highest GNI in 2021.",
)
fig3.show()

<a id="section-four"></a>
# Comparison

Now I will compare the ten countries with the highest GNI with the ten countries with the lowest GNI.

In [None]:
# Stack the ten lowest-GNI rows and the ten highest-GNI rows (selected for the
# bar plots above) into one DataFrame so they can be compared together.
frames = [low_gni, high_gni]
result = pd.concat(objs=frames, axis=0)

In [None]:
# Columns to compare against the GNI per capita. The plotting cell below draws
# one scatterplot per entry, so adding a column name here adds a plot after
# re-running, without any other code change. Replace the entries with other
# columns to explore different relationships.
l = [
    'Life Expectancy at Birth (2021)',
    'Mean Years of Schooling (2021)',
    'Maternal Mortality Ratio (deaths per 100,000 live births) (2021)',
    'Adolescent Birth Rate (births per 1,000 women ages 15-19) (2021)',
    'Material footprint per capita (tonnes) (2021)',
    'Carbon dioxide emissions per capita (production) (tonnes) (2021)',
]

In [None]:
# Draw one scatterplot per column in `l`: GNI per capita on the x-axis, the
# column on the y-axis, points coloured by human development group.

# Shorter axis titles for the columns with very long names. They are keyed by
# column name rather than by position in `l` (l[2], l[3], l[5]), so reordering,
# shortening or extending the list can neither attach a label to the wrong
# column nor raise IndexError.
axis_labels = {
    'Maternal Mortality Ratio (deaths per 100,000 live births) (2021)': "deaths per 100,000 live births (2021)",
    'Adolescent Birth Rate (births per 1,000 women ages 15-19) (2021)': "births per 1,000 women ages 15-19 (2021)",
    'Carbon dioxide emissions per capita (production) (tonnes) (2021)': "Carbon dioxide emissions per capita(tonnes) (2021)",
}
group_colors = {"Very High": "green", "High": 'yellow', "Medium": 'orange', "Low": 'red'}

for column in l:
    fig4 = px.scatter(
        result,
        x="Gross National Income Per Capita (2021)",
        y=column,
        color='Human Development Groups',
        title=column,
        hover_name="Country",
        labels=dict(axis_labels),  # fresh dict per figure, as before
        color_discrete_map=group_colors,
    )

    # Customizing the scatter plot: larger markers with a dark outline.
    fig4.update_traces(
        marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')),
        selector=dict(mode='markers'),
    )
    fig4.update_layout(showlegend=True)

    fig4.show()

<a id="section-five"></a>
# Conclusion

The conclusion we can draw is that, on the one hand, people who live in the ten poorest countries have shorter lives, spend fewer years in school, have babies at an earlier age, and die more often while giving birth.
On the other hand, their lifestyle pollutes the earth far less.