<a href="https://colab.research.google.com/github/estebanlecalvez/training-datamining-mds/blob/master/20200424_12_Tokyo_final2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Objectifs**

*   Faire un graphique du top 10 des pays les plus grands (en superficie)
*   Puis un top 10 des pays avec le plus d'habitants au kilomètre carré


**Import**

In [0]:
import pandas as pd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
import plotly.offline as ofli
from IPython.display import display
import ipywidgets as widgets

**Authentification**

**Source : Kaggle =>** https://www.kaggle.com/sudalairajkumar/undata-country-profiles/data#country_profile_variables.csv

From SRK, last updated in 2018

In [0]:
# Identifier of the dataset file on Google Drive and the local name it is saved under.
DRIVE_FILE_ID = "1f-yl7fIkJYyTwJRRqeza5kWompmClYNE"
CSV_FILENAME = 'country_profile_variables.csv'

# Sign in from Colab, then hand the application-default credentials to PyDrive.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Reference the Drive file by id and write its content to the local CSV file.
downloaded = drive.CreateFile({'id': DRIVE_FILE_ID})
downloaded.GetContentFile(CSV_FILENAME)

In [0]:
# Read the complete country-profile CSV (every column) into a DataFrame.
data = pd.read_csv("country_profile_variables.csv")
# Bare last expression: the notebook renders the frame as this cell's output.
data

**Filtrage des données**

In [0]:
# Build the working frame in a single chain so that `data` is an independent
# DataFrame: assigning columns on a filtered slice (as a separate step) raises
# pandas' SettingWithCopyWarning.
data = (
    # Read only the three columns needed for the two rankings.
    pd.read_csv(
        "country_profile_variables.csv",
        usecols=["country", "Surface area (km2)", "Population density (per km2, 2017)"],
    )
    # Drop the Holy See row, which is not wanted in the rankings.
    .loc[lambda frame: frame["country"] != "Holy See"]
    # Short upper-case column names used by the rest of the notebook.
    .rename(columns={
        "Surface area (km2)": "SURFACE",
        "Population density (per km2, 2017)": "DENSITY",
        "country": "COUNTRY",
    })
    # Make sure both measures are numeric; to_numeric raises on any value it
    # cannot parse.
    .assign(
        SURFACE=lambda frame: pd.to_numeric(frame["SURFACE"]),
        DENSITY=lambda frame: pd.to_numeric(frame["DENSITY"]),
    )
)

# Show the filtered frame (all remaining countries; the top-10 cut happens later).
data

**Affichage graphique** : Séparation de `data` en deux tableaux (un pour la densité et un pour la surface)

In [0]:
# Build the two rankings (population density per km² and surface area).
# Number of countries kept in each ranking.
limit = 10

# Rows with a missing value in the ranked column are dropped first:
# sort_values places NaN last by default, so tail() would otherwise select
# them as the "largest" entries.
# Each frame is sorted ascending, so the largest value is its last row.
ordered_densite = (
    data.dropna(subset=["DENSITY"])
    .sort_values(by="DENSITY", ascending=True)
    .tail(limit)
)
ordered_surface = (
    data.dropna(subset=["SURFACE"])
    .sort_values(by="SURFACE", ascending=True)
    .tail(limit)
)

# Preview the surface ranking.
ordered_surface

**Affichage graphique :** On crée 2 nouveaux tableaux pour les titres qui seront affichés sur les barres

In [0]:
def _bar_labels(values, unit):
  """Return one display label per value: space-grouped integer followed by ``unit``.

  Each value is converted with ``int()`` first because the ``d`` format code
  rejects floats. Example: ``1234567`` with ``" km²"`` gives ``"1 234 567 km²"``.
  """
  return [format(int(value), "8,d").replace(",", " ") + unit for value in values]


# Text drawn on the bars. Columns are read by label rather than by position,
# so the result does not depend on the column order of the CSV.
ordered_surface_titles = _bar_labels(ordered_surface["SURFACE"], " km²")
ordered_densite_titles = _bar_labels(ordered_densite["DENSITY"], " h/km²")

# One title per view; the buttons below switch the title together with the trace.
surface_title = 'Top '+str(limit)+' biggest Countries'
density_title = 'Top '+str(limit)+' most densely populated Countries'

# Two horizontal bar traces; only the surface trace is visible initially.
bars=[
  go.Bar(text=ordered_surface_titles,
         textposition='auto',
         name="Surface",
         showlegend=True,
         # Trailing spaces are appended to each country name used as category label.
         y=ordered_surface["COUNTRY"]+"  ",
         x=ordered_surface["SURFACE"],
         orientation='h',
         marker_color='rgb(25, 84, 5)',
         marker_line_color='rgb(50, 84, 50)'
        ),
  go.Bar(text=ordered_densite_titles,
         textposition='auto',
         name="Densité",
         showlegend=True,
         y=ordered_densite["COUNTRY"]+"  ",
         x=ordered_densite["DENSITY"],
         orientation='h',
         visible=False,
         marker_color='rgb(255, 195, 118)',
         marker_line_color='rgb(170, 68, 1)'
         ),
]
layout = go.Layout(
  plot_bgcolor='rgb(255, 255, 255)',
  title=surface_title,
  updatemenus=[
    dict(
      showactive=True,
      direction="left",
      type="buttons",
      active=0,
      # Each button toggles trace visibility (first dict) and sets the
      # matching figure title (second dict, applied to the layout).
      buttons=[
        {'label': 'Surface', 'method': 'update',
         'args': [{'visible': [True, False]}, {'title.text': surface_title}]},
        {'label': 'Densite', 'method': 'update',
         'args': [{'visible': [False, True]}, {'title.text': density_title}]},
      ],
      pad={"r": 10, "t": 10},
      x=0.987,
      xanchor="left",
      y=1.2,
      yanchor="top"
    )
  ],
  # NOTE(review): the notebook's source section cites the Kaggle
  # "undata-country-profiles" dataset — confirm this attribution text.
  annotations=[dict(
    text='Updated in 2018 from The World Bank',
    showarrow=False,
    xref='paper', x=0.95,
    yref='paper', y=-0.15),]
)

fig = go.Figure(bars, layout)
# Figure height grows with the number of bars (60 px per country).
fig.update_layout(height=limit*60)
fig.show()



**Bouton de téléchargement**

In [0]:
def clicked(arg):
  """Click handler: re-plot ``fig`` offline, requesting a PNG image export.

  ``arg`` is the value passed in by the widget's click event; it is unused.
  """
  export_name = 'Top '+str(limit)+' biggest Countries'
  ofli.iplot(
      fig,
      filename=export_name,
      image='png',
      image_width=1920,
      image_height=limit*60,
  )


# Create the export button, attach the handler and show it under the cell.
button_download = widgets.Button(description='Export Graph as png')
button_download.on_click(clicked)
display(button_download)