<a href="https://colab.research.google.com/github/AntoineLgx/training-datamining-mds/blob/master/2020424_14_NAIROBI_final2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Issues : 
https://github.com/CashStory/training-datamining-mds/issues/14

Pitch : 
https://drive.google.com/file/d/1AISsc-lc4-94Dj7LOQKBj3L7VgChkejG/view

**Imports:**

In [26]:
import math
import pandas as pd
from datetime import datetime
from plotly.offline import iplot, plot, download_plotlyjs, init_notebook_mode
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from google.colab import drive
# Mount Google Drive (Colab only) at /content/gdrive; the HTML export at the end
# of the notebook writes its file under this mount point.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


**Variables:**

In [0]:
# Location of the Excel workbook (hosted under ilo.org/ilostat-files) that is
# handed to pd.read_excel in the data-retrieval step.
URL = "https://www.ilo.org/ilostat-files/Documents/Excel/MBI_33_EN.xlsx"

# Year used to filter the YEAR column in the data-selection step.
SELECTED_YEAR = 2019

**Retrieving data:**

In [0]:
# Download the workbook and relabel its columns with explicit names.
#
# Two arguments from the previous call were removed:
#   * sep='\t'        -- a read_csv option; read_excel has no such parameter and
#                        recent pandas versions raise TypeError on it.
#   * parse_dates=[0] -- column 0 is COUNTRY (plain text such as "World"), so
#                        there is nothing to convert to dates.
#
# Single/double-letter names ('b', 'e', 't' ... 'ah') are placeholders for
# columns that the selection step does not keep.
retrieved_data = pd.read_excel(
    URL,
    names=[
        'COUNTRY', 'b', 'GENDER', 'YEAR', 'e',
        'AGRICULTURE - FORESTRY - FISHING', 'MINING - QUARRYING', 'MANUFACTURING',
        'UTILITIES', 'CONSTRUCTION',
        'WHOLESALE - RETAIL - REPAIR VEHICLES', 'TRANSPORT - STORAGE - COMMUNICATION',
        'ACCOMODATION - FOOD SERVICES', 'FINANCE - INSURANCE',
        'REAL ESTATE - BUSINESS - ADMINISTRATION',
        'PUBLIC ADMINISTRATION - DEFENCE - SOCIAL SECURITY', 'EDUCATION',
        'HUMAN HEALTH - SOCIAL WORK',
        'OTHER SERVICES',
        't', 'u', 'v', 'w', 'x', 'y', 'z', 'aa', 'ab', 'ac', 'ad', 'ae', 'af', 'ag', 'ah',
    ],
)



**Data selection:**

In [29]:
# Discard the five leading rows (index labels 0 to 4), which carry no data
data = retrieved_data.drop(index=list(range(5)))

# Keep the "Total" gender rows (neither male nor female) for the selected year
is_total = data['GENDER'] == "Total"
is_selected_year = data['YEAR'] == SELECTED_YEAR
data = data[is_total & is_selected_year]

# Sector columns of interest, in display order
sector_columns = [
    'AGRICULTURE - FORESTRY - FISHING',
    'MINING - QUARRYING',
    'MANUFACTURING',
    'UTILITIES',
    'CONSTRUCTION',
    'WHOLESALE - RETAIL - REPAIR VEHICLES',
    'TRANSPORT - STORAGE - COMMUNICATION',
    'ACCOMODATION - FOOD SERVICES',
    'FINANCE - INSURANCE',
    'REAL ESTATE - BUSINESS - ADMINISTRATION',
    'PUBLIC ADMINISTRATION - DEFENCE - SOCIAL SECURITY',
    'EDUCATION',
    'HUMAN HEALTH - SOCIAL WORK',
    'OTHER SERVICES',
]

# Index by country and keep only the sector columns
df = data.set_index('COUNTRY')[sector_columns]
df

Unnamed: 0_level_0,AGRICULTURE - FORESTRY - FISHING,MINING - QUARRYING,MANUFACTURING,UTILITIES,CONSTRUCTION,WHOLESALE - RETAIL - REPAIR VEHICLES,TRANSPORT - STORAGE - COMMUNICATION,ACCOMODATION - FOOD SERVICES,FINANCE - INSURANCE,REAL ESTATE - BUSINESS - ADMINISTRATION,PUBLIC ADMINISTRATION - DEFENCE - SOCIAL SECURITY,EDUCATION,HUMAN HEALTH - SOCIAL WORK,OTHER SERVICES
COUNTRY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
World,26.84,0.65,13.97,0.8,7.64,14.5,6.08,4.24,1.57,4.62,4.36,5.28,4.08,5.38
World: Low income,60.39,1.46,5.56,0.27,3.4,12.16,2.66,1.7,0.32,1.11,1.82,2.46,1.25,5.46
World: Lower-middle income,37.7,0.62,12.47,0.59,9.03,14.16,6.04,2.81,1.01,2.38,2.73,4.37,1.55,4.57
World: Upper-middle income,20.88,0.58,17.49,0.92,7.25,15.65,5.71,5.35,1.44,4.48,5.71,5.49,3.3,5.75
World: High income,2.78,0.48,13.2,1.19,7.89,13.77,8.62,5.81,3.55,10.96,5.73,7.94,12.01,6.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Viet Nam,37.36,0.37,18.42,0.51,8.34,12.84,3.99,4.96,0.76,1.65,3.23,3.96,1.04,2.58
Western Sahara,20.16,0.41,12.6,0.86,11.68,14.76,6.19,2.58,0.94,2.25,11.6,8.92,3.42,3.61
Yemen,28.98,0.17,4.63,0.15,5.11,25.41,9.49,2.04,0.28,0.51,14.45,5.49,1.43,1.87
Zambia,48.84,1.87,4.29,0.51,4.17,13.71,3.89,1.45,0.29,1.94,1.19,3.2,1.44,13.21


**Data formatting:**

In [30]:
# Initialization of variables and functions

# Sector labels, taken from the DataFrame columns in their existing order
SECTORS = list(df.columns)

# Row labels of df plotted initially: first entry, then second entry
indexstock = ['World', "France"]

#***********************************************************************************
# Initialization of graphs

# Row 1 holds two polar subplots; row 2 holds one subplot spanning both columns
subplot_specs = [
    [{'type': 'polar'}, {'type': 'polar'}],
    [{"colspan": 2}, None],
]
fig = make_subplots(
    rows=2, cols=2,
    specs=subplot_specs,
    shared_xaxes=True, shared_yaxes=False,
    row_heights=[0.3, 0.7],
    vertical_spacing=0.11, horizontal_spacing=0.15,
)

# One colour per entry of indexstock, reused by the polar and the bar traces
trace_colors = ['mediumpurple', 'indianred']

#***********************************************************************************
# Add polar graphs

# Traces 0 and 1: one polar chart per country, in columns 1 and 2 of the first row
for column, (country, color) in enumerate(zip(indexstock, trace_colors), start=1):
  polar_trace = go.Scatterpolar(r=df.loc[country], theta=SECTORS, fill='toself',
                                name=country, marker_color=color)
  fig.add_trace(polar_trace, row=1, col=column)

#***********************************************************************************
# Horizontal group bar graph

# Traces 2 and 3: both countries share the single subplot of the second row
for country, color in zip(indexstock, trace_colors):
  sector_values = df.loc[country]
  bar_trace = go.Bar(x=sector_values, y=SECTORS, orientation='h', name=country,
                     text=sector_values, textposition='auto', marker_color=color)
  fig.add_trace(bar_trace, row=2, col=1)

#***********************************************************************************
# Setting layout

chart_title = ("Differences of repartition of employement by country and sector au "
               + str(datetime.today()) + " (en %)")
fig.update_layout(
    title_text=chart_title, title_x=0.5,
    width=1600, height=1000,
    showlegend=True, legend=dict(x=-.2, y=0.8),
    polar=dict(radialaxis=dict(visible=True)),
    barmode='group',
)

#***********************************************************************************
# Creating buttons

def _country_buttons(trace_indices):
  """Build one 'restyle' button per row label of df.

  Each button's args pair that row's values (as r, x and text) and its label
  (as name) with the given list of trace indices.
  """
  entries = []
  for country in df.index:
    sector_values = df.loc[country]
    restyle_args = {'r': [sector_values], 'x': [sector_values],
                    'name': [country, country], 'text': [sector_values]}
    # A fresh list per button, as each original button had its own literal
    entries.append(dict(method='restyle', label=country,
                        args=[restyle_args, list(trace_indices)]))
  return entries


def _dropdown(buttons, x):
  """Return the updatemenu settings for one dropdown placed at horizontal position x."""
  return dict(buttons=buttons, direction="down", pad={"r": 1, "t": 1}, showactive=True,
              x=x, xanchor="left", y=0.69, yanchor="top")


# First dropdown targets traces 0 and 2, second dropdown targets traces 1 and 3
buttons_country_1 = _country_buttons([0, 2])
buttons_country_2 = _country_buttons([1, 3])

fig.update_layout(updatemenus=[_dropdown(buttons_country_1, 0.04),
                               _dropdown(buttons_country_2, 0.6)])

#***********************************************************************************
# Display graph

fig.show()

#***********************************************************************************
# Export as HTML file

# Write the same figure to an HTML file on the mounted Drive, without opening it
Tickets_plot = fig
plot(
    Tickets_plot,
    filename="/content/gdrive/My Drive/datamining/employement_by_sector_and_country.html",
    auto_open=False,
)



'/content/gdrive/My Drive/datamining/employement_by_sector_and_country.html'