In [None]:
# I - Libraries
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import squarify 
import pandas as pd
import geopandas as gpd

# II - Load the reference tables
# Land indicators per country and year, read from a local CSV file.
lands = pd.read_csv('/home/hmdg/Documents/Data_Science/Repos/Lands/data/ldown_c.csv')

# Lookup table that links the country names used by `lands` ('name_l') with
# the ones used by `world` ('name_w').
countries_ref = pd.read_csv('/home/hmdg/Documents/Data_Science/Repos/Lands/data/countries_ref.csv')

# Country geometries from the Natural Earth sample dataset bundled with GeoPandas.
# NOTE(review): `geopandas.datasets` was deprecated in GeoPandas 0.14 and
# removed in 1.0, so this lookup needs an older release - confirm the version.
naturalearth_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(naturalearth_path)

# III - Merging the dataframes
# The land indicators (lands) and the geographic reference (world) have to be
# combined into one main dataframe. countries_ref is the bridge between them:
# its 'name_l' column is matched against lands['country'] and its 'name_w'
# column against world['name'].

###########################################################
# The study is done only with the data for the year 2016  #
###########################################################
lands = lands.loc[lands['year'] == 2016]

# Step 1: attach the 2016 land records to the reference names.
l_c = countries_ref.merge(lands, left_on='name_l', right_on='country')

# Step 2: attach the geometries, comparing the country names after stripping
# surrounding whitespace and lower-casing them.
world_key = world['name'].str.strip().str.lower()
ref_key = l_c['name_w'].str.strip().str.lower()
w_l_c = world.merge(l_c, left_on=world_key, right_on=ref_key)

# Merging on two key Series adds a helper 'key_0' column; it is removed along
# with world's 'name' column (the country name stays available as 'name_w').
w_l_c = w_l_c.drop(columns=['name', 'key_0'])


# Short codes that replace the long indicator names stored in the 'type' column
lrep = {
    'Land area (thousand hectares)': 'LA',
    'Arable land (thousand hectares)': 'AL',
    'Arable land (% of total land area)': 'AL_%',
    'Forest cover (thousand hectares)': 'FC',
    'Forest cover (% of total land area)': 'FC_%',
    'Permanent crops (thousand hectares)': 'PC',
    'Permanent crops (% of total land area)': 'PC_%',
    'Important sites for terrestrial biodiversity protected (% of total sites protected)': 'IB%',
}

# Final df with all the data: 'type' now holds the short codes.
# Any indicator name that is not a key of lrep becomes NaN.
w_l_c['type'] = w_l_c['type'].map(lrep)

# IV --- Adding the relative values (percentages) of every type of land.
# 'Important sites for terrestrial biodiversity protected' is the only
# indicator that comes only as a percentage, so an absolute value is derived
# for it first (IB% applied to each country's land area). The world-level
# percentage of every indicator (LA, AL, PC, FC and IB) is then computed
# against the total land area.

# One row per country, one column per indicator code
iv_values = w_l_c[['name_w', 'value', 'type']]
iv_by_country = iv_values.pivot(index='name_w', columns='type', values='value')
iv_by_country['IB'] = 0.01 * iv_by_country['IB%'] * iv_by_country['LA']

# World totals per indicator; LA is listed first and is the denominator below
iv_totals = iv_by_country[['LA', 'AL', 'PC', 'FC', 'IB']].sum()

# Table with the totals (column 0) and their share of the total land area;
# reset_index() turns the indicator codes into a regular 'type' column.
IB_abs = pd.DataFrame(iv_totals)
IB_abs['perc'] = 100 * IB_abs[0].div(iv_totals['LA'])
IB_abs = IB_abs.reset_index()


# Function to plot the percentage distribution
%matplotlib inline

def sq_dist(df,
            out_path='/home/hmdg/Documents/Data_Science/Repos/Lands/images/squari_dist_old.png',
            title='World Land Distribution'):
    """Draw a treemap of the percentage share of each land type and save it.

    Rows of ``df`` from index label 1 onwards are plotted, so the row labelled
    0 is left out; the caller in this file stores the total land area (the
    100 % reference) there. Each plotted row becomes one rectangle sized by
    its ``perc`` value and captioned with its ``type`` code followed by the
    percentage rounded to one decimal.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``type`` column (caption) and a ``perc`` column (size).
    out_path : str, optional
        File the figure is written to. Defaults to the location that used to
        be hard-coded, so existing calls keep saving to the same place.
    title : str, optional
        Title drawn above the treemap.

    Returns
    -------
    None
        The figure is saved and shown as a side effect.
    """
    # The figure handle is not needed afterwards: pyplot keeps the new figure
    # as the current one and squarify.plot is called without an explicit `ax`.
    plt.figure(figsize=(7, 5))

    shares = df.loc[1:]
    sizes = shares['perc'].to_list()
    labels = [f"{kind}\n ({round(perc, 1)}%)"
              for kind, perc in zip(shares['type'], shares['perc'])]

    colors = ['lightsalmon', 'indianred', 'lightblue', 'cornflowerblue']

    squarify.plot(
        sizes=sizes,
        alpha=0.8,
        color=colors,
        label=labels,
        text_kwargs={
            'fontsize': 13,
            'family': 'sans-serif',
            'style': 'italic',
            'weight': 'bold',
            'multialignment': 'center',
        },
        bar_kwargs={'linewidth': 1.2, 'edgecolor': 'white'},
    )

    plt.title(title)
    plt.axis('off')
    plt.tight_layout()
    # Save before show() so the file is written from the fully drawn figure.
    plt.savefig(out_path, bbox_inches='tight')
    # plt.show() returns None, so there is nothing meaningful to return.
    plt.show()

# Draw the treemap of the world land distribution from the IB_abs table
# (the function also saves the figure to disk).
sq_dist(IB_abs)

