# Global Energy Consumption Analytics



<div class=h1> Introduction </div>

Global energy consumption is the amount of power used, supplied by different types of non-renewable and renewable energy sources.

World energy supply and consumption covers the global production and preparation of fuel, power generation, energy transport and energy consumption. Many countries depend on energy supply and consumption for their development, their economy and their industrialization.

Energy consumption in some countries is much higher than in others. For example, China consumes 1.67 times as much energy as the USA, 2 times as much as Canada and almost 5 times as much as the rest of the world.

Several developing countries use renewable energy such as hydro, biofuel, etc., which helps to mitigate climate change.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels as sm
from statsmodels.tsa import seasonal
import scipy 
import path
from sklearn.decomposition import FactorAnalysis, PCA 
from sklearn.preprocessing import MinMaxScaler, RobustScaler, MaxAbsScaler, Normalizer 
from sklearn.pipeline import Pipeline
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster.hierarchy import ward, dendrogram, complete, single, average
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from ipywidgets import widgets, VBox
from warnings import filterwarnings 

In [None]:
# Global look-and-feel applied to every figure in the notebook.
sns.set('poster')

# Matplotlib renamed its bundled seaborn style sheets with a 'seaborn-v0_8-' prefix and
# later dropped the old names, so 'seaborn-dark-palette' is not available everywhere.
# Use whichever spelling this installation provides; if neither exists, the theme set by
# seaborn above is kept unchanged.
for _style in ('seaborn-v0_8-dark-palette', 'seaborn-dark-palette'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break

filterwarnings('ignore')

# Black, bold axes/titles and framed, shadowed legends on top of the chosen style.
plt.rcParams.update({
    'axes.edgecolor': 'black',
    'axes.labelcolor': 'black',
    'axes.labelweight': 'bold',
    'axes.titlecolor': 'black',
    'axes.titleweight': 'bold',
    'figure.autolayout': True,
    'figure.edgecolor': 'black',
    'figure.frameon': True,
    'font.family': 'DejaVu Sans',
    'font.weight': 'bold',
    'legend.edgecolor': 'black',
    'legend.fancybox': True,
    'legend.frameon': True,
    'legend.shadow': True,
})

In [None]:
# Relative paths (under ../input/renewable-power-generation/) to the six CSV files of the
# dataset.
# NOTE(review): the pd.read_csv calls visible in this notebook pass bare file names
# (e.g. 'Continent_Consumption_TWH.csv') instead of these variables — confirm which
# location is intended and use it consistently.
file1 = '../input/renewable-power-generation/Continent_Consumption_TWH.csv'
file2 = '../input/renewable-power-generation/Country_Consumption_TWH.csv'
file3 = '../input/renewable-power-generation/nonRenewablesTotalPowerGeneration.csv'
file4 = '../input/renewable-power-generation/renewablePowerGeneration97-17.csv'
file5 = '../input/renewable-power-generation/renewablesTotalPowerGeneration.csv'
file6 = '../input/renewable-power-generation/top20CountriesPowerGeneration.csv'

<div class=h1> I. Continent energy consumption </div>

**Energy consumption** is the amount of energy or power used. In this dataset, we have four organizations **OECD**, **BRICS**, **CIS** and **Middle-East**. Also, it contains 6 continents.  

In [None]:
energ_cont_conps = pd.read_csv('Continent_Consumption_TWH.csv')

In [None]:
energ_cont_conps.head()

In [None]:
energ_cont_conps.info()

In [None]:
# Columns that are organizations (economic/political groupings) rather than continents.
organization_name = ['OECD', 'CIS', 'Middle-East', 'BRICS']

# Every remaining column except the world total and the year is a continent.
# Filter the columns in their original order: a set difference returns them in an
# arbitrary order that can change between kernel sessions, which would shuffle the
# per-continent subplots and legends.
_non_continent = {'World', 'Year', *organization_name}
continent_name = [col for col in energ_cont_conps.columns if col not in _non_continent]

<div class=h2> I.1. World energy consumption </div>
Let's see how energy consumption evolves in the world. 

In [None]:
energ_cont_conps.set_index('Year', inplace=True)

In [None]:
g=energ_cont_conps.World.plot(figsize=(15,5), title='World energy consumption',legend=True)
g.text(2008, 115000, 'Abrupt\nchange caused by\n economic crisis 2008-2009.', 
       bbox=dict(facecolor='red', alpha=0.85))
g.text(2018, 153000, 'Covid19 pandemic', bbox=dict(facecolor='red', alpha=0.85))

plt.ylabel('TWh')
plt.show()

Energy consumption in the world increases each year. However, we see two abrupt changes, around 2009 (economic crisis) and 2020 (Covid19 pandemic). These two abrupt changes are 11 years apart.

In [None]:
world_eccr = energ_cont_conps.World.pct_change().mul(100)
#100*(energ_cont_conps.World.diff(1)/energ_cont_conps.World)

In [None]:
gg=world_eccr.plot(figsize=(15,5), title='Change rate of the world\n energy consumption.')
gg.text(1998, -4, 'negative change rate caused\nby economic crisis 2008-2009.', 
        bbox=dict(facecolor='yellow', alpha=0.85))
gg.text(2014.5, 3, 'Covid19 falls the world\nenergy consumption\nto -4%.',
        bbox=dict(facecolor='yellow'))
plt.ylabel('%')
plt.show()

In [None]:
decompose_wec = seasonal.seasonal_decompose(energ_cont_conps.World, period=10)

In [None]:
_, axc = plt.subplots(3, 1, figsize=(15, 15), sharex=True) 
plt.suptitle('World energy consumption time series decomposition', fontsize=30)
decompose_wec.observed.plot(ax=axc[0], title='Observed')
decompose_wec.trend.plot(ax=axc[1], title='Trend')
decompose_wec.seasonal.plot(ax=axc[2], title='Seasonal')
axc[0].set_ylabel('TWh')
plt.show()

<div class=h2> I.2. Organization energy consumption </div>

Each country in the world belongs to some organization such as OECD, BRICS, CIS or Middle-East. For example, BRICS is an organization whose member countries are Brazil, Russia, India, China and South Africa.

In [None]:
organisation = energ_cont_conps[organization_name]

In [None]:
org = organisation.plot(figsize=(18,12))
plt.ylabel('TWh')
org.text(2020.5, 60000, 'OECD < BRICS.\nCovid19 pandemic has more affected\nOECD than BRICS.', 
         bbox=dict(facecolor='red'), fontsize=11)
org.text(2000, 19000, 
'Huge gap between (OECD, BRICS)\nand (CIS, Midlle-East)\nBRICS increases considerably its energy consumption.',
         bbox=dict(facecolor='yellow'), fontsize=11)
org.text(2002, 55000, 'OECD was affected by\n economic crisis 2008-2009.',
         bbox=dict(facecolor='red'), fontsize=11)
plt.vlines(2015, 11000, 62000, linestyles='dashed')
plt.vlines(2009, 46000, 62000, linestyles='dashed')
plt.title('Organization energy consumption', fontsize=30)
plt.show()

In [None]:
organisation.corr().style.background_gradient('PuOr')

BRICS and Middle-East are similar, which means that their energy consumption evolves in the same way (China buys oil from Saudi Arabia, Turkey buys gas from Gazprom in Russia). OECD and CIS move in opposite directions in energy consumption.

In [None]:
plt.figure(figsize=(15, 5))
sns.regplot(x='BRICS', y='Middle-East', data=organisation)
plt.text(40000, 8000, 'If BRICS enery consumption is up\nthen for Middle-East is also up.', 
         bbox=dict(facecolor='yellow'), fontsize=12)
plt.title('Energy consumption between BRICS and Middle-East')
plt.show()

In [None]:
plt.figure(figsize=(15, 5))
sns.regplot(x='OECD', y='CIS', data=organisation, order=3)
plt.text(59000, 15000, 'If OECD enery consumption is up\nthen for  CIS  is down.', 
         bbox=dict(facecolor='yellow'), fontsize=12)
plt.title('Energy consumption between OECD and CIS')
plt.show()

In [None]:
fig = plt.figure(figsize=(14, 10))
fig.subplots_adjust(wspace=0.5, hspace=0.5)
d_org = organisation[organisation.index.isin([2019, 2020])]
for i, u in enumerate(organization_name):
    ax = fig.add_subplot(2, 2, i+1)
    d_org[u].plot(kind='bar', title=f'{u}.', ax=ax, sharex=True, edgecolor='black')
    ax.set_ylabel('TWh')
    for i, a in enumerate(d_org[u].tolist()):
        ax.text(i, a, str(round(a, 2)), ha='center', bbox=dict(facecolor='yellow'), fontsize=13)
    #ax.text(2019, 40000, str(cr))
plt.suptitle('Energy consumption for year 2019 and 2020', fontsize=25)
plt.show()

In [None]:
crg = d_org.pct_change().mul(100).iloc[-1]
crg.plot.bar(title='Impact of Covid19 pandemic on Energy consumption.', figsize=(15,7), edgecolor='black')
plt.ylabel('Change rate (%)')
for i, a in enumerate(crg):
    plt.text(i, a, str(round(a,2)), ha='center', bbox=dict(facecolor='y'), fontsize=15)
plt.show()

In [None]:
fig_ = plt.figure(figsize=(14, 10))
fig_.subplots_adjust(wspace=0.5, hspace=0.5)
d_org_ = organisation[organisation.index.isin([2008, 2009])]
for i, u in enumerate(organization_name):
    ax = fig_.add_subplot(2, 2, i+1)
    d_org_[u].plot(kind='bar', title=f'{u}.', ax=ax, sharex=True, edgecolor='black')
    ax.set_ylabel('TWh')
    for i, a in enumerate(d_org_[u].tolist()):
        ax.text(i, a, str(round(a, 2)), ha='center', bbox=dict(facecolor='yellow'), fontsize=13)
    #ax.text(2019, 40000, str(cr))
plt.suptitle('Energy consumption for year 2008 and 2009.', fontsize=25)
plt.show()

In [None]:
crg_ = d_org_.pct_change().mul(100).iloc[-1]
crg_.plot.bar(title='Impact of Economic crisis 2008-2009 on Energy consumption.',
              figsize=(15,7), edgecolor='black')
plt.ylabel('Change rate (%)')
for i, a in enumerate(crg_):
    plt.text(i, a, str(round(a,2)), ha='center', bbox=dict(facecolor='y'), fontsize=15)
plt.show()

This chart clearly shows the impact of the 2008-2009 economic crisis on OECD and CIS. In contrast, BRICS and Middle-East performed well.

<div class=h2> I.3. Continent </div>

In [None]:
continent = energ_cont_conps[continent_name]

In [None]:
continent.plot(figsize=(15,12), title='Energy consumption in each continent.')
plt.text(1990, 45000, 'Big gap between Asia and North America continent\nand also other continent.', 
         bbox=dict(facecolor='green', alpha=0.75), fontsize=14)
plt.ylabel('TWh')
plt.show()

Let's make correlation. 

In [None]:
continent.corr().style.background_gradient('Reds')

In [None]:
from scipy import stats, linalg

def partial_corr(C):
    """
    Returns the sample linear partial correlation coefficients between pairs of variables in C, controlling
    for the remaining variables in C.

    For every pair (i, j) both variables are regressed by least squares on all the other
    columns (no intercept column is added), and the Pearson correlation of the two
    residual vectors is stored.

    Parameters
    ----------
    C : array-like, shape (n, p)
        Array with the different variables. Each column of C is taken as a variable
    Returns
    -------
    P : array-like, shape (p, p)
        P[i, j] contains the partial correlation of C[:, i] and C[:, j] controlling
        for the remaining variables in C. The matrix is symmetric with ones on the diagonal.
    """
    C = np.asarray(C)
    p = C.shape[1]
    # Built-in float/bool: the np.float / np.bool aliases were removed from NumPy.
    P_corr = np.eye(p, dtype=float)
    for i in range(p):
        for j in range(i + 1, p):
            # Boolean mask selecting every column except the pair under study.
            idx = np.ones(p, dtype=bool)
            idx[i] = False
            idx[j] = False
            controls = C[:, idx]

            coef_i = linalg.lstsq(controls, C[:, i])[0]
            coef_j = linalg.lstsq(controls, C[:, j])[0]

            # What is left of each variable once the controls are regressed out.
            res_i = C[:, i] - controls.dot(coef_i)
            res_j = C[:, j] - controls.dot(coef_j)

            corr = stats.pearsonr(res_i, res_j)[0]
            P_corr[i, j] = corr
            P_corr[j, i] = corr

    # Return only after every pair has been processed (the return used to sit inside the
    # outer loop, so only the first row/column was ever filled in).
    return P_corr

In [None]:
corrpartial = pd.DataFrame(partial_corr(continent[sorted(continent.columns.tolist())]),
                           columns=sorted(continent.columns.tolist()), 
                            index=sorted(continent.columns.tolist()))
corrpartial.style.background_gradient('viridis')

In [None]:
plt.figure(figsize=(15,5))
sns.regplot(x='Africa', y='Asia', data=continent, robust=True)
plt.title('Energy consumption (TWh)  Africa-Asia.')
plt.text(7000, 30000, 'China vs Africa.', bbox=dict(facecolor='red'))
plt.show()

In [None]:
fig1 = plt.figure(figsize=(14, 10))
fig1.subplots_adjust(wspace=0.5, hspace=0.5)
d_cont = continent[continent.index.isin([2019, 2020])]
for i, u in enumerate(continent_name):
    ax = fig1.add_subplot(2, 3, i+1)
    d_cont[u].plot(kind='bar', title=f'{u}', ax=ax, sharex=True, edgecolor='black')
    ax.set_ylabel('TWh')
    for i, a in enumerate(d_cont[u].tolist()):
        ax.text(i, a, str(round(a, 2)), ha='center', bbox=dict(facecolor='yellow'), fontsize=14)
    #ax.text(2019, 40000, str(cr))
plt.suptitle('Energy consumption for year 2019 and 2020', fontsize=25)
plt.show()

In [None]:
cr = d_cont.pct_change().mul(100).iloc[-1]
cr.plot.bar(title='Impact of Covid19 pandemic on Energy consumption.', figsize=(15,7), edgecolor='black')
plt.ylabel('Change rate (%)')
for i, a in enumerate(cr):
    plt.text(i, a, str(round(a,2)), ha='center', bbox=dict(facecolor='y'))
plt.show()

The Covid19 pandemic considerably impacted the energy consumption of the six continents. Asia, with China, lost only 0.47% of its energy consumption, against 6.74% for Europe and 7.48% for North America.

In [None]:
fig1_ = plt.figure(figsize=(14, 10))
fig1_.subplots_adjust(wspace=0.5, hspace=0.5)
d_cont_ = continent[continent.index.isin([2008, 2009])]
for i, u in enumerate(continent_name):
    ax = fig1_.add_subplot(2, 3, i+1)
    d_cont_[u].plot(kind='bar', title=f'{u}.', ax=ax, sharex=True, edgecolor='black')
    ax.set_ylabel('TWh')
    for i, a in enumerate(d_cont_[u].tolist()):
        ax.text(i, a, str(round(a, 2)), ha='center', bbox=dict(facecolor='yellow'), fontsize=12)
plt.suptitle('Energy consumption for year 2008-2009', fontsize=25)
plt.show()

In [None]:
cr_ = d_cont_.pct_change().mul(100).iloc[-1]
cr_.plot.bar(title='Impact of Economic crisis 2008-2009 on Energy consumption.', 
             figsize=(15,7), edgecolor='black')
plt.ylabel('Change rate (%)')
for i, a in enumerate(cr_):
    plt.text(i, a, str(round(a,2)), ha='center', bbox=dict(facecolor='y'))
plt.show()

<div class=h1> II. Country energy consumption </div>
In this section, we study energy consumption in each country and see how each country improves its power used.

In [None]:
country = pd.read_csv('Country_Consumption_TWH.csv')
country.set_index('Year',inplace=True)

In [None]:
country.head()

In [None]:
country.info()

This dataset has 44 countries.

In [None]:
# Manual assignment of the dataset's country columns to regions.
europe = ['Belgium', 'Czechia','France','Germany','Italy','Netherlands','Poland','Portugal',
         'Romania','Spain', 'Sweden','Norway','Turkey', 'Kazakhstan','Russia','Ukraine','United Kingdom',
         'Uzbekistan']
asia = ['China','Indonesia','Japan','Malaysia','South Korea','Taiwan','Thailand','India','Iran',
       'Kuwait','Saudi Arabia','United Arab Emirates']
africa = ['Algeria','Egypt','Nigeria','South Africa']
oceania = ['New Zealand', 'Australia']
north_america = ['United States','Canada']

# Every column not listed above is treated as Latin America. Filter the columns in their
# original order: a set difference returns them in an arbitrary order that can change
# between kernel sessions, which would shuffle the subplots built from this list.
_assigned = set(europe + asia + africa + oceania + north_america)
latin_america = [name for name in country.columns if name not in _assigned]

<div class=h2> II.1. Energy consumption visualization by region </div>

### Europe

In [None]:
country[europe].plot(subplots=True, figsize=(18, 18), layout=(6,3), sharex=True)
plt.show()

### Asia

In [None]:
country[asia].plot(subplots=True, figsize=(20, 15), layout=(4,3), sharex=True)
plt.show()

### Latin and North America

In [None]:
country[latin_america+north_america].plot(subplots=True, layout=(2, 4), figsize=(20, 10))
plt.show()

Covid19 pandemic impacts energy production.

### Africa and Oceania

In [None]:
country[africa+oceania].plot(subplots=True, layout=(2,3), figsize=(15, 10))
plt.show()

<div class=h2>II.2. The impact of Covid19 pandemic on Energy consumption.</div>
    
We are going to study the impact of Covid19 on energy consumption for each country. To do so, we compute the change rate of energy consumption between 2019 and 2020. Afterwards, we compare it with the impact of the 2008-2009 economic crisis on energy consumption. Let's go.

In [None]:
impact_2020 = country[country.index.isin([2019, 2020])]
impact_2008 = country[country.index.isin([2008, 2009])]

In [None]:
lost_ec_2020 = impact_2020.pct_change().mul(100).iloc[1]
#100*(impact_2020.diff(1)/impact_2020).iloc[1]

In [None]:
ig = lost_ec_2020.plot.bar(figsize=(18,8), title='Impact of Covid19 pandemic on energy consumption.')
plt.ylabel('Change rate (%)')
for i, u in enumerate(lost_ec_2020):
    ig.text(i, u, str(round(u)), bbox=dict(facecolor='y'), ha='center', fontsize=12)
plt.text(18, -20, 'LOST', bbox=dict(facecolor='red'))
plt.text(18, 3, 'GAIN', bbox=dict(facecolor='g'))
plt.show()

Only 3 countries have a positive change rate: China, Nigeria and Kuwait. Only one of the BRICS countries grows, while all OECD countries are down.

In [None]:
#for 2008
lost_ec_2008 = impact_2008.pct_change().mul(100).iloc[1]
#100*(impact_2008.diff(1)/impact_2008).iloc[1]

In [None]:
jg = lost_ec_2008.plot.bar(figsize=(18,8), title='Impact of economic crisis 2008-2009 on energy consumption.')
plt.ylabel('Change rate (%)')
for i, u in enumerate(lost_ec_2008):
    jg.text(i, u, str(round(u)), bbox=dict(facecolor='y'), ha='center', fontsize=12)
plt.text(20, -15, 'LOST', bbox=dict(facecolor='red'))
plt.text(18, 3, 'GAIN', bbox=dict(facecolor='g'))
plt.show()

11 countries have a positive change rate.

<div class=h1> III. Renewable and Non-Renewable energy </div>

In [None]:
non_renewable = pd.read_csv('nonRenewablesTotalPowerGeneration.csv')
renewable = pd.read_csv('renewablesTotalPowerGeneration.csv')

### Non-Renewable

In [None]:
non_renewable.head()

In [None]:
non_renewable.set_index('Mode of Generation', inplace=True) 

In [None]:
non_renewable.drop(labels='Total', inplace=True)

In [None]:
ng = non_renewable.plot.bar(figsize=(15, 8), edgecolor='black')
plt.title('Type of non-renewable energy source.')
for u, w in enumerate(non_renewable['Contribution (TWh)']):
    ng.text(u, w, str(w), bbox=dict(facecolor='y'), ha='center', fontsize=14)
plt.show()

Coal produces more non-renewable energy than Natural Gas, Nuclear, Oil, etc.

### Renewable energy

In [None]:
renewable.head()

In [None]:
renewable.set_index('Mode of Generation', inplace=True) 

In [None]:
renewable.drop(labels='Total', inplace=True)

In [None]:
rg = renewable.plot.bar(figsize=(15, 8), edgecolor='black')
plt.title('Type of renewable energy source.')
for u, w in enumerate(renewable['Contribution (TWh)']):
    rg.text(u, w, str(w), bbox=dict(facecolor='y'), fontsize=15, ha='center')
plt.show()

<div class=h1> IV. Top 20 countries power generation </div>

In [None]:
topcountries = pd.read_csv('top20CountriesPowerGeneration.csv')

In [None]:
topcountries.head(20)

In [None]:
topcountries.info()

The top 20 countries use 4 types of renewable energy. Let's visualize the total renewable energy generated.

In [None]:
topcountries.describe()

In [None]:
topcountries.drop(columns='Total (TWh)').plot.box(subplots=True, figsize=(15,5),
                                                  title='Type of renewable boxplot')
plt.show()

In [None]:
plt.figure(figsize=(15,5))
sns.histplot(topcountries['Solar PV (TWh)'], bins=15)
plt.title('Solar PV distribution')
plt.show()

In [None]:
gt=topcountries.plot(x='Country', y='Total (TWh)', figsize=(15, 10), kind='bar')
plt.hlines(topcountries['Total (TWh)'].mean(), -1, 20, linestyles='dashed', label='Mean')
plt.title('Total power generation.')
plt.ylabel('TWh')
gt.text(4, 1000, 'Only China, USA, Brazil and Canada have total power generation greater than 250 TWh.',
       bbox=dict(facecolor='yellow'), fontsize=12)
gt.vlines(1, 750, 1819, linestyles='dashdot', label='Gap')
gt.text(1.2, 1500, f'Gap={1061.321} TWh.', bbox=dict(facecolor='red'), alpha=0.5, fontsize=12)
plt.legend(loc='best')
plt.show()

<div class=h2> IV.1. Which countries use which type of renewable energy? </div>

We are going to find which type of renewable energy China uses, and likewise for the 19 remaining countries.

In [None]:
data = topcountries.drop(columns=['Total (TWh)']).set_index('Country')

In [None]:
pcapipe = Pipeline([('scaler', MaxAbsScaler()), ('pca', PCA(n_components=0.95, whiten=True, random_state=0))])

In [None]:
X = pcapipe.fit_transform(data)

In [None]:
pca = pcapipe['pca']

In [None]:
# PCA was configured with n_components=0.95, so the number of components kept is decided
# by the data. Build the row labels from the actual count instead of assuming exactly
# three; a fixed three-name index raises as soon as PCA keeps a different number.
components = pd.DataFrame(
    pca.components_,
    index=[f'PC{k}' for k in range(1, pca.components_.shape[0] + 1)],
    columns=data.columns,
)

In [None]:
components.style.background_gradient('viridis')

In [None]:
# Heat-map of the PCA loadings: one row per principal component, one column per feature.
plt.matshow(components.values, cmap='viridis')
plt.grid(False)
# Take the tick positions and labels from the table itself so the plot stays correct
# whatever number of components it holds.
plt.yticks(range(len(components.index)), components.index)
#plt.colorbar()
plt.xticks(range(len(components.columns)), components.columns, rotation=60, ha='left')
plt.xlabel("Feature")
plt.ylabel("Principal components")
plt.show()

In [None]:
# Scatter of the countries in the (PC1, PC2) plane, each point labelled with its country.
_, axp1 = plt.subplots(1, 1, figsize=(15, 10), sharex=True)
axp1.scatter(X[:, 0], X[:, 1], label='Figure I')
# With textcoords='offset points', xytext is an offset from the marker measured in points,
# not a data position. Use a small fixed offset so every label sits just beside its
# marker (the previous value, the data coordinate plus 0.05, moved each label by a
# different, data-dependent amount).
for country_name, pc1, pc2 in zip(data.index, X[:, 0], X[:, 1]):
    axp1.annotate(country_name, xy=(pc1, pc2), xytext=(5, -5), textcoords='offset points')

axp1.set_title('PCA: type of renewable energy source.')
axp1.set_ylabel('PC2')
axp1.set_xlabel('PC1')
axp1.legend(loc='best')
axp1.text(1.5, 0.15, 
"""All values < 0 along PC1 axis are the value where\nits Biofuel(TWh) < 51.15 and Solar PV(TWh) < 19.83.\n
All values < 0 along PC2 axis are the value where\nits Geothermal(TWh) < 2.67.
          """,
         fontsize=15, bbox=dict(facecolor='red'))
plt.show()

In [None]:
# Scatter of the countries in the (PC1, PC3) plane, each point labelled with its country.
_, axp2 = plt.subplots(1, 1, figsize=(15, 10), sharex=True)
axp2.scatter(X[:, 0], X[:, 2], label='Figure II')
# With textcoords='offset points', xytext is an offset from the marker measured in points,
# not a data position. Use a small fixed offset so every label sits just beside its
# marker. The previous vertical offset was additionally derived from the PC2 column
# (X[i, 1]), a copy-paste leftover that has no meaning on this PC1/PC3 plot.
for country_name, pc1, pc3 in zip(data.index, X[:, 0], X[:, 2]):
    axp2.annotate(country_name, xy=(pc1, pc3), xytext=(5, -5), textcoords='offset points')
axp2.set_title('PCA: type of renewable energy source.')
axp2.set_xlabel('PC1')
axp2.set_ylabel('PC3')
axp2.legend(loc='best')
axp2.text(1.5, 1, 
          'USA is the leader in Geothermal energy.\nChina is the leader in Hydro and moreless Biofuel.',
         fontsize=15, bbox=dict(facecolor='red'))
plt.show()

In [None]:
# Scatter of the countries in the (PC2, PC3) plane, each point labelled with its country.
plt.figure(figsize=(15,10))
plt.scatter(X[:, 1], X[:, 2], label='Score')
# With textcoords='offset points', xytext is an offset from the marker measured in points,
# not a data position. Use a small fixed offset so every label sits just beside its
# marker (the previous value, the data coordinate plus 0.05, moved each label by a
# different, data-dependent amount).
for country_name, pc2, pc3 in zip(data.index, X[:, 1], X[:, 2]):
    plt.annotate(country_name, xy=(pc2, pc3), xytext=(5, -5), textcoords='offset points')
plt.xlabel('PC2')
plt.ylabel('PC3')
plt.title('Type of renewable energy')
plt.show()

## Clustering



In [None]:
def ward_hierarchical_clustering(feature_matrix):
    """
    Build a Ward linkage from the pairwise cosine distances between the rows of
    ``feature_matrix``.

    Parameters
    ----------
    feature_matrix : array-like, shape (n_samples, n_features)
        One row per item to cluster.

    Returns
    -------
    The linkage matrix returned by ``scipy.cluster.hierarchy.ward``.

    NOTE(review): the square (n_samples x n_samples) distance matrix is handed to
    ``ward`` as-is. SciPy documents the input as either a condensed 1-D distance
    vector or a 2-D array of observations, so the square matrix is presumably
    treated as observations rather than as distances — confirm this is intended
    before changing it, because the cut position used when plotting depends on
    the resulting scale. ``single``, ``complete`` and ``average`` are the other
    linkages imported by the notebook and could be swapped in here.
    """
    similarity = cosine_similarity(feature_matrix)
    return ward(1 - similarity)

In [None]:
def plot_hierarchical_clusters(linkage_matrix, data, p=100, figure_size=(11,11)):
    """
    Draw a left-oriented dendrogram whose leaves are labelled
    ``'<row label>__<cluster colour>'`` and save it as a PNG file.

    Parameters
    ----------
    linkage_matrix : array-like
        Linkage matrix accepted by ``scipy.cluster.hierarchy.dendrogram``.
    data : pandas.DataFrame
        Its index provides the label of each clustered row, in the order of the
        rows the linkage was built from.
    p : int, default 100
        Passed to ``dendrogram`` together with ``truncate_mode='lastp'``.
    figure_size : tuple, default (11, 11)
        Figure size handed to ``plt.subplots``.

    Returns
    -------
    None
        The figure is drawn on a new axes and written to
        'country_renewable_usage_hierachical_clusters.png'.
    """
    _, ax1 = plt.subplots(figsize=figure_size)
    names = data.index.tolist()

    # Dry run (nothing is drawn): collect, in display order, the leaf ids, the label of
    # each leaf and the colour of the cluster it falls in.
    layout = dendrogram(linkage_matrix, orientation="left", labels=names,
                        truncate_mode='lastp', p=p, no_plot=True, distance_sort=True)

    # Key the labels by leaf id. 'leaves', 'ivl' and 'leaves_color_list' are aligned
    # position by position, so zipping them pairs each leaf with its own label; indexing
    # the original name list by display position attached names to the wrong leaves.
    leaf_text = {
        leaf: f"{label}__{colour}"
        for leaf, label, colour in zip(layout["leaves"], layout["ivl"],
                                       layout["leaves_color_list"])
    }

    def llf(xx):
        return "{}".format(leaf_text[xx])

    dendrogram(linkage_matrix, truncate_mode='lastp', orientation="left", p=p,
               leaf_label_func=llf, leaf_font_size=20., ax=ax1)

    ax1.axvline(x=5, linestyle='--', color='black', label="3 clusters")
    # tick_params expects booleans: the string 'off' is truthy and therefore left the
    # ticks and tick labels switched on.
    ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.tight_layout()
    ax1.legend(loc='upper left')
    plt.savefig('country_renewable_usage_hierachical_clusters.png', dpi=200)

In [None]:
dist = ward_hierarchical_clustering(X)

In [None]:
plot_hierarchical_clusters(dist, data)

<div class=h2>IV.2. Which types of renewable are improving the fastest?</div>

In [None]:
totalpower = pd.read_csv('renewablePowerGeneration97-17.csv')

In [None]:
totalpower.head()

In [None]:
totalpower.set_index('Year', inplace=True)

In [None]:
totalpower.plot(figsize=(15,8), title='Type of renewable: time series', subplots=True, layout=(2,2))
plt.show()

In [None]:
growth = totalpower.pct_change()

In [None]:
growth.plot(figsize=(15,8), title='Type of renewable: change rate', subplots=True,
                        layout=(2,2))
plt.show()

Solar PV improves the fastest.

In [None]:
cg = growth['Solar PV (TWh)'].plot(figsize=(15,5), legend=True, title='Change rate')
cg.hlines(growth['Solar PV (TWh)'].mean(), 1990, 2020, label='Mean', linestyle='dashed')
cg.set_ylabel('Change rate')
plt.legend(loc='best')
plt.show()