In [1]:
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.basemap import Basemap
import pandas as pd
import numpy as np
import matplotlib.patches
from matplotlib import cm
import geopandas as gpd

In [2]:
# Load the hydropower data
path = "/home/shah0012/GloHydroRes/Output_data/"
glohydrores_data = pd.read_excel(path + "glohydrores_v1.xlsx",  header = 0, sheet_name = "data")

print(glohydrores_data.head())

         ID      country                                               name  \
0  GHR00001  Afghanistan       Kajaki Hydroelectric Power Plant Afghanistan   
1  GHR00002  Afghanistan      Mahipar Hydroelectric Power Plant Afghanistan   
2  GHR00003  Afghanistan   Naghlu Dam Hydroelectric Power Plant Afghanistan   
3  GHR00004  Afghanistan  Nangarhar (Darunta) Hydroelectric Power Plant ...   
4  GHR00005  Afghanistan  Pul-e-Khumri Hydroelectric Power Plant Afghani...   

   capacity_mw  plant_lat  plant_lon plant_type  \
0       151.00    32.3220    65.1190        STO   
1        66.00    34.5560    69.4787        STO   
2       100.00    34.6410    69.7170        ROR   
3        11.55    34.4847    70.3633        STO   
4         6.00    35.9416    68.7100        ROR   

                                   plant_type_source    year plant_source  \
0  https://www.power-technology.com/data-insights...  1975.0          WRI   
1    https://globalenergyobservatory.org/geoid/40541  1967.0    

In [4]:
countries_continent_names = pd.read_csv("/home/shah0012/GloHydroRes/Input_data/Countryshapefile/Countries-Continents.csv")
countries_continent_dict = dict(zip(countries_continent_names["Country"], countries_continent_names["Continent"]))

# Assigning continent names to the countries in the shape file country id data. This is one not entirely true as some countries are have different names in continent data and glohydrores data. Few countries are not present in the 
glohydrores_data["continent"] = glohydrores_data.country.map(countries_continent_dict)
print(glohydrores_data.country[pd.isna(glohydrores_data["continent"])])

986                 Burkina Faso
987                 Burkina Faso
988                 Burkina Faso
2614              Czech Republic
2615              Czech Republic
                  ...           
7557    United States of America
7771                    eSwatini
7772                    eSwatini
7773                    eSwatini
7774                    eSwatini
Name: country, Length: 1458, dtype: object


In [None]:
# divide the hydropower dataframe based on different type of hydropower plants dataframe
hydropower_data_PS =  glohydrores_data[glohydrores_data["plant_type"] == "PS"]
hydropower_data_STO =  glohydrores_data[glohydrores_data["plant_type"] == "STO"]
hydropower_data_ROR =  glohydrores_data[glohydrores_data["plant_type"].isin(["ROR", "Canal"])] 

In [None]:
# Categoried each hydropower plant based on its capacity. Created three categories <5 MW, 5-500 MW, >500 MW
hydropower_data_PS["categories"] = pd.cut(hydropower_data_PS["capacity_mw"], bins=[0, 5, 500, hydropower_data_PS["capacity_mw"].max()], labels=["<5 (MW)", "5-500 (MW)", ">500 (MW)"])
hydropower_data_STO["categories"] = pd.cut(hydropower_data_STO["capacity_mw"], bins=[0, 5, 500, hydropower_data_STO["capacity_mw"].max()], labels=["<5 (MW)", "5-500 (MW)", ">500 (MW)"])
hydropower_data_ROR["categories"] = pd.cut(hydropower_data_ROR["capacity_mw"], bins=[0, 5, 500, hydropower_data_ROR["capacity_mw"].max()], labels=["<5 (MW)", "5-500 (MW)", ">500 (MW)"])


In [None]:
PS_plants_stats = hydropower_data_PS.groupby(["categories", "continent"])["capacity_mw"].sum().reset_index()
PS_plants_stats["number_of_plants"]  = hydropower_data_PS.groupby(["categories", "continent"])["plant_source_ID"].count().reset_index()["plant_source_ID"] # Number of plants in each category
PS_plants_stats["capacity_gw"] = PS_plants_stats["capacity_mw"]/1000 # Convert capacity to GW

STO_plants_stats = hydropower_data_STO.groupby(["categories", "continent"])["capacity_mw"].sum().reset_index()
STO_plants_stats["number_of_plants"]  = hydropower_data_STO.groupby(["categories", "continent"])["plant_source_ID"].count().reset_index()["plant_source_ID"]
STO_plants_stats["capacity_gw"] = STO_plants_stats["capacity_mw"]/1000


ROR_plants_stats = hydropower_data_ROR.groupby(["categories", "continent"])["capacity_mw"].sum().reset_index()
ROR_plants_stats["number_of_plants"]  = hydropower_data_ROR.groupby(["categories", "continent"])["plant_source_ID"].count().reset_index()["plant_source_ID"]
ROR_plants_stats["capacity_gw"] = ROR_plants_stats["capacity_mw"]/1000

In [None]:
continent_name = PS_plants_stats.continent.unique()


def data_covered_plot_func(col_name, all_df):
    not_null_plants =  all_df[
        (all_df[col_name].notnull())]

    installed_capacity_group_plant_count = all_df.groupby(["categories", "continent"])["plant_source_ID"].count().reset_index()
    not_null_plants = not_null_plants.groupby(["categories", "continent"])["plant_source_ID"].count().reset_index()
    final_df = pd.merge(installed_capacity_group_plant_count, not_null_plants, on = ["categories", "continent"], how = "left")
    print(final_df)
    final_df["proportion_covered"] = (final_df["plant_source_ID_y"] / final_df["plant_source_ID_x"])*100
    print(final_df)
    return final_df



def display_figures(ax,df):
    show=df.number_of_plants.to_list()
    i=0
    for p in ax.patches:
        h=p.get_height()
        if (h>=0):
            value=show[i]
            ax.text(p.get_x()+p.get_width()/2,h+0.01, value, ha='center', size = 10)
            i=i+1



import seaborn as sns
fig, axes = plt.subplots(3, 4, figsize=[15,15])

sns.barplot(x='continent', y='capacity_gw', ax = axes[0,0], data= ROR_plants_stats, hue = 'categories')
display_figures(axes[0,0], ROR_plants_stats)
axes[0,0].set_xlabel(" ")
axes[0,0].set_ylabel("ROR + Canal\nCapacity (GW)", size = 15)
axes[0,0].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[0,0].tick_params(axis = "y", labelsize = 12) 
axes[0,0].legend().remove()

installed_capacity_group_plant_count = data_covered_plot_func("head_m", hydropower_data_ROR)
sns.barplot(x='continent', y='proportion_covered', ax = axes[0,1], data= installed_capacity_group_plant_count, hue = 'categories')
axes[0,1].set_xlabel(" ")
axes[0,1].set_ylabel("Data available (%)", size = 15) 
axes[0,1].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[0,1].tick_params(axis = "y", labelsize = 12)
axes[0,1].legend().remove()


installed_capacity_group_plant_count = data_covered_plot_func("res_vol_km3", hydropower_data_ROR)
sns.barplot(x='continent', y='proportion_covered', ax = axes[0,2], data= installed_capacity_group_plant_count, hue = 'categories')
axes[0,2].set_xlabel(" ")
axes[0,2].set_ylabel("Data available (%)", size = 15) 
axes[0,2].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[0,2].tick_params(axis = "y", labelsize = 12)
axes[0,2].legend().remove()


installed_capacity_group_plant_count = data_covered_plot_func("res_area_km2", hydropower_data_ROR)
sns.barplot(x='continent', y='proportion_covered', ax = axes[0,3], data= installed_capacity_group_plant_count, hue = 'categories')
axes[0,3].set_xlabel(" ")
axes[0,3].set_ylabel("Data available (%)", size = 15) 
axes[0,3].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[0,3].tick_params(axis = "y", labelsize = 12)
axes[0,3].legend().remove()


sns.barplot(x='continent', y='capacity_gw', ax = axes[1,0], data= STO_plants_stats, hue = 'categories')
display_figures(axes[1,0], STO_plants_stats)
axes[1,0].set_xlabel(" ")
axes[1,0].set_ylabel("STO\nCapacity (GW)", size = 15)
axes[1,0].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[1,0].tick_params(axis = "y", labelsize = 12)
axes[1,0].legend().remove()



installed_capacity_group_plant_count = data_covered_plot_func("head_m", hydropower_data_STO)
sns.barplot(x='continent', y='proportion_covered', ax = axes[1,1], data= installed_capacity_group_plant_count, hue = 'categories')
axes[1,1].set_xlabel(" ")
axes[1,1].set_ylabel("Data available (%)", size = 15) 
axes[1,1].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[1,1].tick_params(axis = "y", labelsize = 12)
axes[1,1].legend().remove()

installed_capacity_group_plant_count = data_covered_plot_func("res_vol_km3", hydropower_data_STO)
sns.barplot(x='continent', y='proportion_covered', ax = axes[1,2], data= installed_capacity_group_plant_count, hue = 'categories')
axes[1,2].set_xlabel(" ")
axes[1,2].set_ylabel("Data available (%)", size = 15) 
axes[1,2].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[1,2].tick_params(axis = "y", labelsize = 12)
axes[1,2].legend().remove()


installed_capacity_group_plant_count = data_covered_plot_func("res_area_km2", hydropower_data_STO)
sns.barplot(x='continent', y='proportion_covered', ax = axes[1,3], data= installed_capacity_group_plant_count, hue = 'categories')
axes[1,3].set_xlabel(" ")
axes[1,3].set_ylabel("Data available (%)", size = 15) 
axes[1,3].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[1,3].tick_params(axis = "y", labelsize = 12)
axes[1,3].legend().remove()




sns.barplot(x='continent', y='capacity_gw', ax = axes[2,0], data= PS_plants_stats, hue = 'categories')
display_figures(axes[2,0], PS_plants_stats)
axes[2,0].set_xlabel(" ")
axes[2,0].set_ylabel("PS\nCapacity (GW)", size = 15)
axes[2,0].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[2,0].tick_params(axis = "y", labelsize = 12)
axes[2,0].legend().remove()



installed_capacity_group_plant_count = data_covered_plot_func("head_m", hydropower_data_PS)
sns.barplot(x='continent', y='proportion_covered', ax = axes[2,1], data= installed_capacity_group_plant_count, hue = 'categories')
axes[2,1].set_xlabel(" ")
axes[2,1].set_ylabel("Data available (%)", size = 15) 
axes[2,1].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[2,1].tick_params(axis = "y", labelsize = 12)
axes[2,1].legend().remove()


installed_capacity_group_plant_count = data_covered_plot_func("res_vol_km3", hydropower_data_PS)
sns.barplot(x='continent', y='proportion_covered', ax = axes[2,2], data= installed_capacity_group_plant_count, hue = 'categories')
axes[2,2].set_xlabel(" ")
axes[2,2].set_ylabel("Data available (%)", size = 15) 
axes[2,2].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[2,2].tick_params(axis = "y", labelsize = 12)
#axes[2,2].set_yticks()
axes[2,2].legend().remove()


installed_capacity_group_plant_count = data_covered_plot_func("res_area_km2", hydropower_data_PS)
sns.barplot(x='continent', y='proportion_covered', ax = axes[2,3], data= installed_capacity_group_plant_count, hue = 'categories')
axes[2,3].set_xlabel(" ")
axes[2,3].set_ylabel("Data available (%)", size = 15) 
#axes[2,3].set_xticklabels(continent_name, rotation = 45)
axes[2,3].tick_params(axis = "x", labelrotation = 45, labelsize = 12)
axes[2,3].tick_params(axis = "y", labelsize = 12)

axes[2,3].legend().remove()

handles, labels = axes[0,0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=3, title="Categories", frameon = False,  fontsize = 15, title_fontsize = 15)


plt.tight_layout(pad=2);

plt.savefig("/home/shah0012/GloHydroRes/Figure/bar_chart_hydropower_cat_capacity.pdf", bbox_inches='tight');