# Generate simple plot for (every) indicator
This **pipeline** shows how to generate a simple single plot starting from any .csv file in the *source* folder. It is a generic version, which means that minor changes will be needed for each file.

## Preliminaries

In [None]:
# Imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the raw source table and the indicator metadata used by this pipeline
source_name = 'wbank_Indic'
df_meta = pd.read_csv('Indicators_metadata.csv')
df = pd.read_csv(f'source_data/Energy/{source_name}.csv')

### Preview of the DataFrame

In [None]:
# Check column names: list every column label of the raw DataFrame
df.columns

In [None]:
# Preview the first rows of the raw DataFrame
df.head()

In [None]:
# Shorten the 'Country Name' column label to 'Country'
df = df.rename({'Country Name': 'Country'}, axis='columns')

In [None]:
# List the distinct country names available in the data
df['Country'].unique()

In [None]:
# Check column characteristics: summary statistics of the raw DataFrame
df.describe()

### Set Index temporarily to ease manipulation and guarantee final homogeneity

In [None]:
# Use 'Country' as the index so rows can later be selected by country name
index = ['Country']
df=df.set_index(index)

### Select countries and time range

In [None]:
# Countries to keep (careful with the names: they are case sensitive, etc.)
sel_c = ['United States', 'India', 'Germany', 'China']
# Year interval given as [first, stop]; the stop value is one past the last
# year that ends up in sel_years
sel_y = [2000, 2019]
sel_years = list(range(*sel_y))
# Same years as text
sel_years_txt = list(map(str, sel_years))

In [None]:
# Apply selection criteria
# Keep the half-open year interval [sel_y[0], sel_y[1]) so the selected rows
# match sel_years exactly; a strict '>' on the lower bound silently dropped
# the first selected year.
in_years = (df['Time'] >= sel_y[0]) & (df['Time'] < sel_y[1])
df_p = df[in_years]
# 'Country' is the index at this point, so countries are matched on the index
df_p = df_p[df_p.index.isin(sel_c)]

In [None]:
# Summarise the filtered selection (df_p), not the full raw table, so the
# effect of the country/year filter can be verified
df_p.describe()

### Reform the DF to a suitable form

In [None]:
# Replace the long source column labels with short names to improve coding
# readability (USING 'KEYs' as referenced in the metadata file)
short_names = {
    'Total electricity output (GWh) [4.1.1_TOTAL.ELECTRICITY.OUTPUT]': 'a_eprod',
    'Total final energy consumption (TFEC) (TJ) [1.1_TOTAL.FINAL.ENERGY.CONSUM]': 'a_econ',
    'Energy intensity level of primary energy (MJ/2011 USD PPP) [6.1_PRIMARY.ENERGY.INTENSITY]': 'a_inten',
    'Renewable electricity share of total electricity output (%) [4.1_SHARE.RE.IN.ELECTRICITY]': 'a_sharo',
    'Renewable energy share of TFEC (%) [2.1_SHARE.TOTAL.RE.IN.TFEC]': 'a_shart',
    'Time': 'Years',
}
df_p = df_p.rename(columns=short_names)

In [None]:
# Confirm the column labels after renaming
df_p.columns

In [None]:
# Keep only the year column and the five renamed indicator columns
kept_columns = ['Years', 'a_eprod', 'a_econ', 'a_inten', 'a_sharo', 'a_shart']
df_p = df_p[kept_columns]

In [None]:
# Reset index to return DF to a tidy state: the index set earlier ('Country')
# becomes a regular column again
df_p=df_p.reset_index()

In [None]:
# Melt to a Long format
#df_p=df_p.reset_index().melt(id_vars='Country')
#df_p2=df_p.melt()

In [None]:
# Preview the first rows of the selected and reshaped DataFrame
df_p.head()

In [None]:
# Inspect a single 'a_econ' value (row label 1) before any type coercion
df_p.loc[1, 'a_econ']

### 2.4 Do further necessary adjustments

In [None]:
# Show column types to spot columns that still need coercion
display(df_p.dtypes)

In [None]:
# Coerce column types when needed
# Years are stored as integers
df_p['Years'] = df_p['Years'].astype(int, copy=False)
# Indicator columns: errors='coerce' turns unparseable entries into NaN
for indicator_col in ('a_eprod', 'a_econ', 'a_inten', 'a_sharo', 'a_shart'):
    df_p[indicator_col] = pd.to_numeric(df_p[indicator_col], errors='coerce')

In [None]:
# Summary statistics of the selection after type coercion
df_p.describe()

### Extract corresponding metadata 

For this file in particular, **5 plots** can be obtained, each with its corresponding indicator.

In [None]:
# Call metadata of the indicator, indexed by KEY so fields can be looked up
# directly by indicator key
df_meta_temp = df_meta.set_index('KEY')
# Select the indicator to plot. It must be one of the renamed data columns
# ('a_eprod', 'a_econ', 'a_inten', 'a_sharo', 'a_shart'); the previous value
# 'a_sharr' matched none of them, so every later df_p[Key_name] lookup failed.
# NOTE(review): 'a_shart' is assumed to be the intended key - confirm.
Key_name = 'a_shart'
# Fail early with a clear message instead of an opaque KeyError further down
if Key_name not in df_p.columns:
    raise KeyError(f"Key_name {Key_name!r} is not a column of df_p: {list(df_p.columns)}")

In [None]:
# List the indicator keys available in the metadata
df_meta_temp.index

In [None]:
# Pull the metadata fields of the selected indicator (row Key_name)
Units_ind = df_meta_temp.loc[Key_name, 'UNIT']
Origin_ind = df_meta_temp.loc[Key_name, 'SITE']
Name_ind = df_meta_temp.loc[Key_name, 'INDICATOR']
Desc_ind = df_meta_temp.loc[Key_name, 'DESCRIPTION']

### Plot the indicator

In [None]:
# Graph aesthetics (Guillermo's preferences): notebook context, darkgrid
# style, fonts scaled by 1.5.
# Further seaborn features, or an eventual coupling with R to use the ggplot
# library, are to be introduced here.
sns.set(context='notebook', style='darkgrid', font_scale=1.5)

In [None]:
# Re-check the available columns before plotting
df_p.columns

In [None]:
# Re-check the first rows of the data that will be plotted
df_p.head()

In [None]:
# Sum of the selected indicator column; raises KeyError if Key_name is not a
# column of df_p
df_p[Key_name].sum()

In [None]:
# Simple plot: one series of points per country on a default-sized figure
plt.figure()
simple_ax = sns.pointplot(data=df_p, x='Years', y=Key_name, hue='Country')
# Label the y-axis with the indicator's unit taken from the metadata
simple_ax.set_ylabel(Units_ind)
# Location code 1 is the upper-right corner
simple_ax.legend(loc='upper right')

In [None]:
# Sophisticated Plot

# Set figure size (width, height) in inches
fig, ax = plt.subplots(figsize=(15, 6))

# Draw one series of points per country on the prepared axes
sns.pointplot(ax=ax, x='Years', y=Key_name, data=df_p, hue='Country')

# Annotate the data origin in a shaded box; the position is expressed in
# axes fractions because of transform=ax.transAxes
text_ind = '\n'.join(('Taken from:', Origin_ind))
ax.text(x=0.2, y=0.8, s=text_ind, fontsize=15, transform=ax.transAxes,
        bbox=dict(facecolor='wheat', alpha=0.4))

# Set label for x-axis
ax.set_xlabel('Years', size=12)

# Set label for y-axis (unit of the indicator, from the metadata)
ax.set_ylabel(Units_ind, size=12)

# Set title for plot (indicator name, from the metadata)
ax.set_title(Name_ind, size=24)

# Save the figure BEFORE displaying it: after plt.show() the current figure
# may already be released (backend dependent), and saving then writes a blank
# image. Saving through `fig` also targets this figure explicitly.
fig.savefig(Key_name + '.jpg')

# Display figure
plt.show()