# Global Entities - Map Visualizer

#### Version 1.0

### TODO

- Plotly basic map plot
- Plotly with group level filter
- Plotly with entity filter for group
- Plotly lineplot/scatterplot/heatmap for selected entity
- Plotly Dash app

### Observations

## Setup

### Import Packages

In [52]:
from datetime import datetime, date
from pprint import pprint
from collections import OrderedDict
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Set ipython's max row display
# pd.set_option('display.max_row', 1000)
# Set iPython's max column width
pd.set_option('display.max_columns', 50)

sns.set_style("darkgrid")
plt.rcParams['figure.figsize'] = [20, 10]

### Functions

In [53]:
def masked_heatmap():
    mask = np.zeros_like(corr)
    mask[np.triu_indices_from(mask)] = True
    fig,ax = plt.subplots()
    sns.heatmap(corr,center = 0, fmt = ".3f", square = True, annot= True, mask=mask)

## Global Variables

#### Dataset Parameters

In [54]:
file_name = '../data/silver_tables/yearly_values_per_entity.csv'

year_range = [1960, 2022]
metric_agg = 'last_year'
metric_agg = 'growth_rate'
metric_agg = '?'

In [55]:
country_list = ['United Arab Emirates', 'Japan', 'Ghana'] #For country level analysis

In [56]:
key_columns = [
    'Annual CO2 emissions', 'Annual change in primary energy consumption (%)',
    'Per capita electricity (kWh)',
    'Fossil fuels per capita (kWh)', 'Fossil fuels (% equivalent primary energy)',
    'Renewables per capita (kWh - equivalent)', 'Renewables (% electricity)',
    'Solar Generation - TWh', 'prod of Electricity from solar (TWh)',
]

corr_columns = [
    'Annual CO2 emissions', 'Annual change in primary energy consumption (%)',
    'Per capita electricity (kWh)',
    'Fossil fuels per capita (kWh)',
    'Renewables per capita (kWh - equivalent)',
]

## Data Prep

In [57]:
df_original = pd.read_csv(file_name)

In [58]:
df_original = df_original.loc[
    (df_original['Year'] > year_range[0])
    & (df_original['Year'] < year_range[1])
]

In [59]:
df_original.head(2)

Unnamed: 0,Entity,Year,Code,Region,Income group,Lending category,Fossil fuels per capita (kWh),Oil Consumption - TWh,Economy,"Coal (TWh, direct energy)",Geo Biomass Other - TWh,Wind Generation - TWh,Fossil fuels (% equivalent primary energy),prod of Other renewables including bioenergy (TWh),Coal Production - TWh,Gas Production - TWh,"Gas (TWh, direct energy)",Per capita electricity (kWh),Renewables (% electricity),Annual CO2 emissions,Fossil fuels (TWh),Hydro Generation - TWh,Oil Production - TWh,Population density (people per sq. km of land area),Coal Consumption - TWh,Renewables per capita (kWh - equivalent),prod of Electricity from solar (TWh),Annual change in primary energy consumption (%),Solar Generation - TWh,Land area (sq. km),Gas Consumption - TWh,"Oil (TWh, direct energy)",prod of Electricity from hydro (TWh),Fossil fuels (% growth),prod of Electricity from wind (TWh),Entity_Category
34,Aruba,1961,ABW,Latin America & Caribbean,High income,,,,Aruba,,,,,,,,,,,645553.0,,,,307.966667,,,,,,180.0,,,,,,Country
35,Aruba,1962,ABW,Latin America & Caribbean,High income,,,,Aruba,,,,,,,,,,,708942.0,,,,312.411111,,,,,,180.0,,,,,,Country


In [60]:
df_original.describe()

Unnamed: 0,Year,Fossil fuels per capita (kWh),Oil Consumption - TWh,"Coal (TWh, direct energy)",Geo Biomass Other - TWh,Wind Generation - TWh,Fossil fuels (% equivalent primary energy),prod of Other renewables including bioenergy (TWh),Coal Production - TWh,Gas Production - TWh,"Gas (TWh, direct energy)",Per capita electricity (kWh),Renewables (% electricity),Annual CO2 emissions,Fossil fuels (TWh),Hydro Generation - TWh,Oil Production - TWh,Population density (people per sq. km of land area),Coal Consumption - TWh,Renewables per capita (kWh - equivalent),prod of Electricity from solar (TWh),Annual change in primary energy consumption (%),Solar Generation - TWh,Land area (sq. km),Gas Consumption - TWh,"Oil (TWh, direct energy)",prod of Electricity from hydro (TWh),Fossil fuels (% growth),prod of Electricity from wind (TWh)
count,13231.0,4270.0,4299.0,57.0,4230.0,4230.0,4270.0,7050.0,1397.0,2514.0,57.0,5828.0,5665.0,12651.0,4270.0,4292.0,2553.0,11042.0,4273.0,4292.0,7113.0,9354.0,4230.0,11978.0,4296.0,57.0,7178.0,4255.0,7113.0
mean,1991.724662,32590.368226,1015.737954,28584.640556,5.525746,6.126806,86.30782,3.366912,1915.282764,941.158999,21492.190695,3924.760158,29.633481,216480900.0,2346.057603,62.145414,1707.803287,270.028485,756.214677,4.578573,1.344705,4.533664,2.250895,646746.7,564.805312,39160.740543,38.012262,3.538322,3.658208
std,17.303559,34472.620328,4706.367669,10007.255369,34.808495,61.993965,15.585076,27.092075,6167.601614,3615.938174,9564.826179,5117.133819,32.033628,1755023000.0,11082.105692,308.745114,5858.493747,1308.026452,3785.186997,13.474483,22.090646,31.757811,28.611929,1812242.0,2785.492795,8988.1991,240.56417,26.995583,47.898591
min,1961.0,155.807388,0.097381,16060.80957,0.0,0.0,12.804697,0.0,0.339596,0.0,6303.798828,0.0,0.0,3664.0,0.097381,0.0,0.0,0.098625,0.0,0.0,0.0,-95.005081,0.0,10.0,0.0,17989.603516,0.0,-49.590828,0.0
25%,1977.0,11678.77417,65.349743,20363.539062,0.0,0.0,80.717112,0.0,37.385506,45.443999,14118.032227,572.42717,1.366416,601219.0,117.750929,0.43075,102.90004,19.113774,3.39596,0.172228,0.0,-0.781557,0.0,18270.0,8.854822,33680.054688,0.017904,-1.007167,0.0
50%,1992.0,25363.719727,142.951569,25963.681641,0.046381,0.0,91.689793,0.0,125.336014,144.380302,20265.484375,2469.344605,16.124605,5064147.0,283.652481,4.616636,344.998383,63.247178,32.584167,0.806088,0.0,2.361172,0.0,107400.0,53.573006,38151.847656,1.56,2.493978,0.0
75%,2007.0,40195.003906,418.514847,40175.359375,1.373498,0.070833,97.799118,0.36875,815.064575,404.810089,29315.390625,5388.259033,53.125,42628260.0,967.343674,23.1209,1131.237549,149.543597,169.332794,2.851986,0.002,6.958872,0.005899,472710.0,241.780823,47172.609375,9.96275,6.371659,0.006
max,2021.0,308704.21875,53368.628906,45161.207031,762.782654,1861.939819,100.000015,762.782654,46550.605469,40368.828125,40374.605469,56781.60156,100.0,36702500000.0,136131.46875,4345.990234,52181.949219,21388.6,45161.207031,153.883406,1032.501221,1553.10498,1032.501221,16389950.0,40374.605469,53368.628906,4345.990234,1553.10498,1861.939819


In [61]:
df = df_original.copy()

## Scoring

#### Validation

In [62]:
df[
    df["Income group"].isnull()].query(
    "Year == 2020 and Entity_Category == 'Country'"
    )['Entity'].value_counts()

Montserrat                         1
Reunion                            1
Guadeloupe                         1
Saint Pierre And Miquelon          1
Anguilla                           1
Ussr                               1
Falkland Islands                   1
Western Sahara                     1
Bonaire Sint Eustatius And Saba    1
Mayotte                            1
Niue                               1
Cook Islands                       1
Venezuela                          1
Wallis And Futuna                  1
Kosovo                             1
Martinique                         1
Saint Helena                       1
French Guiana                      1
Name: Entity, dtype: int64

In [63]:
df.query("Year == 2020 and Entity_Category == 'Country'")["Income group"].value_counts(dropna=False)

High income            75
Lower middle income    54
Upper middle income    53
Low income             28
NaN                    18
Name: Income group, dtype: int64

In [64]:
df["Income group"].unique()

# Land area (sq. km)## 
# Annual CO2 emissions
## Income group

# nan,    -----------------> x0
# "High income" -----------> x1
# "Upper middle income", --> x1.5
# "Lower middle income", --> x1.75
# "Low income",  ----------> x2



# Score = (Annual CO2 emissions * Land Area) * Income Factor

array(['High income', 'Low income', 'Lower middle income', nan,
       'Upper middle income'], dtype=object)

### Map Income Group to Income Factor number

In [72]:
def income_group_mapping(text):
    if text == "High income":
        return 1
    if text == "Upper middle income":
        return 1.25
    if text == "Lower middle income":
        return 1.75
    if text == "Low income":
        return 2
    return 1

In [73]:
df["income_factor"] = df["Income group"].apply(lambda x: income_group_mapping(x))

In [74]:
df["income_factor"].describe()

count    13231.000000
mean         1.360630
std          0.377602
min          1.000000
25%          1.000000
50%          1.250000
75%          1.750000
max          2.000000
Name: income_factor, dtype: float64

In [75]:
df.head(2)

Unnamed: 0,Entity,Year,Code,Region,Income group,Lending category,Fossil fuels per capita (kWh),Oil Consumption - TWh,Economy,"Coal (TWh, direct energy)",Geo Biomass Other - TWh,Wind Generation - TWh,Fossil fuels (% equivalent primary energy),prod of Other renewables including bioenergy (TWh),Coal Production - TWh,Gas Production - TWh,"Gas (TWh, direct energy)",Per capita electricity (kWh),Renewables (% electricity),Annual CO2 emissions,Fossil fuels (TWh),Hydro Generation - TWh,Oil Production - TWh,Population density (people per sq. km of land area),Coal Consumption - TWh,Renewables per capita (kWh - equivalent),prod of Electricity from solar (TWh),Annual change in primary energy consumption (%),Solar Generation - TWh,Land area (sq. km),Gas Consumption - TWh,"Oil (TWh, direct energy)",prod of Electricity from hydro (TWh),Fossil fuels (% growth),prod of Electricity from wind (TWh),Entity_Category,income_factor,prioritization_score
34,Aruba,1961,ABW,Latin America & Caribbean,High income,,,,Aruba,,,,,,,,,,,645553.0,,,,307.966667,,,,,,180.0,,,,,,Country,1.0,145249425.0
35,Aruba,1962,ABW,Latin America & Caribbean,High income,,,,Aruba,,,,,,,,,,,708942.0,,,,312.411111,,,,,,180.0,,,,,,Country,1.0,159511950.0


### Calculate Prioritization Score

#### Score Calculation Formula
*Per Entity:*  
`prioritization_score` = `Annual CO2 emissions` * `Land area (sq. km)` * `income_factor`

In [95]:
entities_wo_area = df.drop_duplicates(['Code']).loc[df['Land area (sq. km)'].isna()]['Entity']

In [96]:
df.loc[df['Entity'].isin(
    entities_wo_area)][
    ['Entity', 'Year', 'Land area (sq. km)']].sort_values(
    'Land area (sq. km)')

Unnamed: 0,Entity,Year,Land area (sq. km)
11895,Luxembourg,2000,2574.46
11915,Luxembourg,2020,2574.46
11914,Luxembourg,2019,2574.46
11913,Luxembourg,2018,2574.46
11912,Luxembourg,2017,2574.46
...,...,...,...
21403,Wallis And Futuna,2016,
21404,Wallis And Futuna,2017,
21405,Wallis And Futuna,2018,
21406,Wallis And Futuna,2019,


In [101]:
df["prioritization_score"] = (
    df["Annual CO2 emissions"] * df['Land area (sq. km)']
)

In [102]:
df.sort_values(by='prioritization_score', ascending=False)

Unnamed: 0,Entity,Year,Code,Region,Income group,Lending category,Fossil fuels per capita (kWh),Oil Consumption - TWh,Economy,"Coal (TWh, direct energy)",Geo Biomass Other - TWh,Wind Generation - TWh,Fossil fuels (% equivalent primary energy),prod of Other renewables including bioenergy (TWh),Coal Production - TWh,Gas Production - TWh,"Gas (TWh, direct energy)",Per capita electricity (kWh),Renewables (% electricity),Annual CO2 emissions,Fossil fuels (TWh),Hydro Generation - TWh,Oil Production - TWh,Population density (people per sq. km of land area),Coal Consumption - TWh,Renewables per capita (kWh - equivalent),prod of Electricity from solar (TWh),Annual change in primary energy consumption (%),Solar Generation - TWh,Land area (sq. km),Gas Consumption - TWh,"Oil (TWh, direct energy)",prod of Electricity from hydro (TWh),Fossil fuels (% growth),prod of Electricity from wind (TWh),Entity_Category,income_factor,prioritization_score
3969,China,2020,CHN,East Asia & Pacific,Upper middle income,IBRD,23784.003906,7984.452637,China,,135.625000,466.500000,83.505882,135.625000,22364.935547,1940.092163,,5459.258789,28.087379,1.066789e+10,34232.882812,1321.708984,2265.163574,149.723553,22882.255859,4.056631,261.100006,2.541566,261.100006,9424702.9,3366.172607,,1321.708984,1.602399,466.500000,Country,1.25,1.005417e+17
3968,China,2019,CHN,East Asia & Pacific,Upper middle income,IBRD,23499.349609,7915.177734,China,,112.725227,405.299988,84.277794,112.725227,22154.439453,1767.414429,,5277.177246,26.848570,1.048999e+10,33692.988281,1272.537964,2228.633545,149.367573,22694.042969,3.768683,224.000000,3.628755,224.000000,9424702.9,3083.768555,,1272.537964,2.445459,405.299988,Country,1.25,9.886503e+16
3967,China,2018,CHN,East Asia & Pacific,Upper middle income,IBRD,23036.990234,7534.662109,China,,93.725227,365.799988,85.251244,93.725227,21352.949219,1614.188599,,5057.009277,25.610914,1.028999e+10,32888.707031,1198.886963,2201.838135,148.838644,22514.785156,3.460874,176.899994,3.951335,176.899994,9424702.9,2839.260010,,1198.886963,2.862144,365.799988,Country,1.25,9.698009e+16
3963,China,2014,CHN,East Asia & Pacific,Upper middle income,IBRD,22106.707031,6142.516602,China,,46.268219,159.762695,88.808533,46.268219,21680.763672,1311.808472,,4183.150879,22.249449,9.985583e+09,30937.318359,1059.691650,2458.919189,145.560051,22911.171875,2.538375,23.512159,2.919328,23.512159,9424701.3,1883.631348,,1059.691650,1.355195,159.762695,Country,1.25,9.411114e+16
3962,China,2013,CHN,East Asia & Pacific,Upper middle income,IBRD,21929.755859,5907.469727,China,,37.132999,138.264130,90.179047,37.132999,22034.134766,1218.107178,,3947.121094,20.129911,9.952744e+09,30523.664062,909.612244,2441.826416,144.645433,22897.412109,2.178754,8.373886,3.762960,8.373886,9424701.3,1718.785400,,909.612244,3.184760,138.264130,Country,1.25,9.380164e+16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21406,Wallis And Futuna,2019,WLF,,,,,,,,,,,,,,,,,2.643700e+04,,,,,,,,,,,,,,,,Country,1.00,
21407,Wallis And Futuna,2020,WLF,,,,,,,,,,,,,,,,,2.647400e+04,,,,,,,,,,,,,,,,Country,1.00,
21550,Yemen,2021,YEM,Middle East & North Africa,Low income,IDA,,,"Yemen, Rep.",,,,,,,3.977500,,,,,,,32.096210,,,,,,,,,,,,,Country,2.00,
21688,South Africa,2021,ZAF,Sub-Saharan Africa,Upper middle income,IBRD,21795.685547,289.166656,South Africa,,0.431334,8.188991,94.631485,0.431334,1541.734863,,,4113.701660,7.334341,,1308.656494,1.401010,,,980.955627,0.785076,7.898076,0.518334,7.898076,,38.534122,,1.401010,0.948727,8.188991,Country,1.25,


In [45]:
df[["Entity", "Code"]].drop_duplicates()

Unnamed: 0,Entity,Code
59263,Aruba,ABW
59529,Afghanistan,AFG
59795,Angola,AGO
60061,Albania,ALB
60327,United Arab Emirates,ARE
...,...,...
1910623,Eritrea,ERI
2278501,Gibraltar,GIB
2300047,Kosovo,OWID_KOS
2379049,Wallis And Futuna,WLF


In [46]:
df["income_factor"].value_counts(dropna=False)

1.25    1134540
1.75     826487
1.50     811063
2.00     438108
1.00     279303
Name: income_factor, dtype: int64

In [85]:
df_original.query("Year == 2020")

## Land area (sq. km)
## Annual CO2 emissions
## Income group

Unnamed: 0,Entity,Year,Code,Region,Income group,Lending category,Fossil fuels (% equivalent primary energy),Fossil fuels (% growth),Gas Production - TWh,Land area (sq. km),Unnamed: 0.1.1.1,Coal Consumption - TWh,Renewables per capita (kWh - equivalent),Wind Generation - TWh,prod of Other renewables including bioenergy (TWh),Fossil fuels per capita (kWh),Fossil fuels (TWh),Oil Production - TWh,Coal Production - TWh,Hydro Generation - TWh,Unnamed: 0.1.1,prod of Electricity from solar (TWh),prod of Electricity from hydro (TWh),Unnamed: 0.1,"Gas (TWh, direct energy)",Annual change in primary energy consumption (%),Geo Biomass Other - TWh,"Coal (TWh, direct energy)",Per capita electricity (kWh),Oil Consumption - TWh,Renewables (% electricity),Population density (people per sq. km of land area),Solar Generation - TWh,Gas Consumption - TWh,"Oil (TWh, direct energy)",Annual CO2 emissions,prod of Electricity from wind (TWh),Entity_Category
71,Afghanistan,2020,AFG,South Asia,Low income,IDA,,,,6.522300e+05,12892.0,,,,0.000000,,,,,,12892.0,0.040000,0.620000,12892.0,,,,,20.014248,,84.615395,59.684990,,,,1.216029e+07,0.000000,Country
208,Africa,2020,,,,,89.685829,-6.705356,2312.026855,,54.0,1157.901123,0.376615,21.302896,8.066416,3509.572021,4704.925781,3846.310547,1723.903687,146.373077,54.0,16.058805,146.373077,54.0,,-5.539828,8.066416,,628.423706,2011.052612,22.430740,,16.058805,1535.972412,,1.326044e+09,21.302896,Continent
264,Africa (Bp),2020,,,,,89.685829,-6.705356,2312.026855,,110.0,1157.901123,,21.302898,8.066416,,4704.925781,3846.310547,1723.903687,146.373077,110.0,16.058805,146.373077,110.0,,-5.539828,8.066416,,,2011.052612,22.430740,,16.058805,1535.972412,,,21.302898,Bp?
354,Albania,2020,ALB,Europe & Central Asia,Upper middle income,IBRD,,,,2.740000e+04,12893.0,,,,0.000000,,,,,,12893.0,0.030000,5.280000,12893.0,,,,,1852.207153,,100.000000,103.571131,,,,4.534673e+06,0.000000,Country
459,Algeria,2020,DZA,Middle East & North Africa,Lower middle income,IBRD,99.702103,-5.827147,814.563721,2.381741e+06,166.0,4.816350,0.044457,0.007700,0.000000,14879.098633,652.463989,668.911072,,0.049600,166.0,0.685000,0.049600,166.0,,-5.824166,0.000000,,1821.686035,211.635162,0.937777,18.411340,0.685000,436.012512,,1.549955e+08,0.007700,Country
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27414,Western Sahara,2020,,,,,,,,,13179.0,,,,0.000000,,,,,,13179.0,0.000000,0.000000,13179.0,,,,,0.000000,,,,,,,,0.000000,Country
27685,World,2020,WLD,,,,82.211304,-5.385285,38615.152344,1.299798e+08,6032.0,41963.964844,2.704402,1596.428223,703.916443,16523.912109,128800.398438,48508.015625,44068.472656,4345.990234,6032.0,846.229370,4345.990234,6032.0,38455.726562,-3.985941,703.916443,41963.964844,3429.322021,48380.710938,27.864630,59.731828,846.229370,38455.726562,48380.710938,3.480726e+10,1596.428223,World
27757,Yemen,2020,,,,,,,3.064353,,12870.0,,,,0.000000,,,43.814594,,,12870.0,0.500000,0.000000,12870.0,,,,,100.668922,,15.384615,,,,,9.768313e+06,0.000000,Country
27840,Zambia,2020,ZMB,Sub-Saharan Africa,Low income,IDA,,,,7.433900e+05,13180.0,,,,0.160000,,,,,,13180.0,0.150000,13.670000,13180.0,,,,,844.264526,,87.484352,24.729894,,,,6.572938e+06,0.000000,Country
