In [188]:
import pandas as pd
import numpy as np
import os

In [189]:
pd.set_option('display.max_columns', None)

## Join all the indicators

In [190]:
# Read all the 6 indicators (CSVs)
df_economic = pd.read_csv("../data/resources/readiness/melted_economic.csv")

df_governance = pd.read_csv("../data/resources/readiness/melted_governance.csv")
del df_governance["Name"]
df_social = pd.read_csv("../data/resources/readiness/melted_social.csv")
del df_social["Name"]

df_ecosystems = pd.read_csv("../data/resources/vulnerability/melted_ecosystems.csv")
del df_ecosystems["Name"]
df_habitat = pd.read_csv("../data/resources/vulnerability/melted_habitat.csv")
del df_habitat["Name"]
df_infrastructure = pd.read_csv("../data/resources/vulnerability/melted_infrastructure.csv")
del df_infrastructure["Name"]

# Also read political stability csv
df_politicalStability = pd.read_csv("../data/resources/indicators/id_gove_01/melted_governance.csv")
del df_politicalStability["Name"]

In [191]:
df_indicators = pd.merge(df_economic, df_governance, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_social, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_ecosystems, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_habitat, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_infrastructure, how="outer", on=["ISO3", "Year"])

In [192]:
df_indicators = pd.merge(df_indicators, df_politicalStability, how="outer", on=["ISO3", "Year"])
df_indicators

Unnamed: 0,ISO3,Name,Year,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated
0,AFG,Afghanistan,1995,0.503503,False,0.496497,0.861229,False,0.138771,0.704018,False,0.295982,0.516028,False,0.603153,False,0.383710,True,True,
1,ALB,Albania,1995,0.606695,False,0.393305,0.616482,False,0.383518,0.774301,False,0.225699,0.484339,False,0.509650,False,0.474276,False,True,
2,DZA,Algeria,1995,0.583375,False,0.416625,0.707108,False,0.292892,0.805221,False,0.194779,0.411816,False,0.467930,False,0.157379,False,True,
3,AND,Andorra,1995,0.589592,True,0.410408,0.498106,True,0.501894,0.843236,False,0.156764,0.477737,True,0.527740,True,0.383710,True,True,
4,AGO,Angola,1995,0.710943,False,0.289057,0.798282,False,0.201718,0.916844,False,0.083156,0.544357,False,0.648126,False,0.326570,False,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,"Venezuela, Bolivarian Republic o",2020,0.888756,False,0.111244,0.872393,False,0.127607,0.668596,False,0.331404,0.351461,False,0.604549,False,0.208577,False,False,0.726031
4988,VNM,Viet Nam,2020,0.509260,False,0.490740,0.531500,False,0.468500,0.683830,False,0.316170,0.555078,False,0.433713,False,0.562195,False,False,0.399939
4989,YEM,Yemen,2020,0.620414,False,0.379586,0.879826,False,0.120174,0.756106,False,0.243894,0.570795,False,0.611543,False,0.340751,False,False,0.986514
4990,ZMB,Zambia,2020,0.586171,False,0.413829,0.600859,False,0.399141,0.849755,False,0.150245,0.433906,False,0.594931,False,0.560437,False,False,0.412301


In [193]:
# To armonize data, let's change "United states" as "USA"
invalid_rows = df_indicators["Name"] == "United States"
df_indicators.loc[invalid_rows, "Name"] = "USA"

In [194]:
# Create geometric mean for these 6 indicators (EGSEHI)
# For readiness: 1 - value
df_indicators["EGSEHI"] = df_indicators["value_economic"] * df_indicators["value_governance"] * df_indicators["value_social"] * \
                            df_indicators["value_ecosystems"] * df_indicators["value_habitat"] * df_indicators["value_infrastructure"]

root = 6
df_indicators[f"EGSEHI_{root}root"] = np.power(df_indicators["EGSEHI"], 1/root)

In [195]:
df_indicators["Country"] = df_indicators["Name"].str.upper()
del df_indicators["Name"]

In [196]:
df_indicators.to_csv("../data/indicators_conformed_file.csv", index=False)

In [197]:
df_indicators

Unnamed: 0,ISO3,Year,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
0,AFG,1995,0.503503,False,0.496497,0.861229,False,0.138771,0.704018,False,0.295982,0.516028,False,0.603153,False,0.383710,True,True,,0.036459,0.575839,AFGHANISTAN
1,ALB,1995,0.606695,False,0.393305,0.616482,False,0.383518,0.774301,False,0.225699,0.484339,False,0.509650,False,0.474276,False,True,,0.033904,0.568908,ALBANIA
2,DZA,1995,0.583375,False,0.416625,0.707108,False,0.292892,0.805221,False,0.194779,0.411816,False,0.467930,False,0.157379,False,True,,0.010073,0.464725,ALGERIA
3,AND,1995,0.589592,True,0.410408,0.498106,True,0.501894,0.843236,False,0.156764,0.477737,True,0.527740,True,0.383710,True,True,,0.023957,0.536915,ANDORRA
4,AGO,1995,0.710943,False,0.289057,0.798282,False,0.201718,0.916844,False,0.083156,0.544357,False,0.648126,False,0.326570,False,True,,0.059952,0.625606,ANGOLA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,2020,0.888756,False,0.111244,0.872393,False,0.127607,0.668596,False,0.331404,0.351461,False,0.604549,False,0.208577,False,False,0.726031,0.022974,0.533178,"VENEZUELA, BOLIVARIAN REPUBLIC O"
4988,VNM,2020,0.509260,False,0.490740,0.531500,False,0.468500,0.683830,False,0.316170,0.555078,False,0.433713,False,0.562195,False,False,0.399939,0.025052,0.540927,VIET NAM
4989,YEM,2020,0.620414,False,0.379586,0.879826,False,0.120174,0.756106,False,0.243894,0.570795,False,0.611543,False,0.340751,False,False,0.986514,0.049091,0.605110,YEMEN
4990,ZMB,2020,0.586171,False,0.413829,0.600859,False,0.399141,0.849755,False,0.150245,0.433906,False,0.594931,False,0.560437,False,False,0.412301,0.043299,0.592580,ZAMBIA


### ADD reporter code to countries in indicators_df

In [198]:
# final_production_df["Country"] = final_production_df["Country"].str.upper()
# final_production_df["Product"] = final_production_df["Product"].str.upper()

# Attach country code to df
countries_list = [{'reporterCode': 36, 'reporterDesc': 'Australia'},
 {'reporterCode': 40, 'reporterDesc': 'Austria'},
 {'reporterCode': 56, 'reporterDesc': 'Belgium'},
 {'reporterCode': 68, 'reporterDesc': 'Bolivia (Plurinational State of)'},
 {'reporterCode': 68, 'reporterDesc': 'BOLIVIA, PLURINATIONAL STATE OF'},
 {'reporterCode': 68, 'reporterDesc': 'Bolivia'},
 {'reporterCode': 76, 'reporterDesc': 'Brazil'},
 {'reporterCode': 156, 'reporterDesc': 'China'},
 {'reporterCode': 251, 'reporterDesc': 'France'},
 {'reporterCode': 276, 'reporterDesc': 'Germany'},
 {'reporterCode': 278, 'reporterDesc': 'GERMAN DEM. REP'},
 {'reporterCode': 320, 'reporterDesc': 'Guatemala'},
 {'reporterCode': 360, 'reporterDesc': 'Indonesia'},
 {'reporterCode': 380, 'reporterDesc': 'Italy'},
 {'reporterCode': 417, 'reporterDesc': 'Kyrgyzstan'},
 {'reporterCode': 504, 'reporterDesc': 'Morocco'},
 {'reporterCode': 528, 'reporterDesc': 'Netherlands'},
 {'reporterCode': 710, 'reporterDesc': 'South Africa'},
 {'reporterCode': 757, 'reporterDesc': 'Switzerland'},
 {'reporterCode': 764, 'reporterDesc': 'Thailand'},
 {'reporterCode': 792, 'reporterDesc': 'Türkiye'},
 {'reporterCode': 807, 'reporterDesc': 'North Macedonia'},
 {'reporterCode': 807, 'reporterDesc': 'MACEDONIA'},
 {'reporterCode': 842, 'reporterDesc': 'USA'},
 {'reporterCode': 842, 'reporterDesc': 'UNITED STATES'},
 
 {'reporterCode': 32, 'reporterDesc': 'Argentina'},
 {'reporterCode': 100, 'reporterDesc': 'Bulgaria'},
 {'reporterCode': 112, 'reporterDesc': 'Belarus'},
 {'reporterCode': 124, 'reporterDesc': 'Canada'},
 {'reporterCode': 144, 'reporterDesc': 'Sri Lanka'},
 {'reporterCode': 152, 'reporterDesc': 'Chile'},
 {'reporterCode': 170, 'reporterDesc': 'Colombia'},
 {'reporterCode': 196, 'reporterDesc': 'Cyprus'},
 {'reporterCode': 222, 'reporterDesc': 'El Salvador'},
 {'reporterCode': 233, 'reporterDesc': 'Estonia'},
 {'reporterCode': 300, 'reporterDesc': 'Greece'},
 {'reporterCode': 372, 'reporterDesc': 'Ireland'},
 {'reporterCode': 398, 'reporterDesc': 'Kazakhstan'},
 {'reporterCode': 404, 'reporterDesc': 'Kenya'},
 {'reporterCode': 410, 'reporterDesc': 'Rep. of Korea'},
 {'reporterCode': 410, 'reporterDesc': 'KOREA, REPUBLIC OF'},
 {'reporterCode': 410, 'reporterDesc': 'KOREA, SOUTH'},
 {'reporterCode': 440, 'reporterDesc': 'Lithuania'},
 {'reporterCode': 484, 'reporterDesc': 'Mexico'},
 {'reporterCode': 512, 'reporterDesc': 'Oman'},
 {'reporterCode': 516, 'reporterDesc': 'Namibia'},
 {'reporterCode': 604, 'reporterDesc': 'Peru'},
 {'reporterCode': 620, 'reporterDesc': 'Portugal'},
 {'reporterCode': 642, 'reporterDesc': 'Romania'},
 {'reporterCode': 643, 'reporterDesc': 'Russian Federation'},
 {'reporterCode': 643, 'reporterDesc': 'RUSSIA'}, 
 {'reporterCode': 682, 'reporterDesc': 'Saudi Arabia'},
 {'reporterCode': 686, 'reporterDesc': 'Senegal'},
 {'reporterCode': 699, 'reporterDesc': 'India'},
 {'reporterCode': 702, 'reporterDesc': 'Singapore'},
 {'reporterCode': 716, 'reporterDesc': 'Zimbabwe'},
 {'reporterCode': 724, 'reporterDesc': 'Spain'},
 {'reporterCode': 748, 'reporterDesc': 'Eswatini'},
 {'reporterCode': 752, 'reporterDesc': 'Sweden'},
 {'reporterCode': 780, 'reporterDesc': 'Trinidad and Tobago'},
 {'reporterCode': 784, 'reporterDesc': 'United Arab Emirates'},
 {'reporterCode': 788, 'reporterDesc': 'Tunisia'},
 {'reporterCode': 795, 'reporterDesc': 'Turkmenistan'},
 {'reporterCode': 804, 'reporterDesc': 'Ukraine'},
 {'reporterCode': 826, 'reporterDesc': 'United Kingdom'},
 {'reporterCode': 891, 'reporterDesc': 'Serbia and Montenegro (...2005)'},
 {'reporterCode': 31, 'reporterDesc': 'Azerbaijan'},
 {'reporterCode': 44, 'reporterDesc': 'Bahamas'},
 {'reporterCode': 120, 'reporterDesc': 'Cameroon'},
 {'reporterCode': 191, 'reporterDesc': 'Croatia'},
 {'reporterCode': 203, 'reporterDesc': 'Czechia'},
 {'reporterCode': 203, 'reporterDesc': 'CZECH REPUBLIC'},
 {'reporterCode': 208, 'reporterDesc': 'Denmark'},
 {'reporterCode': 246, 'reporterDesc': 'Finland'},
 {'reporterCode': 266, 'reporterDesc': 'Gabon'},
 {'reporterCode': 268, 'reporterDesc': 'Georgia'},
 {'reporterCode': 344, 'reporterDesc': 'China, Hong Kong SAR'},
 {'reporterCode': 344, 'reporterDesc': 'HONG KONG'},
 {'reporterCode': 364, 'reporterDesc': 'Iran'},
 {'reporterCode': 364, 'reporterDesc': 'IRAN, ISLAMIC REPUBLIC OF'},
 {'reporterCode': 392, 'reporterDesc': 'Japan'},
 {'reporterCode': 422, 'reporterDesc': 'Lebanon'},
 {'reporterCode': 480, 'reporterDesc': 'Mauritius'},
 {'reporterCode': 490, 'reporterDesc': 'Other Asia, nes'},
 {'reporterCode': 579, 'reporterDesc': 'Norway'},
 {'reporterCode': 703, 'reporterDesc': 'Slovakia'},
 {'reporterCode': 704, 'reporterDesc': 'Viet Nam'},
 {'reporterCode': 704, 'reporterDesc': 'VIETNAM'},
 {'reporterCode': 705, 'reporterDesc': 'Slovenia'},
 {'reporterCode': 862, 'reporterDesc': 'Venezuela'},
 {'reporterCode': 862, 'reporterDesc': 'VENEZUELA, BOLIVARIAN REPUBLIC O'},
 {'reporterCode': 192, 'reporterDesc': 'Cuba'},
 {'reporterCode': 450, 'reporterDesc': 'Madagascar'},
 {'reporterCode': 608, 'reporterDesc': 'Philippines'},
 {'reporterCode': 736, 'reporterDesc': 'Sudan (...2011)'},
 {'reporterCode': 8, 'reporterDesc': 'Albania'},
 {'reporterCode': 72, 'reporterDesc': 'Botswana'},
 {'reporterCode': 348, 'reporterDesc': 'Hungary'},
 {'reporterCode': 384, 'reporterDesc': "Côte d'Ivoire"},
 {'reporterCode': 384, 'reporterDesc': "COTE D'IVOIRE"},
 {'reporterCode': 400, 'reporterDesc': 'Jordan'},
 {'reporterCode': 428, 'reporterDesc': 'Latvia'},
 {'reporterCode': 442, 'reporterDesc': 'Luxembourg'},
 {'reporterCode': 458, 'reporterDesc': 'Malaysia'},
 {'reporterCode': 470, 'reporterDesc': 'Malta'},
 {'reporterCode': 496, 'reporterDesc': 'Mongolia'},
 {'reporterCode': 508, 'reporterDesc': 'Mozambique'},
 {'reporterCode': 554, 'reporterDesc': 'New Zealand'},
 {'reporterCode': 562, 'reporterDesc': 'Niger'},
 {'reporterCode': 616, 'reporterDesc': 'Poland'},
 {'reporterCode': 634, 'reporterDesc': 'Qatar'},
 {'reporterCode': 659, 'reporterDesc': 'Saint Kitts and Nevis'},
 {'reporterCode': 740, 'reporterDesc': 'Suriname'},
 {'reporterCode': 818, 'reporterDesc': 'Egypt'},
 {'reporterCode': 894, 'reporterDesc': 'Zambia'},
 {'reporterCode': 51, 'reporterDesc': 'Armenia'},
 {'reporterCode': 340, 'reporterDesc': 'Honduras'},
 {'reporterCode': 466, 'reporterDesc': 'Mali'},
 {'reporterCode': 598, 'reporterDesc': 'Papua New Guinea'},
 {'reporterCode': 20, 'reporterDesc': 'Andorra'},
 {'reporterCode': 108, 'reporterDesc': 'Burundi'},
 {'reporterCode': 116, 'reporterDesc': 'Cambodia'},
 {'reporterCode': 140, 'reporterDesc': 'Central African Rep.'},
 {'reporterCode': 140, 'reporterDesc': 'CENTRAL AFRICAN REPUBLIC'},
 {'reporterCode': 188, 'reporterDesc': 'Costa Rica'},
 {'reporterCode': 204, 'reporterDesc': 'Benin'},
 {'reporterCode': 218, 'reporterDesc': 'Ecuador'},
 {'reporterCode': 231, 'reporterDesc': 'Ethiopia'},
 {'reporterCode': 242, 'reporterDesc': 'Fiji'},
 {'reporterCode': 258, 'reporterDesc': 'French Polynesia'},
 {'reporterCode': 270, 'reporterDesc': 'Gambia'},
 {'reporterCode': 288, 'reporterDesc': 'Ghana'},
 {'reporterCode': 324, 'reporterDesc': 'Guinea'},
 {'reporterCode': 328, 'reporterDesc': 'Guyana'},
 {'reporterCode': 376, 'reporterDesc': 'Israel'},
 {'reporterCode': 414, 'reporterDesc': 'Kuwait'},
 {'reporterCode': 558, 'reporterDesc': 'Nicaragua'},
 {'reporterCode': 591, 'reporterDesc': 'Panama'},
 {'reporterCode': 762, 'reporterDesc': 'Tajikistan'},
 {'reporterCode': 834, 'reporterDesc': 'United Rep. of Tanzania'},
 {'reporterCode': 834, 'reporterDesc': 'TANZANIA, UNITED REPUBLIC OF'},
 {'reporterCode': 834, 'reporterDesc': 'TANZANIA'}, 
 {'reporterCode': 854, 'reporterDesc': 'Burkina Faso'},
 {'reporterCode': 858, 'reporterDesc': 'Uruguay'},
 {'reporterCode': 132, 'reporterDesc': 'Cabo Verde'},
 {'reporterCode': 132, 'reporterDesc': 'CAPE VERDE'},
 {'reporterCode': 48, 'reporterDesc': 'Bahrain'},
 {'reporterCode': 388, 'reporterDesc': 'Jamaica'},
 {'reporterCode': 478, 'reporterDesc': 'Mauritania'},
 {'reporterCode': 12, 'reporterDesc': 'Algeria'},
 {'reporterCode': 28, 'reporterDesc': 'Antigua and Barbuda'},
 {'reporterCode': 540, 'reporterDesc': 'New Caledonia'},
 {'reporterCode': 52, 'reporterDesc': 'Barbados'},
 {'reporterCode': 566, 'reporterDesc': 'Nigeria'},
 {'reporterCode': 800, 'reporterDesc': 'Uganda'},
 {'reporterCode': 426, 'reporterDesc': 'Lesotho'},
 {'reporterCode': 174, 'reporterDesc': 'Comoros'},
 {'reporterCode': 50, 'reporterDesc': 'Bangladesh'},
 {'reporterCode': 352, 'reporterDesc': 'Iceland'},
 {'reporterCode': 454, 'reporterDesc': 'Malawi'},


 {'reporterCode': 662, 'reporterDesc': 'Saint Lucia'},
 {'reporterCode': 214, 'reporterDesc': 'Dominican Rep.'},
 {'reporterCode': 214, 'reporterDesc': 'DOMINICAN REPUBLIC'},
 {'reporterCode': 882, 'reporterDesc': 'Samoa'},
 {'reporterCode': 646, 'reporterDesc': 'Rwanda'},
 {'reporterCode': 96, 'reporterDesc': 'Brunei Darussalam'},
 {'reporterCode': 96, 'reporterDesc': 'BRUNEI'},
 {'reporterCode': 776, 'reporterDesc': 'Tonga'},
 {'reporterCode': 600, 'reporterDesc': 'Paraguay'},
 {'reporterCode': 760, 'reporterDesc': 'Syria'},
 {'reporterCode': 760, 'reporterDesc': 'SYRIAN ARAB REPUBLIC'},
 {'reporterCode': 498, 'reporterDesc': 'Rep. of Moldova'},
 {'reporterCode': 498, 'reporterDesc': 'MOLDOVA, REPUBLIC OF'},
 {'reporterCode': 498, 'reporterDesc': 'MOLDOVA'},
 {'reporterCode': 768, 'reporterDesc': 'Togo'},
 {'reporterCode': 90, 'reporterDesc': 'Solomon Isds'},
 {'reporterCode': 90, 'reporterDesc': 'SOLOMON ISLANDS'},
 {'reporterCode': 690, 'reporterDesc': 'Seychelles'},
 {'reporterCode': 70, 'reporterDesc': 'Bosnia Herzegovina'},
 {'reporterCode': 70, 'reporterDesc': 'BOSNIA-HERZEGOVINA'},
 {'reporterCode': 70, 'reporterDesc': 'BOSNIA AND HERZEGOVINA'},
 {'reporterCode': 500, 'reporterDesc': 'Montserrat'},
 {'reporterCode': 586, 'reporterDesc': 'Pakistan'},
 {'reporterCode': 524, 'reporterDesc': 'Nepal'},
 {'reporterCode': 887, 'reporterDesc': 'Yemen'},
 {'reporterCode': 887, 'reporterDesc': 'YEMEN ARAB REPUBLIC'}, # norte
 {'reporterCode': 887, 'reporterDesc': 'YEMEN, PDR'},
 {'reporterCode': 626, 'reporterDesc': 'Timor-Leste'},
 {'reporterCode': 798, 'reporterDesc': 'Tuvalu'},
 {'reporterCode': 64, 'reporterDesc': 'Bhutan'},
 {'reporterCode': 670, 'reporterDesc': 'Saint Vincent and the Grenadines'},
 {'reporterCode': 184, 'reporterDesc': 'Cook Isds'},
 {'reporterCode': 84, 'reporterDesc': 'Belize'},
 {'reporterCode': 530, 'reporterDesc': 'Netherlands Antilles (...2010)'},
 {'reporterCode': 688, 'reporterDesc': 'Serbia'},
 {'reporterCode': 548, 'reporterDesc': 'Vanuatu'},
 {'reporterCode': 275, 'reporterDesc': 'State of Palestine'},
 {'reporterCode': 434, 'reporterDesc': 'Libya'},
 {'reporterCode': 434, 'reporterDesc': 'LIBYAN ARAB JAMAHIRIYA'},
 {'reporterCode': 178, 'reporterDesc': 'Congo'},
 {'reporterCode': 178, 'reporterDesc': 'CONGO, REP.'},
 {'reporterCode': 24, 'reporterDesc': 'Angola'},
 {'reporterCode': 296, 'reporterDesc': 'Kiribati'},
 {'reporterCode': 499, 'reporterDesc': 'Montenegro'},
 {'reporterCode': 234, 'reporterDesc': 'Faeroe Isds'},
 {'reporterCode': 533, 'reporterDesc': 'Aruba'},
 {'reporterCode': 796, 'reporterDesc': 'Turks and Caicos Isds'},
 {'reporterCode': 104, 'reporterDesc': 'Myanmar'},
 {'reporterCode': 212, 'reporterDesc': 'Dominica'},
 {'reporterCode': 418, 'reporterDesc': "Lao People's Dem. Rep."},
 {'reporterCode': 418, 'reporterDesc': "LAO PEOPLE'S DEMOCRATIC REPUBLIC"},

 {'reporterCode': 304, 'reporterDesc': 'Greenland'},
 {'reporterCode': 368, 'reporterDesc': 'Iraq'},
 
 {'reporterCode': 729, 'reporterDesc': 'Sudan'},
 {'reporterCode': 446, 'reporterDesc': 'China, Macao SAR'},
 {'reporterCode': 308, 'reporterDesc': 'Grenada'},
 {'reporterCode': 585, 'reporterDesc': 'Palau'},
 {'reporterCode': 694, 'reporterDesc': 'Sierra Leone'},
 {'reporterCode': 60, 'reporterDesc': 'Bermuda'},
 {'reporterCode': 4, 'reporterDesc': 'Afghanistan'},
 {'reporterCode': 180, 'reporterDesc': 'Dem. Rep. of the Congo'},
 {'reporterCode': 180, 'reporterDesc': 'CONGO, D.R.'},
 {'reporterCode': 180, 'reporterDesc': 'CONGO, THE DEMOCRATIC REPUBLIC O'},
 {'reporterCode': 860, 'reporterDesc': 'Uzbekistan'},
 {'reporterCode': 136, 'reporterDesc': 'Cayman Isds'},

 {'reporterCode': 430, 'reporterDesc': 'Liberia'},
 {'reporterCode': 891, 'reporterDesc': 'Serbia and montenegro'},
 {'reporterCode': 332, 'reporterDesc': 'HAITI'},
 {'reporterCode': 792, 'reporterDesc': 'TURKEY'},
 {'reporterCode': 890, 'reporterDesc': 'FMR YUGOSLAVIA'},
 {'reporterCode': 890, 'reporterDesc': 'YUGOSLAVIA'},
 {'reporterCode': 200, 'reporterDesc': 'CZECHOSLOVAKIA'},
#  {'reporterCode': , 'reporterDesc': 'FMR RHODESIA NYAS'},
 {'reporterCode': 748, 'reporterDesc': 'SWAZILAND'},
 {'reporterCode': 408, 'reporterDesc': "DEM. PEOPLE'S REP. OF KOREA"},
 {'reporterCode': 408, 'reporterDesc': "KOREA, NORTH"},
 {'reporterCode': 408, 'reporterDesc': "KOREA, DEMOCRATIC PEOPLE'S REPUB"},
 {'reporterCode': 807, 'reporterDesc': "TFYR OF MACEDONIA"},
#  {'reporterCode': , 'reporterDesc': "OTHER AFRICA, NES"},
#  {'reporterCode': , 'reporterDesc': 'OTHER EUROPE, NES'},
 {'reporterCode': 232, 'reporterDesc': "ERITREA"},
 {'reporterCode': 148, 'reporterDesc': "CHAD"},
 {'reporterCode': 226, 'reporterDesc': "EQUATORIAL GUINEA"},
 {'reporterCode': 254, 'reporterDesc': "FRENCH GUIANA"},
 {'reporterCode': 736, 'reporterDesc': "SOUTH SUDAN"},
 {'reporterCode': 706, 'reporterDesc': "SOMALIA"},
#  {'reporterCode': , 'reporterDesc': ""},
 ]
countries_list = [{"reporterCode": country["reporterCode"], "Country": country["reporterDesc"].upper()} for country in countries_list]
df_countries_code = pd.json_normalize(countries_list)

df_indicators = pd.merge(df_indicators, df_countries_code, how="left", left_on="Country", right_on="Country")
df_indicators

Unnamed: 0,ISO3,Year,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country,reporterCode
0,AFG,1995,0.503503,False,0.496497,0.861229,False,0.138771,0.704018,False,0.295982,0.516028,False,0.603153,False,0.383710,True,True,,0.036459,0.575839,AFGHANISTAN,4.0
1,ALB,1995,0.606695,False,0.393305,0.616482,False,0.383518,0.774301,False,0.225699,0.484339,False,0.509650,False,0.474276,False,True,,0.033904,0.568908,ALBANIA,8.0
2,DZA,1995,0.583375,False,0.416625,0.707108,False,0.292892,0.805221,False,0.194779,0.411816,False,0.467930,False,0.157379,False,True,,0.010073,0.464725,ALGERIA,12.0
3,AND,1995,0.589592,True,0.410408,0.498106,True,0.501894,0.843236,False,0.156764,0.477737,True,0.527740,True,0.383710,True,True,,0.023957,0.536915,ANDORRA,20.0
4,AGO,1995,0.710943,False,0.289057,0.798282,False,0.201718,0.916844,False,0.083156,0.544357,False,0.648126,False,0.326570,False,True,,0.059952,0.625606,ANGOLA,24.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,2020,0.888756,False,0.111244,0.872393,False,0.127607,0.668596,False,0.331404,0.351461,False,0.604549,False,0.208577,False,False,0.726031,0.022974,0.533178,"VENEZUELA, BOLIVARIAN REPUBLIC O",862.0
4988,VNM,2020,0.509260,False,0.490740,0.531500,False,0.468500,0.683830,False,0.316170,0.555078,False,0.433713,False,0.562195,False,False,0.399939,0.025052,0.540927,VIET NAM,704.0
4989,YEM,2020,0.620414,False,0.379586,0.879826,False,0.120174,0.756106,False,0.243894,0.570795,False,0.611543,False,0.340751,False,False,0.986514,0.049091,0.605110,YEMEN,887.0
4990,ZMB,2020,0.586171,False,0.413829,0.600859,False,0.399141,0.849755,False,0.150245,0.433906,False,0.594931,False,0.560437,False,False,0.412301,0.043299,0.592580,ZAMBIA,894.0


In [199]:
# DATA with no reporter code
df_indicators[df_indicators["reporterCode"].isna()]["Country"].unique()

array(['DJIBOUTI', 'GUINEA-BISSAU', 'LIECHTENSTEIN', 'MALDIVES',
       'MARSHALL ISLANDS', 'MICRONESIA, FEDERATED STATES OF', 'MONACO',
       'NAURU', 'SAN MARINO', 'SAO TOME AND PRINCIPE'], dtype=object)

### Production

In [200]:
df_production = pd.read_csv("../data/production/Production_one_file_2.csv")
df_production["Year"] = pd.to_numeric(df_production["Year"], downcast="integer")
df_production["Country"] = df_production["Country"].str.upper()
df_production["Product"] = df_production["Product"].str.upper()
df_production.rename(columns={"Value": "Domestic Production Value (P_AC)"}, inplace=True)

In [201]:
df_production

Unnamed: 0,Country,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode
0,ALBANIA,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0
1,ALGERIA,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0
2,ARGENTINA,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0
3,AUSTRALIA,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0
4,AUSTRIA,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0
...,...,...,...,...,...,...,...,...,...
96380,SPAIN,URANIUM (U3O8-CONT.),261210.0,org,2020,,,kg,724.0
96381,UKRAINE,URANIUM (U3O8-CONT.),261210.0,org,2020,8.770000e+05,877.0,kg,804.0
96382,UNITED STATES,URANIUM (U3O8-CONT.),261210.0,org,2020,7.000000e+03,7.0,kg,842.0
96383,UZBEKISTAN,URANIUM (U3O8-CONT.),261210.0,org,2020,4.127000e+06,4127.0,kg,860.0


## Join Production and EGSEHI

In [202]:
del df_indicators["Country"]
df_production_egsehi = pd.merge(df_production, df_indicators, how="left", left_on=["Year", "reporterCode"], right_on=["Year", "reporterCode"])
# del df_production_egsehi["Name"]

In [203]:
df_production_egsehi

Unnamed: 0,Country,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root
0,ALBANIA,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0,,,,,,,,,,,,,,,,,,,,
1,ALGERIA,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0,,,,,,,,,,,,,,,,,,,,
2,ARGENTINA,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0,,,,,,,,,,,,,,,,,,,,
3,AUSTRALIA,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0,,,,,,,,,,,,,,,,,,,,
4,AUSTRIA,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105272,SPAIN,URANIUM (U3O8-CONT.),261210.0,org,2020,,,kg,724.0,ESP,0.501205,False,0.498795,0.353548,False,0.646452,0.537246,False,0.462754,0.281208,False,0.373402,False,0.335442,False,False,0.294127,0.003353,0.386880
105273,UKRAINE,URANIUM (U3O8-CONT.),261210.0,org,2020,8.770000e+05,877.0,kg,804.0,UKR,0.578567,False,0.421433,0.636512,False,0.363488,0.503279,False,0.496721,0.426281,False,0.469000,False,0.292860,False,False,0.646157,0.010852,0.470525
105274,UNITED STATES,URANIUM (U3O8-CONT.),261210.0,org,2020,7.000000e+03,7.0,kg,842.0,USA,0.386435,False,0.613565,0.306741,False,0.693259,0.346982,False,0.653018,0.424584,False,0.453989,False,0.275346,False,False,0.388336,0.002183,0.360170
105275,UZBEKISTAN,URANIUM (U3O8-CONT.),261210.0,org,2020,4.127000e+06,4127.0,kg,860.0,UZB,0.473302,False,0.526698,0.671251,False,0.328749,0.697873,False,0.302127,0.506797,False,0.422226,False,0.267437,False,False,0.481922,0.012688,0.482948


### Join HHI index table with production JOIN EGSEHI table

In [204]:
df_hhi_master = pd.read_csv("../data/hhi_data/df_hhi_master.csv")
del df_hhi_master['Country']
del df_hhi_master['Value (Ton)']
# del df_hhi_master['Product']

df_hhi_master

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,Product_x,cmdCode,Source,Year,Value,qtyUnitAbbr,reporterCode,Total_value_YearProduct,Share in % (production),Share HHI Production,HHI_production,HHI_production_scaled,typeCode,freqCode,refPeriodId,refYear,refMonth,reporterISO,reporterDesc,flowCode,flowDesc,partnerCode,partnerISO,partnerDesc,partner2Code,partner2ISO,partner2Desc,classificationCode,classificationSearchCode,isOriginalClassification,cmdDesc,aggrLevel,isLeaf,customsCode,customsDesc,mosCode,motCode,motDesc,qtyUnitCode,qty,isQtyEstimated,altQtyUnitCode,altQtyUnitAbbr,altQty,isAltQtyEstimated,netWgt,isNetWgtEstimated,grossWgt,isGrossWgtEstimated,cifvalue,fobvalue,primaryValue,legacyEstimationFlag,isReported,isAggregate,Product_y,Total_netWgt_YearProduct,Share in % (exports),Share HHI Exports,HHI_exports,HHI_exports_scaled
0,IRON ORE (FE-CONT.),2601.0,org,2000,5.000000e+06,kg,8.0,6.058481e+11,0.000008,6.811025e-11,0.120238,0.075656,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,IRON ORE (FE-CONT.),2601.0,org,2000,7.200000e+08,kg,12.0,6.058481e+11,0.001188,1.412334e-06,0.120238,0.075656,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,IRON ORE (FE-CONT.),2601.0,org,2000,0.000000e+00,kg,32.0,6.058481e+11,0.000000,0.000000e+00,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,ARG,Argentina,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,6.811000e+04,False,-1.0,,,False,6.811000e+04,False,,False,,1.093500e+04,1.093500e+04,0.0,False,False,IRON,2.743603e+11,2.482502e-07,6.162818e-14,0.383618,0.342631
3,IRON ORE (FE-CONT.),2601.0,org,2000,1.077287e+11,kg,36.0,6.058481e+11,0.177815,3.161809e-02,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,AUS,Australia,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,1.652031e+11,False,-1.0,,,False,1.652031e+11,False,,False,,2.560404e+09,2.560404e+09,0.0,False,False,IRON,2.743603e+11,6.021392e-01,3.625716e-01,0.383618,0.342631
4,IRON ORE (FE-CONT.),2601.0,org,2000,5.950240e+08,kg,40.0,6.058481e+11,0.000982,9.645870e-07,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,AUT,Austria,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,2.800000e+03,False,-1.0,,,False,2.800000e+03,False,,False,,1.340000e+02,1.340000e+02,0.0,False,False,IRON,2.743603e+11,1.020556e-08,1.041534e-16,0.383618,0.342631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71228,,261510.0,,2020,,kg,752.0,,,,,,C,A,20200101.0,2020.0,52.0,SWE,Sweden,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,4.570543e+04,True,-1.0,,0.0,False,4.570543e+04,True,0.0,False,0.0,2.909783e+04,2.909783e+04,6.0,False,True,ZIRCONIUM,5.196047e+08,8.796192e-05,7.737300e-09,0.121045,0.052793
71229,,261510.0,,2020,,kg,757.0,,,,,,C,A,20200101.0,2020.0,52.0,CHE,Switzerland,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,2.500000e+01,True,-1.0,,0.0,False,2.500000e+01,True,0.0,False,0.0,2.665140e+02,2.665140e+02,6.0,False,True,ZIRCONIUM,5.196047e+08,4.811350e-08,2.314909e-15,0.121045,0.052793
71230,,261510.0,,2020,,kg,784.0,,,,,,C,A,20200101.0,2020.0,52.0,ARE,United Arab Emirates,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,8.278620e+05,False,8.0,kg,827862.0,False,8.278620e+05,False,0.0,False,0.0,1.421075e+06,1.421075e+06,0.0,False,True,ZIRCONIUM,5.196047e+08,1.593254e-03,2.538457e-06,0.121045,0.052793
71231,,261510.0,,2020,,kg,826.0,,,,,,C,A,20200101.0,2020.0,52.0,GBR,United Kingdom,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,1.223196e+06,False,8.0,kg,1223196.0,False,1.223196e+06,False,0.0,False,0.0,3.896642e+06,3.896642e+06,0.0,False,True,ZIRCONIUM,5.196047e+08,2.354090e-03,5.541738e-06,0.121045,0.052793


In [205]:
# Join HHI table with (indicators JOIN production)
df_hhi_production_egsehi = pd.merge(df_production_egsehi, df_hhi_master, how="left", 
    left_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"], 
    right_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"])
df_hhi_production_egsehi

Unnamed: 0,Country,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Product_x,Value,Total_value_YearProduct,Share in % (production),Share HHI Production,HHI_production,HHI_production_scaled,typeCode,freqCode,refPeriodId,refYear,refMonth,reporterISO,reporterDesc,flowCode,flowDesc,partnerCode,partnerISO,partnerDesc,partner2Code,partner2ISO,partner2Desc,classificationCode,classificationSearchCode,isOriginalClassification,cmdDesc,aggrLevel,isLeaf,customsCode,customsDesc,mosCode,motCode,motDesc,qtyUnitCode,qty,isQtyEstimated,altQtyUnitCode,altQtyUnitAbbr,altQty,isAltQtyEstimated,netWgt,isNetWgtEstimated,grossWgt,isGrossWgtEstimated,cifvalue,fobvalue,primaryValue,legacyEstimationFlag,isReported,isAggregate,Product_y,Total_netWgt_YearProduct,Share in % (exports),Share HHI Exports,HHI_exports,HHI_exports_scaled
0,ALBANIA,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,ALGERIA,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,ARGENTINA,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,AUSTRALIA,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,AUSTRIA,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453200,SPAIN,URANIUM (U3O8-CONT.),261210.0,org,2020,,,kg,724.0,ESP,0.501205,False,0.498795,0.353548,False,0.646452,0.537246,False,0.462754,0.281208,False,0.373402,False,0.335442,False,False,0.294127,0.003353,0.386880,URANIUM (U3O8-CONT.),,56269000.0,,,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
453201,UKRAINE,URANIUM (U3O8-CONT.),261210.0,org,2020,8.770000e+05,877.0,kg,804.0,UKR,0.578567,False,0.421433,0.636512,False,0.363488,0.503279,False,0.496721,0.426281,False,0.469000,False,0.292860,False,False,0.646157,0.010852,0.470525,URANIUM (U3O8-CONT.),877000.0,56269000.0,0.015586,2.429186e-04,0.218243,0.182702,C,A,20200101.0,2020.0,52.0,UKR,Ukraine,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Uranium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,0.2,False,8.0,kg,0.2,False,0.2,False,0.0,False,0.0,23.29,23.29,0.0,False,True,URANIUM,6182528.2,3.234923e-08,1.046472e-15,0.578258,0.557482
453202,UNITED STATES,URANIUM (U3O8-CONT.),261210.0,org,2020,7.000000e+03,7.0,kg,842.0,USA,0.386435,False,0.613565,0.306741,False,0.693259,0.346982,False,0.653018,0.424584,False,0.453989,False,0.275346,False,False,0.388336,0.002183,0.360170,URANIUM (U3O8-CONT.),7000.0,56269000.0,0.000124,1.547596e-08,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
453203,UZBEKISTAN,URANIUM (U3O8-CONT.),261210.0,org,2020,4.127000e+06,4127.0,kg,860.0,UZB,0.473302,False,0.526698,0.671251,False,0.328749,0.697873,False,0.302127,0.506797,False,0.422226,False,0.267437,False,False,0.481922,0.012688,0.482948,URANIUM (U3O8-CONT.),4127000.0,56269000.0,0.073344,5.379359e-03,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [184]:
# GET entries whose hhi indexes are amepty
# df_hhi_production_egsehi[df_hhi_production_egsehi["HHI_production"].isna() & df_hhi_production_egsehi["HHI_exports"].isna()]

# All the rows have infiormation
df_hhi_production_egsehi[~df_hhi_production_egsehi["HHI_production"].isna()]

# # In this case we have a lot of rows with HhI_exports empty because export data contains information from 1970 to 2020 whilst the rest of data is from 2000 to 2020
# df_hhi_production_egsehi[df_hhi_production_egsehi["HHI_exports"].isna()  & (df_hhi_production_egsehi["Year"] >= 2000)][["Year", "Country", "Product"]].drop_duplicates().iloc[100:120]

Unnamed: 0,Country,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,ISO3,value_economic,no_value_economic,value_governance,no_value_governance,value_social,no_value_social,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Product_x,Value,Total_value_YearProduct,Share in % (production),Share HHI Production,HHI_production,HHI_production_scaled,typeCode,freqCode,refPeriodId,refYear,refMonth,reporterISO,reporterDesc,flowCode,flowDesc,partnerCode,partnerISO,partnerDesc,partner2Code,partner2ISO,partner2Desc,classificationCode,classificationSearchCode,isOriginalClassification,cmdDesc,aggrLevel,isLeaf,customsCode,customsDesc,mosCode,motCode,motDesc,qtyUnitCode,qty,isQtyEstimated,altQtyUnitCode,altQtyUnitAbbr,altQty,isAltQtyEstimated,netWgt,isNetWgtEstimated,grossWgt,isGrossWgtEstimated,cifvalue,fobvalue,primaryValue,legacyEstimationFlag,isReported,isAggregate,Product_y,Total_netWgt_YearProduct,Share in % (exports),Share HHI Exports,HHI_exports,HHI_exports_scaled
1200,ALBANIA,IRON ORE (FE-CONT.),2601.0,org,2000,5.000000e+06,5000.0,kg,8.0,ALB,0.393305,False,0.371652,False,0.232460,False,0.479632,False,0.507264,False,0.499336,False,False,0.547124,0.004128,0.400521,IRON ORE (FE-CONT.),5.000000e+06,6.058481e+11,0.000008,6.811025e-11,0.120238,0.075656,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1201,ALGERIA,IRON ORE (FE-CONT.),2601.0,org,2000,7.200000e+08,720000.0,kg,12.0,DZA,0.416625,False,0.296768,False,0.203393,False,0.416637,False,0.455111,False,0.157379,False,False,0.759974,0.000750,0.301453,IRON ORE (FE-CONT.),7.200000e+08,6.058481e+11,0.001188,1.412334e-06,0.120238,0.075656,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1202,ARGENTINA,IRON ORE (FE-CONT.),2601.0,org,2000,0.000000e+00,0.0,kg,32.0,ARG,0.390098,False,0.504022,False,0.245704,False,0.423649,False,0.585977,False,0.285047,False,False,0.395388,0.003419,0.388126,IRON ORE (FE-CONT.),0.000000e+00,6.058481e+11,0.000000,0.000000e+00,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,ARG,Argentina,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,6.811000e+04,False,-1.0,,,False,6.811000e+04,False,,False,,1.093500e+04,1.093500e+04,0.0,False,False,IRON,2.743603e+11,2.482502e-07,6.162818e-14,0.383618,0.342631
1203,AUSTRALIA,IRON ORE (FE-CONT.),2601.0,org,2000,1.077287e+11,107728740.0,kg,36.0,AUS,0.504854,False,0.838319,False,0.498050,False,0.382859,False,0.499155,False,0.200596,False,False,0.101314,0.008081,0.447961,IRON ORE (FE-CONT.),1.077287e+11,6.058481e+11,0.177815,3.161809e-02,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,AUS,Australia,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,1.652031e+11,False,-1.0,,,False,1.652031e+11,False,,False,,2.560404e+09,2.560404e+09,0.0,False,False,IRON,2.743603e+11,6.021392e-01,3.625716e-01,0.383618,0.342631
1204,AUSTRIA,IRON ORE (FE-CONT.),2601.0,org,2000,5.950240e+08,595024.0,kg,40.0,AUT,0.458602,False,0.809302,False,0.604748,False,0.290711,False,0.345209,False,0.379297,True,False,0.222907,0.008544,0.452141,IRON ORE (FE-CONT.),5.950240e+08,6.058481e+11,0.000982,9.645870e-07,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,AUT,Austria,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,2.800000e+03,False,-1.0,,,False,2.800000e+03,False,,False,,1.340000e+02,1.340000e+02,0.0,False,False,IRON,2.743603e+11,1.020556e-08,1.041534e-16,0.383618,0.342631
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453200,SPAIN,URANIUM (U3O8-CONT.),261210.0,org,2020,,,kg,724.0,ESP,0.498795,False,0.646452,False,0.462754,False,0.281208,False,0.373402,False,0.335442,False,False,0.294127,0.005256,0.416970,URANIUM (U3O8-CONT.),,5.626900e+07,,,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
453201,UKRAINE,URANIUM (U3O8-CONT.),261210.0,org,2020,8.770000e+05,877.0,kg,804.0,UKR,0.421433,False,0.363488,False,0.496721,False,0.426281,False,0.469000,False,0.292860,False,False,0.646157,0.004455,0.405642,URANIUM (U3O8-CONT.),8.770000e+05,5.626900e+07,0.015586,2.429186e-04,0.218243,0.182702,C,A,20200101.0,2020.0,52.0,UKR,Ukraine,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Uranium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,2.000000e-01,False,8.0,kg,0.2,False,2.000000e-01,False,0.0,False,0.0,2.329000e+01,2.329000e+01,0.0,False,True,URANIUM,6.182528e+06,3.234923e-08,1.046472e-15,0.578258,0.557482
453202,UNITED STATES,URANIUM (U3O8-CONT.),261210.0,org,2020,7.000000e+03,7.0,kg,842.0,USA,0.613565,False,0.693259,False,0.653018,False,0.424584,False,0.453989,False,0.275346,False,False,0.388336,0.014742,0.495178,URANIUM (U3O8-CONT.),7.000000e+03,5.626900e+07,0.000124,1.547596e-08,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
453203,UZBEKISTAN,URANIUM (U3O8-CONT.),261210.0,org,2020,4.127000e+06,4127.0,kg,860.0,UZB,0.526698,False,0.328749,False,0.302127,False,0.506797,False,0.422226,False,0.267437,False,False,0.481922,0.002994,0.379638,URANIUM (U3O8-CONT.),4.127000e+06,5.626900e+07,0.073344,5.379359e-03,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
df_hhi_master[(df_hhi_master["Year"]==2002) & (df_hhi_master["Country"] == "URUGUAY")]

In [99]:
df_hhi_master[(df_hhi_master["Year"]==2009) & (df_hhi_master["Country"] == "CROATIA")]

Unnamed: 0,Year,Country,Value,Product,reporterCode,reporterDesc_x,Total_value_YearProduct,Share in % (production),Share HHI Production,HHI_production,...,cifvalue,fobvalue,primaryValue,legacyEstimationFlag,isReported,isAggregate,Total_netWgt_YearProduct,Share in % (exports),Share HHI Exports,HHI_exports
294,2009,CROATIA,500.0,ALUMINIUM,191.0,CROATIA,197735840.0,0.000253,6.39395e-08,1713.133234,...,,,,,,,,,,
6720,2009,CROATIA,,BARYTES,191.0,CROATIA,7662051.0,,,2359.851398,...,,9777.0,9777.0,0.0,True,False,3581624000.0,0.000142,2.027591e-08,3146.684482
47963,2009,CROATIA,,SILVER,191.0,CROATIA,22322463.0,,,981.801726,...,,,,,,,,,,


In [206]:
# FOR NULL DATA BUT FROM 1970 TO 2020 (df_hhi_master only contain data from 2000 t0 2020)
df_hhi_production_egsehi_1970and2020 = pd.merge(df_hhi_master, df_production_egsehi, how="left", 
    left_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"], 
    right_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"]
)

In [207]:
df_hhi_production_egsehi_1970and2020

Unnamed: 0,Product_x,cmdCode,Source,Year,Value,qtyUnitAbbr,reporterCode,Total_value_YearProduct,Share in % (production),Share HHI Production,HHI_production,HHI_production_scaled,typeCode,freqCode,refPeriodId,refYear,refMonth,reporterISO,reporterDesc,flowCode,flowDesc,partnerCode,partnerISO,partnerDesc,partner2Code,partner2ISO,partner2Desc,classificationCode,classificationSearchCode,isOriginalClassification,cmdDesc,aggrLevel,isLeaf,customsCode,customsDesc,mosCode,motCode,motDesc,qtyUnitCode,qty,isQtyEstimated,altQtyUnitCode,altQtyUnitAbbr,altQty,isAltQtyEstimated,netWgt,isNetWgtEstimated,grossWgt,isGrossWgtEstimated,cifvalue,fobvalue,primaryValue,legacyEstimationFlag,isReported,isAggregate,Product_y,Total_netWgt_YearProduct,Share in % (exports),Share HHI Exports,HHI_exports,HHI_exports_scaled,Country,Product,Domestic Production Value (P_AC),Value (Ton),ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root
0,IRON ORE (FE-CONT.),2601.0,org,2000,5.000000e+06,kg,8.0,6.058481e+11,0.000008,6.811025e-11,0.120238,0.075656,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ALBANIA,IRON ORE (FE-CONT.),5.000000e+06,5000.0,ALB,0.606695,False,0.393305,0.628348,False,0.371652,0.767540,False,0.232460,0.479632,False,0.507264,False,0.499336,False,False,0.547124,0.035547,0.573413
1,IRON ORE (FE-CONT.),2601.0,org,2000,7.200000e+08,kg,12.0,6.058481e+11,0.001188,1.412334e-06,0.120238,0.075656,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,ALGERIA,IRON ORE (FE-CONT.),7.200000e+08,720000.0,DZA,0.583375,False,0.416625,0.703232,False,0.296768,0.796607,False,0.203393,0.416637,False,0.455111,False,0.157379,False,False,0.759974,0.009752,0.462224
2,IRON ORE (FE-CONT.),2601.0,org,2000,0.000000e+00,kg,32.0,6.058481e+11,0.000000,0.000000e+00,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,ARG,Argentina,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,6.811000e+04,False,-1.0,,,False,6.811000e+04,False,,False,,1.093500e+04,1.093500e+04,0.0,False,False,IRON,2.743603e+11,2.482502e-07,6.162818e-14,0.383618,0.342631,ARGENTINA,IRON ORE (FE-CONT.),0.000000e+00,0.0,ARG,0.609902,False,0.390098,0.495978,False,0.504022,0.754296,False,0.245704,0.423649,False,0.585977,False,0.285047,False,False,0.395388,0.016146,0.502741
3,IRON ORE (FE-CONT.),2601.0,org,2000,1.077287e+11,kg,36.0,6.058481e+11,0.177815,3.161809e-02,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,AUS,Australia,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,1.652031e+11,False,-1.0,,,False,1.652031e+11,False,,False,,2.560404e+09,2.560404e+09,0.0,False,False,IRON,2.743603e+11,6.021392e-01,3.625716e-01,0.383618,0.342631,AUSTRALIA,IRON ORE (FE-CONT.),1.077287e+11,107728740.0,AUS,0.495146,False,0.504854,0.161681,False,0.838319,0.501950,False,0.498050,0.382859,False,0.499155,False,0.200596,False,False,0.101314,0.001540,0.339840
4,IRON ORE (FE-CONT.),2601.0,org,2000,5.950240e+08,kg,40.0,6.058481e+11,0.000982,9.645870e-07,0.120238,0.075656,C,A,20000101.0,2000.0,52.0,AUT,Austria,X,Export,0.0,W00,World,0.0,W00,World,H1,HS,True,Iron ores and concentrates; including roasted ...,4.0,False,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,2.800000e+03,False,-1.0,,,False,2.800000e+03,False,,False,,1.340000e+02,1.340000e+02,0.0,False,False,IRON,2.743603e+11,1.020556e-08,1.041534e-16,0.383618,0.342631,AUSTRIA,IRON ORE (FE-CONT.),5.950240e+08,595024.0,AUT,0.541398,False,0.458602,0.190698,False,0.809302,0.395252,False,0.604748,0.290711,False,0.345209,False,0.379297,True,False,0.222907,0.001553,0.340311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
426338,,261510.0,,2020,,kg,752.0,,,,,,C,A,20200101.0,2020.0,52.0,SWE,Sweden,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,4.570543e+04,True,-1.0,,0.0,False,4.570543e+04,True,0.0,False,0.0,2.909783e+04,2.909783e+04,6.0,False,True,ZIRCONIUM,5.196047e+08,8.796192e-05,7.737300e-09,0.121045,0.052793,,,,,,,,,,,,,,,,,,,,,,,,
426339,,261510.0,,2020,,kg,757.0,,,,,,C,A,20200101.0,2020.0,52.0,CHE,Switzerland,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,2.500000e+01,True,-1.0,,0.0,False,2.500000e+01,True,0.0,False,0.0,2.665140e+02,2.665140e+02,6.0,False,True,ZIRCONIUM,5.196047e+08,4.811350e-08,2.314909e-15,0.121045,0.052793,,,,,,,,,,,,,,,,,,,,,,,,
426340,,261510.0,,2020,,kg,784.0,,,,,,C,A,20200101.0,2020.0,52.0,ARE,United Arab Emirates,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,8.278620e+05,False,8.0,kg,827862.0,False,8.278620e+05,False,0.0,False,0.0,1.421075e+06,1.421075e+06,0.0,False,True,ZIRCONIUM,5.196047e+08,1.593254e-03,2.538457e-06,0.121045,0.052793,,,,,,,,,,,,,,,,,,,,,,,,
426341,,261510.0,,2020,,kg,826.0,,,,,,C,A,20200101.0,2020.0,52.0,GBR,United Kingdom,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Zirconium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,1.223196e+06,False,8.0,kg,1223196.0,False,1.223196e+06,False,0.0,False,0.0,3.896642e+06,3.896642e+06,0.0,False,True,ZIRCONIUM,5.196047e+08,2.354090e-03,5.541738e-06,0.121045,0.052793,,,,,,,,,,,,,,,,,,,,,,,,


#### Save conformed table

In [163]:
df_hhi_production_egsehi

Unnamed: 0,Country,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,ISO3,value_economic,no_value_economic,value_governance,no_value_governance,value_social,no_value_social,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_raw_isolated,value_governance_raw_isolated,EGSEHI,EGSEHI_6root,Product_x,Value,Total_value_YearProduct,Share in % (production),Share HHI Production,HHI_production,HHI_production_scaled,typeCode,freqCode,refPeriodId,refYear,refMonth,reporterISO,reporterDesc,flowCode,flowDesc,partnerCode,partnerISO,partnerDesc,partner2Code,partner2ISO,partner2Desc,classificationCode,classificationSearchCode,isOriginalClassification,cmdDesc,aggrLevel,isLeaf,customsCode,customsDesc,mosCode,motCode,motDesc,qtyUnitCode,qty,isQtyEstimated,altQtyUnitCode,altQtyUnitAbbr,altQty,isAltQtyEstimated,netWgt,isNetWgtEstimated,grossWgt,isGrossWgtEstimated,cifvalue,fobvalue,primaryValue,legacyEstimationFlag,isReported,isAggregate,Product_y,Total_netWgt_YearProduct,Share in % (exports),Share HHI Exports,HHI_exports,HHI_exports_scaled
0,ALBANIA,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,ALGERIA,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,ARGENTINA,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,AUSTRALIA,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,AUSTRIA,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
453200,SPAIN,URANIUM (U3O8-CONT.),261210.0,org,2020,,,kg,724.0,ESP,0.498795,False,0.646452,False,0.462754,False,0.281208,False,0.373402,False,0.335442,False,False,0.294127,0.005256,0.416970,URANIUM (U3O8-CONT.),,56269000.0,,,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
453201,UKRAINE,URANIUM (U3O8-CONT.),261210.0,org,2020,8.770000e+05,877.0,kg,804.0,UKR,0.421433,False,0.363488,False,0.496721,False,0.426281,False,0.469000,False,0.292860,False,False,0.646157,0.004455,0.405642,URANIUM (U3O8-CONT.),877000.0,56269000.0,0.015586,2.429186e-04,0.218243,0.182702,C,A,20200101.0,2020.0,52.0,UKR,Ukraine,X,Export,0.0,W00,World,0.0,W00,World,H5,HS,True,Uranium ores and concentrates,6.0,True,C00,TOTAL CPC,0.0,0.0,TOTAL MOT,8.0,0.2,False,8.0,kg,0.2,False,0.2,False,0.0,False,0.0,23.29,23.29,0.0,False,True,URANIUM,6182528.2,3.234923e-08,1.046472e-15,0.578258,0.557482
453202,UNITED STATES,URANIUM (U3O8-CONT.),261210.0,org,2020,7.000000e+03,7.0,kg,842.0,USA,0.613565,False,0.693259,False,0.653018,False,0.424584,False,0.453989,False,0.275346,False,False,0.388336,0.014742,0.495178,URANIUM (U3O8-CONT.),7000.0,56269000.0,0.000124,1.547596e-08,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
453203,UZBEKISTAN,URANIUM (U3O8-CONT.),261210.0,org,2020,4.127000e+06,4127.0,kg,860.0,UZB,0.526698,False,0.328749,False,0.302127,False,0.506797,False,0.422226,False,0.267437,False,False,0.481922,0.002994,0.379638,URANIUM (U3O8-CONT.),4127000.0,56269000.0,0.073344,5.379359e-03,0.218243,0.182702,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [208]:
df_hhi_production_egsehi.to_csv("../data/hhi_production_exports_egsehi_conformed_table.csv", index=False)

In [209]:
df_hhi_production_egsehi_1970and2020.to_csv("../data/hhi_production_exports_egsehi_null_19702020_conformed_table.csv", index=False)