In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Show every column when displaying wide DataFrames (None = no column limit).
pd.set_option('display.max_columns', None)

## Join all the indicators

In [3]:
# Load the six indicator tables: three under readiness/, three under vulnerability/.
# Only the economic table keeps its "Name" column; every other table drops it
# right after loading so the later merges carry a single country-name column.
df_economic = pd.read_csv("../data/resources/readiness/melted_economic.csv")

df_governance = pd.read_csv("../data/resources/readiness/melted_governance.csv").drop(columns="Name")
df_social = pd.read_csv("../data/resources/readiness/melted_social.csv").drop(columns="Name")

df_ecosystems = pd.read_csv("../data/resources/vulnerability/melted_ecosystems.csv").drop(columns="Name")
df_habitat = pd.read_csv("../data/resources/vulnerability/melted_habitat.csv").drop(columns="Name")
df_infrastructure = pd.read_csv("../data/resources/vulnerability/melted_infrastructure.csv").drop(columns="Name")

# The political-stability indicator lives in its own folder (id_gove_01) and is
# loaded the same way, without "Name".
df_politicalStability = pd.read_csv("../data/resources/indicators/id_gove_01/melted_governance.csv").drop(columns="Name")

In [4]:
# Outer-join the six indicator tables on (ISO3, Year), starting from the economic
# table and folding the others in one at a time, in this fixed order.
df_indicators = df_economic
for indicator_table in (df_governance, df_social, df_ecosystems, df_habitat, df_infrastructure):
    df_indicators = df_indicators.merge(indicator_table, how="outer", on=["ISO3", "Year"])

In [5]:
# Attach the political-stability table with the same outer join on (ISO3, Year),
# then display the combined frame.
df_indicators = df_indicators.merge(df_politicalStability, how="outer", on=["ISO3", "Year"])
df_indicators

Unnamed: 0,ISO3,Name,Year,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated
0,AFG,Afghanistan,1995,0.503503,False,0.496497,0.861229,False,0.138771,0.704018,False,0.295982,0.516028,False,0.603153,False,0.383710,True,False,0.976884
1,ALB,Albania,1995,0.606695,False,0.393305,0.616482,False,0.383518,0.774301,False,0.225699,0.484339,False,0.509650,False,0.474276,False,False,0.461108
2,DZA,Algeria,1995,0.583375,False,0.416625,0.707108,False,0.292892,0.805221,False,0.194779,0.411816,False,0.467930,False,0.157379,False,False,0.819686
3,AND,Andorra,1995,0.589592,True,0.410408,0.498106,True,0.501894,0.843236,False,0.156764,0.477737,True,0.527740,True,0.383710,True,True,0.366254
4,AGO,Angola,1995,0.710943,False,0.289057,0.798282,False,0.201718,0.916844,False,0.083156,0.544357,False,0.648126,False,0.326570,False,False,0.888430
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,"Venezuela, Bolivarian Republic o",2020,0.888756,False,0.111244,0.872393,False,0.127607,0.668596,False,0.331404,0.351461,False,0.604549,False,0.208577,False,False,0.726031
4988,VNM,Viet Nam,2020,0.509260,False,0.490740,0.531500,False,0.468500,0.683830,False,0.316170,0.555078,False,0.433713,False,0.562195,False,False,0.399939
4989,YEM,Yemen,2020,0.620414,False,0.379586,0.879826,False,0.120174,0.756106,False,0.243894,0.570795,False,0.611543,False,0.340751,False,False,0.986514
4990,ZMB,Zambia,2020,0.586171,False,0.413829,0.600859,False,0.399141,0.849755,False,0.150245,0.433906,False,0.594931,False,0.560437,False,False,0.412301


In [6]:
# Harmonize country naming: rows labelled "United States" are relabelled "USA".
invalid_rows = df_indicators["Name"].eq("United States")
df_indicators["Name"] = df_indicators["Name"].mask(invalid_rows, "USA")

In [7]:
# Geometric mean of the six indicators (EGSEHI):
#   EGSEHI          = product of the six value_* columns
#   EGSEHI_<root>root = EGSEHI ** (1 / root), with root = 6
# NOTE(review): the original note says readiness indicators enter as "1 - value";
# presumably the value_* readiness columns are already inverted upstream - confirm.
egsehi_columns = [
    "value_economic", "value_governance", "value_social",
    "value_ecosystems", "value_habitat", "value_infrastructure",
]
# Multiply left to right, in the listed order; NaN in any factor propagates.
egsehi_product = df_indicators[egsehi_columns[0]]
for egsehi_column in egsehi_columns[1:]:
    egsehi_product = egsehi_product * df_indicators[egsehi_column]
df_indicators["EGSEHI"] = egsehi_product

root = 6
df_indicators[f"EGSEHI_{root}root"] = np.power(df_indicators["EGSEHI"], 1/root)

In [8]:
# Replace "Name" with an upper-cased "Country" column: pop() removes "Name" and
# hands back its values, which are upper-cased and appended as the last column.
df_indicators["Country"] = df_indicators.pop("Name").str.upper()

In [9]:
# Write the conformed indicators to disk, leaving out the row index.
df_indicators.to_csv(path_or_buf="../data/indicators_conformed_file.csv", index=False)

In [10]:
# Display the conformed indicators frame (the same frame that was just written to CSV).
df_indicators

Unnamed: 0,ISO3,Year,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
0,AFG,1995,0.503503,False,0.496497,0.861229,False,0.138771,0.704018,False,0.295982,0.516028,False,0.603153,False,0.383710,True,False,0.976884,0.036459,0.575839,AFGHANISTAN
1,ALB,1995,0.606695,False,0.393305,0.616482,False,0.383518,0.774301,False,0.225699,0.484339,False,0.509650,False,0.474276,False,False,0.461108,0.033904,0.568908,ALBANIA
2,DZA,1995,0.583375,False,0.416625,0.707108,False,0.292892,0.805221,False,0.194779,0.411816,False,0.467930,False,0.157379,False,False,0.819686,0.010073,0.464725,ALGERIA
3,AND,1995,0.589592,True,0.410408,0.498106,True,0.501894,0.843236,False,0.156764,0.477737,True,0.527740,True,0.383710,True,True,0.366254,0.023957,0.536915,ANDORRA
4,AGO,1995,0.710943,False,0.289057,0.798282,False,0.201718,0.916844,False,0.083156,0.544357,False,0.648126,False,0.326570,False,False,0.888430,0.059952,0.625606,ANGOLA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,2020,0.888756,False,0.111244,0.872393,False,0.127607,0.668596,False,0.331404,0.351461,False,0.604549,False,0.208577,False,False,0.726031,0.022974,0.533178,"VENEZUELA, BOLIVARIAN REPUBLIC O"
4988,VNM,2020,0.509260,False,0.490740,0.531500,False,0.468500,0.683830,False,0.316170,0.555078,False,0.433713,False,0.562195,False,False,0.399939,0.025052,0.540927,VIET NAM
4989,YEM,2020,0.620414,False,0.379586,0.879826,False,0.120174,0.756106,False,0.243894,0.570795,False,0.611543,False,0.340751,False,False,0.986514,0.049091,0.605110,YEMEN
4990,ZMB,2020,0.586171,False,0.413829,0.600859,False,0.399141,0.849755,False,0.150245,0.433906,False,0.594931,False,0.560437,False,False,0.412301,0.043299,0.592580,ZAMBIA


### Add reporter codes to the countries in `df_indicators`

In [11]:
# final_production_df["Country"] = final_production_df["Country"].str.upper()
# final_production_df["Product"] = final_production_df["Product"].str.upper()

# Lookup table: country name -> numeric reporter/partner code.
# Each entry is {'reporterCode': <int>, 'reporterDesc': <name>}. Several spellings
# of the same country intentionally share one code so that differently formatted
# source names all resolve to the same code.
# Fix: 'SOUTH SUDAN' previously pointed at 736, the code this list assigns to
# 'Sudan (...2011)' (the pre-split Sudan); South Sudan's own code is 728.
countries_list = [
    {'reporterCode': 36, 'reporterDesc': 'Australia'},
    {'reporterCode': 40, 'reporterDesc': 'Austria'},
    {'reporterCode': 56, 'reporterDesc': 'Belgium'},
    {'reporterCode': 68, 'reporterDesc': 'Bolivia (Plurinational State of)'},
    {'reporterCode': 68, 'reporterDesc': 'BOLIVIA, PLURINATIONAL STATE OF'},
    {'reporterCode': 68, 'reporterDesc': 'Bolivia'},
    {'reporterCode': 76, 'reporterDesc': 'Brazil'},
    {'reporterCode': 156, 'reporterDesc': 'China'},
    {'reporterCode': 251, 'reporterDesc': 'France'},
    {'reporterCode': 276, 'reporterDesc': 'Germany'},
    {'reporterCode': 278, 'reporterDesc': 'GERMAN DEM. REP'},
    {'reporterCode': 320, 'reporterDesc': 'Guatemala'},
    {'reporterCode': 360, 'reporterDesc': 'Indonesia'},
    {'reporterCode': 380, 'reporterDesc': 'Italy'},
    {'reporterCode': 417, 'reporterDesc': 'Kyrgyzstan'},
    {'reporterCode': 504, 'reporterDesc': 'Morocco'},
    {'reporterCode': 528, 'reporterDesc': 'Netherlands'},
    {'reporterCode': 710, 'reporterDesc': 'South Africa'},
    {'reporterCode': 757, 'reporterDesc': 'Switzerland'},
    {'reporterCode': 764, 'reporterDesc': 'Thailand'},
    {'reporterCode': 792, 'reporterDesc': 'Türkiye'},
    {'reporterCode': 807, 'reporterDesc': 'North Macedonia'},
    {'reporterCode': 807, 'reporterDesc': 'MACEDONIA'},
    {'reporterCode': 842, 'reporterDesc': 'USA'},
    {'reporterCode': 842, 'reporterDesc': 'UNITED STATES'},

    {'reporterCode': 32, 'reporterDesc': 'Argentina'},
    {'reporterCode': 100, 'reporterDesc': 'Bulgaria'},
    {'reporterCode': 112, 'reporterDesc': 'Belarus'},
    {'reporterCode': 124, 'reporterDesc': 'Canada'},
    {'reporterCode': 144, 'reporterDesc': 'Sri Lanka'},
    {'reporterCode': 152, 'reporterDesc': 'Chile'},
    {'reporterCode': 170, 'reporterDesc': 'Colombia'},
    {'reporterCode': 196, 'reporterDesc': 'Cyprus'},
    {'reporterCode': 222, 'reporterDesc': 'El Salvador'},
    {'reporterCode': 233, 'reporterDesc': 'Estonia'},
    {'reporterCode': 300, 'reporterDesc': 'Greece'},
    {'reporterCode': 372, 'reporterDesc': 'Ireland'},
    {'reporterCode': 398, 'reporterDesc': 'Kazakhstan'},
    {'reporterCode': 404, 'reporterDesc': 'Kenya'},
    {'reporterCode': 410, 'reporterDesc': 'Rep. of Korea'},
    {'reporterCode': 410, 'reporterDesc': 'KOREA, REPUBLIC OF'},
    {'reporterCode': 410, 'reporterDesc': 'KOREA, SOUTH'},
    {'reporterCode': 440, 'reporterDesc': 'Lithuania'},
    {'reporterCode': 484, 'reporterDesc': 'Mexico'},
    {'reporterCode': 512, 'reporterDesc': 'Oman'},
    {'reporterCode': 516, 'reporterDesc': 'Namibia'},
    {'reporterCode': 604, 'reporterDesc': 'Peru'},
    {'reporterCode': 620, 'reporterDesc': 'Portugal'},
    {'reporterCode': 642, 'reporterDesc': 'Romania'},
    {'reporterCode': 643, 'reporterDesc': 'Russian Federation'},
    {'reporterCode': 643, 'reporterDesc': 'RUSSIA'},
    {'reporterCode': 682, 'reporterDesc': 'Saudi Arabia'},
    {'reporterCode': 686, 'reporterDesc': 'Senegal'},
    {'reporterCode': 699, 'reporterDesc': 'India'},
    {'reporterCode': 702, 'reporterDesc': 'Singapore'},
    {'reporterCode': 716, 'reporterDesc': 'Zimbabwe'},
    {'reporterCode': 724, 'reporterDesc': 'Spain'},
    {'reporterCode': 748, 'reporterDesc': 'Eswatini'},
    {'reporterCode': 752, 'reporterDesc': 'Sweden'},
    {'reporterCode': 780, 'reporterDesc': 'Trinidad and Tobago'},
    {'reporterCode': 784, 'reporterDesc': 'United Arab Emirates'},
    {'reporterCode': 788, 'reporterDesc': 'Tunisia'},
    {'reporterCode': 795, 'reporterDesc': 'Turkmenistan'},
    {'reporterCode': 804, 'reporterDesc': 'Ukraine'},
    {'reporterCode': 826, 'reporterDesc': 'United Kingdom'},
    {'reporterCode': 891, 'reporterDesc': 'Serbia and Montenegro (...2005)'},
    {'reporterCode': 31, 'reporterDesc': 'Azerbaijan'},
    {'reporterCode': 44, 'reporterDesc': 'Bahamas'},
    {'reporterCode': 120, 'reporterDesc': 'Cameroon'},
    {'reporterCode': 191, 'reporterDesc': 'Croatia'},
    {'reporterCode': 203, 'reporterDesc': 'Czechia'},
    {'reporterCode': 203, 'reporterDesc': 'CZECH REPUBLIC'},
    {'reporterCode': 208, 'reporterDesc': 'Denmark'},
    {'reporterCode': 246, 'reporterDesc': 'Finland'},
    {'reporterCode': 266, 'reporterDesc': 'Gabon'},
    {'reporterCode': 268, 'reporterDesc': 'Georgia'},
    {'reporterCode': 344, 'reporterDesc': 'China, Hong Kong SAR'},
    {'reporterCode': 344, 'reporterDesc': 'HONG KONG'},
    {'reporterCode': 364, 'reporterDesc': 'Iran'},
    {'reporterCode': 364, 'reporterDesc': 'IRAN, ISLAMIC REPUBLIC OF'},
    {'reporterCode': 392, 'reporterDesc': 'Japan'},
    {'reporterCode': 422, 'reporterDesc': 'Lebanon'},
    {'reporterCode': 480, 'reporterDesc': 'Mauritius'},
    {'reporterCode': 490, 'reporterDesc': 'Other Asia, nes'},
    {'reporterCode': 579, 'reporterDesc': 'Norway'},
    {'reporterCode': 703, 'reporterDesc': 'Slovakia'},
    {'reporterCode': 704, 'reporterDesc': 'Viet Nam'},
    {'reporterCode': 704, 'reporterDesc': 'VIETNAM'},
    {'reporterCode': 705, 'reporterDesc': 'Slovenia'},
    {'reporterCode': 862, 'reporterDesc': 'Venezuela'},
    {'reporterCode': 862, 'reporterDesc': 'VENEZUELA, BOLIVARIAN REPUBLIC O'},
    {'reporterCode': 192, 'reporterDesc': 'Cuba'},
    {'reporterCode': 450, 'reporterDesc': 'Madagascar'},
    {'reporterCode': 608, 'reporterDesc': 'Philippines'},
    {'reporterCode': 736, 'reporterDesc': 'Sudan (...2011)'},
    {'reporterCode': 8, 'reporterDesc': 'Albania'},
    {'reporterCode': 72, 'reporterDesc': 'Botswana'},
    {'reporterCode': 348, 'reporterDesc': 'Hungary'},
    {'reporterCode': 384, 'reporterDesc': "Côte d'Ivoire"},
    {'reporterCode': 384, 'reporterDesc': "COTE D'IVOIRE"},
    {'reporterCode': 400, 'reporterDesc': 'Jordan'},
    {'reporterCode': 428, 'reporterDesc': 'Latvia'},
    {'reporterCode': 442, 'reporterDesc': 'Luxembourg'},
    {'reporterCode': 458, 'reporterDesc': 'Malaysia'},
    {'reporterCode': 470, 'reporterDesc': 'Malta'},
    {'reporterCode': 496, 'reporterDesc': 'Mongolia'},
    {'reporterCode': 508, 'reporterDesc': 'Mozambique'},
    {'reporterCode': 554, 'reporterDesc': 'New Zealand'},
    {'reporterCode': 562, 'reporterDesc': 'Niger'},
    {'reporterCode': 616, 'reporterDesc': 'Poland'},
    {'reporterCode': 634, 'reporterDesc': 'Qatar'},
    {'reporterCode': 659, 'reporterDesc': 'Saint Kitts and Nevis'},
    {'reporterCode': 740, 'reporterDesc': 'Suriname'},
    {'reporterCode': 818, 'reporterDesc': 'Egypt'},
    {'reporterCode': 894, 'reporterDesc': 'Zambia'},
    {'reporterCode': 51, 'reporterDesc': 'Armenia'},
    {'reporterCode': 340, 'reporterDesc': 'Honduras'},
    {'reporterCode': 466, 'reporterDesc': 'Mali'},
    {'reporterCode': 598, 'reporterDesc': 'Papua New Guinea'},
    {'reporterCode': 20, 'reporterDesc': 'Andorra'},
    {'reporterCode': 108, 'reporterDesc': 'Burundi'},
    {'reporterCode': 116, 'reporterDesc': 'Cambodia'},
    {'reporterCode': 140, 'reporterDesc': 'Central African Rep.'},
    {'reporterCode': 140, 'reporterDesc': 'CENTRAL AFRICAN REPUBLIC'},
    {'reporterCode': 188, 'reporterDesc': 'Costa Rica'},
    {'reporterCode': 204, 'reporterDesc': 'Benin'},
    {'reporterCode': 218, 'reporterDesc': 'Ecuador'},
    {'reporterCode': 231, 'reporterDesc': 'Ethiopia'},
    {'reporterCode': 242, 'reporterDesc': 'Fiji'},
    {'reporterCode': 258, 'reporterDesc': 'French Polynesia'},
    {'reporterCode': 270, 'reporterDesc': 'Gambia'},
    {'reporterCode': 288, 'reporterDesc': 'Ghana'},
    {'reporterCode': 324, 'reporterDesc': 'Guinea'},
    {'reporterCode': 328, 'reporterDesc': 'Guyana'},
    {'reporterCode': 376, 'reporterDesc': 'Israel'},
    {'reporterCode': 414, 'reporterDesc': 'Kuwait'},
    {'reporterCode': 558, 'reporterDesc': 'Nicaragua'},
    {'reporterCode': 591, 'reporterDesc': 'Panama'},
    {'reporterCode': 762, 'reporterDesc': 'Tajikistan'},
    {'reporterCode': 834, 'reporterDesc': 'United Rep. of Tanzania'},
    {'reporterCode': 834, 'reporterDesc': 'TANZANIA, UNITED REPUBLIC OF'},
    {'reporterCode': 834, 'reporterDesc': 'TANZANIA'},
    {'reporterCode': 854, 'reporterDesc': 'Burkina Faso'},
    {'reporterCode': 858, 'reporterDesc': 'Uruguay'},
    {'reporterCode': 132, 'reporterDesc': 'Cabo Verde'},
    {'reporterCode': 132, 'reporterDesc': 'CAPE VERDE'},
    {'reporterCode': 48, 'reporterDesc': 'Bahrain'},
    {'reporterCode': 388, 'reporterDesc': 'Jamaica'},
    {'reporterCode': 478, 'reporterDesc': 'Mauritania'},
    {'reporterCode': 12, 'reporterDesc': 'Algeria'},
    {'reporterCode': 28, 'reporterDesc': 'Antigua and Barbuda'},
    {'reporterCode': 540, 'reporterDesc': 'New Caledonia'},
    {'reporterCode': 52, 'reporterDesc': 'Barbados'},
    {'reporterCode': 566, 'reporterDesc': 'Nigeria'},
    {'reporterCode': 800, 'reporterDesc': 'Uganda'},
    {'reporterCode': 426, 'reporterDesc': 'Lesotho'},
    {'reporterCode': 174, 'reporterDesc': 'Comoros'},
    {'reporterCode': 50, 'reporterDesc': 'Bangladesh'},
    {'reporterCode': 352, 'reporterDesc': 'Iceland'},
    {'reporterCode': 454, 'reporterDesc': 'Malawi'},

    {'reporterCode': 662, 'reporterDesc': 'Saint Lucia'},
    {'reporterCode': 214, 'reporterDesc': 'Dominican Rep.'},
    {'reporterCode': 214, 'reporterDesc': 'DOMINICAN REPUBLIC'},
    {'reporterCode': 882, 'reporterDesc': 'Samoa'},
    {'reporterCode': 646, 'reporterDesc': 'Rwanda'},
    {'reporterCode': 96, 'reporterDesc': 'Brunei Darussalam'},
    {'reporterCode': 96, 'reporterDesc': 'BRUNEI'},
    {'reporterCode': 776, 'reporterDesc': 'Tonga'},
    {'reporterCode': 600, 'reporterDesc': 'Paraguay'},
    {'reporterCode': 760, 'reporterDesc': 'Syria'},
    {'reporterCode': 760, 'reporterDesc': 'SYRIAN ARAB REPUBLIC'},
    {'reporterCode': 498, 'reporterDesc': 'Rep. of Moldova'},
    {'reporterCode': 498, 'reporterDesc': 'MOLDOVA, REPUBLIC OF'},
    {'reporterCode': 498, 'reporterDesc': 'MOLDOVA'},
    {'reporterCode': 768, 'reporterDesc': 'Togo'},
    {'reporterCode': 90, 'reporterDesc': 'Solomon Isds'},
    {'reporterCode': 90, 'reporterDesc': 'SOLOMON ISLANDS'},
    {'reporterCode': 690, 'reporterDesc': 'Seychelles'},
    {'reporterCode': 70, 'reporterDesc': 'Bosnia Herzegovina'},
    {'reporterCode': 70, 'reporterDesc': 'BOSNIA-HERZEGOVINA'},
    {'reporterCode': 70, 'reporterDesc': 'BOSNIA AND HERZEGOVINA'},
    {'reporterCode': 500, 'reporterDesc': 'Montserrat'},
    {'reporterCode': 586, 'reporterDesc': 'Pakistan'},
    {'reporterCode': 524, 'reporterDesc': 'Nepal'},
    {'reporterCode': 887, 'reporterDesc': 'Yemen'},
    {'reporterCode': 887, 'reporterDesc': 'YEMEN ARAB REPUBLIC'},  # north
    {'reporterCode': 887, 'reporterDesc': 'YEMEN, PDR'},
    {'reporterCode': 626, 'reporterDesc': 'Timor-Leste'},
    {'reporterCode': 798, 'reporterDesc': 'Tuvalu'},
    {'reporterCode': 64, 'reporterDesc': 'Bhutan'},
    {'reporterCode': 670, 'reporterDesc': 'Saint Vincent and the Grenadines'},
    {'reporterCode': 184, 'reporterDesc': 'Cook Isds'},
    {'reporterCode': 84, 'reporterDesc': 'Belize'},
    {'reporterCode': 530, 'reporterDesc': 'Netherlands Antilles (...2010)'},
    {'reporterCode': 688, 'reporterDesc': 'Serbia'},
    {'reporterCode': 548, 'reporterDesc': 'Vanuatu'},
    {'reporterCode': 275, 'reporterDesc': 'State of Palestine'},
    {'reporterCode': 434, 'reporterDesc': 'Libya'},
    {'reporterCode': 434, 'reporterDesc': 'LIBYAN ARAB JAMAHIRIYA'},
    {'reporterCode': 178, 'reporterDesc': 'Congo'},
    {'reporterCode': 178, 'reporterDesc': 'CONGO, REP.'},
    {'reporterCode': 24, 'reporterDesc': 'Angola'},
    {'reporterCode': 296, 'reporterDesc': 'Kiribati'},
    {'reporterCode': 499, 'reporterDesc': 'Montenegro'},
    {'reporterCode': 234, 'reporterDesc': 'Faeroe Isds'},
    {'reporterCode': 533, 'reporterDesc': 'Aruba'},
    {'reporterCode': 796, 'reporterDesc': 'Turks and Caicos Isds'},
    {'reporterCode': 104, 'reporterDesc': 'Myanmar'},
    {'reporterCode': 212, 'reporterDesc': 'Dominica'},
    {'reporterCode': 418, 'reporterDesc': "Lao People's Dem. Rep."},
    {'reporterCode': 418, 'reporterDesc': "LAO PEOPLE'S DEMOCRATIC REPUBLIC"},

    {'reporterCode': 304, 'reporterDesc': 'Greenland'},
    {'reporterCode': 368, 'reporterDesc': 'Iraq'},

    {'reporterCode': 729, 'reporterDesc': 'Sudan'},
    {'reporterCode': 446, 'reporterDesc': 'China, Macao SAR'},
    {'reporterCode': 308, 'reporterDesc': 'Grenada'},
    {'reporterCode': 585, 'reporterDesc': 'Palau'},
    {'reporterCode': 694, 'reporterDesc': 'Sierra Leone'},
    {'reporterCode': 60, 'reporterDesc': 'Bermuda'},
    {'reporterCode': 4, 'reporterDesc': 'Afghanistan'},
    {'reporterCode': 180, 'reporterDesc': 'Dem. Rep. of the Congo'},
    {'reporterCode': 180, 'reporterDesc': 'CONGO, D.R.'},
    {'reporterCode': 180, 'reporterDesc': 'CONGO, THE DEMOCRATIC REPUBLIC O'},
    {'reporterCode': 860, 'reporterDesc': 'Uzbekistan'},
    {'reporterCode': 136, 'reporterDesc': 'Cayman Isds'},

    {'reporterCode': 430, 'reporterDesc': 'Liberia'},
    {'reporterCode': 891, 'reporterDesc': 'Serbia and montenegro'},
    {'reporterCode': 332, 'reporterDesc': 'HAITI'},
    {'reporterCode': 792, 'reporterDesc': 'TURKEY'},
    {'reporterCode': 890, 'reporterDesc': 'FMR YUGOSLAVIA'},
    {'reporterCode': 890, 'reporterDesc': 'YUGOSLAVIA'},
    {'reporterCode': 200, 'reporterDesc': 'CZECHOSLOVAKIA'},
    # TODO: no code assigned yet -> {'reporterCode': , 'reporterDesc': 'FMR RHODESIA NYAS'},
    {'reporterCode': 748, 'reporterDesc': 'SWAZILAND'},
    {'reporterCode': 408, 'reporterDesc': "DEM. PEOPLE'S REP. OF KOREA"},
    {'reporterCode': 408, 'reporterDesc': "KOREA, NORTH"},
    {'reporterCode': 408, 'reporterDesc': "KOREA, DEMOCRATIC PEOPLE'S REPUB"},
    {'reporterCode': 807, 'reporterDesc': "TFYR OF MACEDONIA"},
    # TODO: no code assigned yet -> {'reporterCode': , 'reporterDesc': "OTHER AFRICA, NES"},
    # TODO: no code assigned yet -> {'reporterCode': , 'reporterDesc': 'OTHER EUROPE, NES'},
    {'reporterCode': 232, 'reporterDesc': "ERITREA"},
    {'reporterCode': 148, 'reporterDesc': "CHAD"},
    {'reporterCode': 226, 'reporterDesc': "EQUATORIAL GUINEA"},
    {'reporterCode': 254, 'reporterDesc': "FRENCH GUIANA"},
    {'reporterCode': 728, 'reporterDesc': "SOUTH SUDAN"},  # was 736, i.e. 'Sudan (...2011)'
    {'reporterCode': 706, 'reporterDesc': "SOMALIA"},
]
# Reshape the lookup for the join: upper-case the names so they match
# df_indicators["Country"], and name the code column "partnerCode_import",
# which is the key used later to join against the imports table.
countries_list = [{"partnerCode_import": country["reporterCode"], "Country": country["reporterDesc"].upper()} for country in countries_list]
df_countries_code = pd.json_normalize(countries_list)

# Left join keeps every indicator row; countries missing from the lookup get NaN
# (which is why the code column comes out as float).
# validate="many_to_one" raises MergeError if two lookup entries collapse to the
# same upper-cased name, instead of silently duplicating indicator rows.
df_indicators = pd.merge(
    df_indicators,
    df_countries_code,
    how="left",
    on="Country",
    validate="many_to_one",
)
df_indicators

Unnamed: 0,ISO3,Year,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country,partnerCode_import
0,AFG,1995,0.503503,False,0.496497,0.861229,False,0.138771,0.704018,False,0.295982,0.516028,False,0.603153,False,0.383710,True,False,0.976884,0.036459,0.575839,AFGHANISTAN,4.0
1,ALB,1995,0.606695,False,0.393305,0.616482,False,0.383518,0.774301,False,0.225699,0.484339,False,0.509650,False,0.474276,False,False,0.461108,0.033904,0.568908,ALBANIA,8.0
2,DZA,1995,0.583375,False,0.416625,0.707108,False,0.292892,0.805221,False,0.194779,0.411816,False,0.467930,False,0.157379,False,False,0.819686,0.010073,0.464725,ALGERIA,12.0
3,AND,1995,0.589592,True,0.410408,0.498106,True,0.501894,0.843236,False,0.156764,0.477737,True,0.527740,True,0.383710,True,True,0.366254,0.023957,0.536915,ANDORRA,20.0
4,AGO,1995,0.710943,False,0.289057,0.798282,False,0.201718,0.916844,False,0.083156,0.544357,False,0.648126,False,0.326570,False,False,0.888430,0.059952,0.625606,ANGOLA,24.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,2020,0.888756,False,0.111244,0.872393,False,0.127607,0.668596,False,0.331404,0.351461,False,0.604549,False,0.208577,False,False,0.726031,0.022974,0.533178,"VENEZUELA, BOLIVARIAN REPUBLIC O",862.0
4988,VNM,2020,0.509260,False,0.490740,0.531500,False,0.468500,0.683830,False,0.316170,0.555078,False,0.433713,False,0.562195,False,False,0.399939,0.025052,0.540927,VIET NAM,704.0
4989,YEM,2020,0.620414,False,0.379586,0.879826,False,0.120174,0.756106,False,0.243894,0.570795,False,0.611543,False,0.340751,False,False,0.986514,0.049091,0.605110,YEMEN,887.0
4990,ZMB,2020,0.586171,False,0.413829,0.600859,False,0.399141,0.849755,False,0.150245,0.433906,False,0.594931,False,0.560437,False,False,0.412301,0.043299,0.592580,ZAMBIA,894.0


In [12]:
# Country names that ended up with no reporter code (NaN) after the lookup merge.
df_indicators.loc[df_indicators["partnerCode_import"].isna(), "Country"].unique()

array(['DJIBOUTI', 'GUINEA-BISSAU', 'LIECHTENSTEIN', 'MALDIVES',
       'MARSHALL ISLANDS', 'MICRONESIA, FEDERATED STATES OF', 'MONACO',
       'NAURU', 'SAN MARINO', 'SAO TOME AND PRINCIPE'], dtype=object)

## Join indicators (EGSEHI) with the consolidated imports file

In [13]:
# Columns of the consolidated imports file that are kept for the join.
columns = [
    "period",
    "reporterCode", "reporterDesc",
    "partnerCode", "partnerISO", "partnerDesc",
    "cmdCode", "qty", "netWgt",
    "qtyUnitAbbr", "cifvalue", "primaryValue",
    "netWgt_total_import", "cifvalue_total_import", "primaryValue_total_import"
]

# Load only the needed columns, then build the frame in one non-mutating chain.
# The previous version sliced the frame and then renamed/assigned on that slice
# in place, which is the pattern that triggers pandas' SettingWithCopyWarning.
# `usecols` returns columns in file order, so `[columns]` restores the order above.
final_imports_df = (
    pd.read_csv("../data/imports_consolidated/imports_consolidated.csv", usecols=columns)[columns]
    .rename(
        columns={
            "period": "Year",
            "partnerCode": "partnerCode_import",
            "partnerISO": "partnerISO_import",
            "partnerDesc": "partnerDesc_import",
            "qty": "qty_import",
            "netWgt": "netWgt_import",
            "qtyUnitAbbr": "qtyUnitAbbr_import",
            "cifvalue": "cifvalue_import",
            "primaryValue": "primaryValue_import"
        }
    )
    # Upper-case reporter names, matching the casing used for df_indicators["Country"].
    .assign(reporterDesc=lambda imports: imports["reporterDesc"].str.upper())
)

final_imports_df

Unnamed: 0,Year,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,cmdCode,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import
0,2000,12,ALGERIA,528,NLD,Netherlands,2606,100400.0,100400.0,kg,37768.000,37768.000,364790.0,112483.000,112483.000
1,2000,12,ALGERIA,156,CHN,China,2606,263140.0,263140.0,kg,74064.000,74064.000,364790.0,112483.000,112483.000
2,2000,12,ALGERIA,0,W00,World,2606,364790.0,364790.0,kg,112483.000,112483.000,364790.0,112483.000,112483.000
3,2000,12,ALGERIA,724,ESP,Spain,2606,1250.0,1250.0,kg,651.000,651.000,364790.0,112483.000,112483.000
4,2000,20,ANDORRA,724,ESP,Spain,2606,42.0,42.0,kg,50.000,50.000,42.0,50.000,50.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405796,2020,894,ZAMBIA,0,W00,World,261510,63000.0,63000.0,kg,26944.376,26944.376,63000.0,26944.376,26944.376
405797,2020,894,ZAMBIA,380,ITA,Italy,261510,30000.0,30000.0,kg,9204.315,9204.315,63000.0,26944.376,26944.376
405798,2020,894,ZAMBIA,710,ZAF,South Africa,261510,33000.0,33000.0,kg,17740.061,17740.061,63000.0,26944.376,26944.376
405799,2020,716,ZIMBABWE,710,ZAF,South Africa,261510,0.0,0.0,,845145.046,845145.046,0.0,845145.046,845145.046


In [14]:
# Left-join the indicators onto every import row, matching the *partner* country
# code and the year; import rows with no matching indicators keep NaN in the
# indicator columns.
imports_egsehi_df = final_imports_df.merge(
    df_indicators,
    how="left",
    on=["partnerCode_import", "Year"],
)
imports_egsehi_df

Unnamed: 0,Year,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,cmdCode,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
0,2000,12,ALGERIA,528,NLD,Netherlands,2606,100400.0,100400.0,kg,37768.000,37768.000,364790.0,112483.000,112483.000,NLD,0.531779,False,0.468221,0.103672,False,0.896328,0.455831,False,0.544169,0.298018,False,0.412105,False,0.554108,False,False,0.000000,0.001710,0.345812,NETHERLANDS
1,2000,12,ALGERIA,156,CHN,China,2606,263140.0,263140.0,kg,74064.000,74064.000,364790.0,112483.000,112483.000,CHN,0.554322,False,0.445678,0.558184,False,0.441816,0.798681,False,0.201319,0.539247,False,0.355488,False,0.298814,False,False,0.468786,0.014156,0.491837,CHINA
2,2000,12,ALGERIA,0,W00,World,2606,364790.0,364790.0,kg,112483.000,112483.000,364790.0,112483.000,112483.000,,,,,,,,,,,,,,,,,,,,,
3,2000,12,ALGERIA,724,ESP,Spain,2606,1250.0,1250.0,kg,651.000,651.000,364790.0,112483.000,112483.000,ESP,0.571520,False,0.428480,0.262886,False,0.737114,0.609579,False,0.390421,0.287537,False,0.366877,False,0.337134,False,False,0.308989,0.003257,0.385012,SPAIN
4,2000,20,ANDORRA,724,ESP,Spain,2606,42.0,42.0,kg,50.000,50.000,42.0,50.000,50.000,ESP,0.571520,False,0.428480,0.262886,False,0.737114,0.609579,False,0.390421,0.287537,False,0.366877,False,0.337134,False,False,0.308989,0.003257,0.385012,SPAIN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405796,2020,894,ZAMBIA,0,W00,World,261510,63000.0,63000.0,kg,26944.376,26944.376,63000.0,26944.376,26944.376,,,,,,,,,,,,,,,,,,,,,
405797,2020,894,ZAMBIA,380,ITA,Italy,261510,30000.0,30000.0,kg,9204.315,9204.315,63000.0,26944.376,26944.376,ITA,0.594452,False,0.405548,0.408415,False,0.591585,0.429033,False,0.570967,0.336803,False,0.395838,False,0.386352,False,False,0.284013,0.005365,0.418406,ITALY
405798,2020,894,ZAMBIA,710,ZAF,South Africa,261510,33000.0,33000.0,kg,17740.061,17740.061,63000.0,26944.376,26944.376,ZAF,0.618231,False,0.381769,0.496676,False,0.503324,0.793097,False,0.206903,0.482972,False,0.527543,False,0.248088,False,False,0.436834,0.015393,0.498757,SOUTH AFRICA
405799,2020,716,ZIMBABWE,710,ZAF,South Africa,261510,0.0,0.0,,845145.046,845145.046,0.0,845145.046,845145.046,ZAF,0.618231,False,0.381769,0.496676,False,0.503324,0.793097,False,0.206903,0.482972,False,0.527543,False,0.248088,False,False,0.436834,0.015393,0.498757,SOUTH AFRICA


In [15]:

# Spot check before the join: Canada's 2019 import rows for commodity code 283691.
final_imports_df.loc[
    lambda rows: rows["reporterDesc"].eq("CANADA") & rows["Year"].eq(2019) & rows["cmdCode"].eq(283691)
]

Unnamed: 0,Year,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,cmdCode,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import
373823,2019,124,CANADA,0,W00,World,283691,1978337.172,1978337.172,kg,0.0,8655253.446,1978337.172,0.0,8655253.446
373824,2019,124,CANADA,32,ARG,Argentina,283691,76542.0,76542.0,kg,0.0,334873.307,1978337.172,0.0,8655253.446
373825,2019,124,CANADA,40,AUT,Austria,283691,159423.0,159423.0,kg,0.0,697478.375,1978337.172,0.0,8655253.446
373826,2019,124,CANADA,56,BEL,Belgium,283691,6.0,6.0,kg,0.0,27.886,1978337.172,0.0,8655253.446
373827,2019,124,CANADA,152,CHL,Chile,283691,1349466.172,1349466.172,kg,0.0,5903941.779,1978337.172,0.0,8655253.446
373828,2019,124,CANADA,156,CHN,China,283691,162452.0,162452.0,kg,0.0,710730.775,1978337.172,0.0,8655253.446
373829,2019,124,CANADA,276,DEU,Germany,283691,50305.0,50305.0,kg,0.0,220083.74,1978337.172,0.0,8655253.446
373830,2019,124,CANADA,392,JPN,Japan,283691,2.0,2.0,kg,0.0,10.551,1978337.172,0.0,8655253.446
373831,2019,124,CANADA,566,NGA,Nigeria,283691,1.0,1.0,kg,0.0,6.029,1978337.172,0.0,8655253.446
373832,2019,124,CANADA,699,IND,India,283691,781.0,781.0,kg,0.0,3415.598,1978337.172,0.0,8655253.446


In [65]:
# Same spot check after the join: Canada's 2019 rows for commodity code 283691,
# now carrying the partner-country indicator columns.
imports_egsehi_df.loc[
    lambda rows: rows["reporterDesc"].eq("CANADA") & rows["Year"].eq(2019) & rows["cmdCode"].eq(283691)
]

Unnamed: 0,Year,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,cmdCode,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
373823,2019,124,CANADA,0,W00,World,283691,1978337.172,1978337.172,kg,0.0,8655253.446,1978337.172,0.0,8655253.446,,,,,,,,,,,,,,,,,,,,,
373824,2019,124,CANADA,32,ARG,Argentina,283691,76542.0,76542.0,kg,0.0,334873.307,1978337.172,0.0,8655253.446,ARG,0.713153,False,0.286847,0.547761,False,0.452239,0.603575,False,0.396425,0.437942,False,0.577572,False,0.291193,False,False,0.392448,0.017366,0.508883,ARGENTINA
373825,2019,124,CANADA,40,AUT,Austria,283691,159423.0,159423.0,kg,0.0,697478.375,1978337.172,0.0,8655253.446,AUT,0.432543,False,0.567457,0.201731,False,0.798269,0.285687,False,0.714313,0.281332,False,0.342145,False,0.351418,True,False,0.163839,0.000843,0.307367,AUSTRIA
373826,2019,124,CANADA,56,BEL,Belgium,283691,6.0,6.0,kg,0.0,27.886,1978337.172,0.0,8655253.446,BEL,0.513159,False,0.486841,0.259039,False,0.740961,0.447912,False,0.552088,0.31413,False,0.47878,False,0.529744,False,False,0.264283,0.004744,0.409908,BELGIUM
373827,2019,124,CANADA,152,CHL,Chile,283691,1349466.172,1349466.172,kg,0.0,5903941.779,1978337.172,0.0,8655253.446,CHL,0.460639,False,0.539361,0.320946,False,0.679054,0.592538,False,0.407462,0.409332,False,0.537044,False,0.280798,False,False,0.369914,0.005407,0.418952,CHILE
373828,2019,124,CANADA,156,CHN,China,283691,162452.0,162452.0,kg,0.0,710730.775,1978337.172,0.0,8655253.446,CHN,0.425325,False,0.574675,0.547667,False,0.452333,0.432438,False,0.567562,0.515325,False,0.427117,False,0.302432,False,False,0.429917,0.006705,0.434246,CHINA
373829,2019,124,CANADA,276,DEU,Germany,283691,50305.0,50305.0,kg,0.0,220083.74,1978337.172,0.0,8655253.446,DEU,0.422296,False,0.577704,0.197733,False,0.802267,0.29418,False,0.70582,0.279286,False,0.376512,False,0.354964,False,False,0.241488,0.000917,0.311688,GERMANY
373830,2019,124,CANADA,392,JPN,Japan,283691,2.0,2.0,kg,0.0,10.551,1978337.172,0.0,8655253.446,JPN,0.366393,False,0.633607,0.2263,False,0.7737,0.332607,False,0.667393,0.469642,False,0.524999,False,0.436417,False,False,0.137741,0.002968,0.379081,JAPAN
373831,2019,124,CANADA,566,NGA,Nigeria,283691,1.0,1.0,kg,0.0,6.029,1978337.172,0.0,8655253.446,NGA,0.760098,False,0.239902,0.725792,False,0.274208,0.796336,False,0.203664,0.430907,False,0.620786,False,0.274442,False,False,0.807115,0.032252,0.56419,NIGERIA
373832,2019,124,CANADA,699,IND,India,283691,781.0,781.0,kg,0.0,3415.598,1978337.172,0.0,8655253.446,IND,0.666594,False,0.333406,0.551404,False,0.448596,0.694129,False,0.305871,0.548283,False,0.461657,False,0.303896,False,False,0.546492,0.019626,0.519362,INDIA


## Join Production and (Imports AND Indicators)

### Production

In [15]:
# Load the domestic production table and normalise it in one pass:
#   * Year    -> downcast to the smallest integer dtype that holds the values
#   * Product -> upper-cased
#   * Value   -> renamed to "Domestic Production Value (P_AC)"
#   * Country -> dropped (it was previously upper-cased and then deleted on the
#                next line, so that transformation is omitted as dead work)
df_production = (
    pd.read_csv("../data/production/Production_one_file_2.csv")
    .assign(
        Year=lambda raw: pd.to_numeric(raw["Year"], downcast="integer"),
        Product=lambda raw: raw["Product"].str.upper(),
    )
    .rename(columns={"Value": "Domestic Production Value (P_AC)"})
    .drop(columns=["Country"])
)
df_production

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode
0,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0
1,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0
2,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0
3,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0
4,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0
...,...,...,...,...,...,...,...,...
96380,URANIUM (U3O8-CONT.),261210.0,org,2020,,,kg,724.0
96381,URANIUM (U3O8-CONT.),261210.0,org,2020,8.770000e+05,877.0,kg,804.0
96382,URANIUM (U3O8-CONT.),261210.0,org,2020,7.000000e+03,7.0,kg,842.0
96383,URANIUM (U3O8-CONT.),261210.0,org,2020,4.127000e+06,4127.0,kg,860.0


In [61]:
# Full outer join of domestic production with the (imports + indicators) table,
# matched on year, reporting country code and commodity code. Rows present in
# only one of the two tables are kept, with NaN in the other table's columns.
df_production_imports_egsehi = (
    df_production
    .merge(imports_egsehi_df, how="outer", on=["Year", "reporterCode", "cmdCode"])
    .assign(
        # Rows without a Source value (e.g. those that exist only on the import
        # side) are labelled 'org'.
        Source=lambda joined: joined["Source"].fillna("org"),
        # Where the production-side unit is missing, fall back to the unit
        # reported on the import side (which may itself be missing).
        qtyUnitAbbr=lambda joined: joined["qtyUnitAbbr"].fillna(joined["qtyUnitAbbr_import"]),
    )
)
df_production_imports_egsehi

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
0,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
484451,,261510.0,org,2020,,,kg,894.0,ZAMBIA,0.0,W00,World,63000.0,63000.0,kg,26944.376,26944.376,63000.0,26944.376,26944.376,,,,,,,,,,,,,,,,,,,,,
484452,,261510.0,org,2020,,,kg,894.0,ZAMBIA,380.0,ITA,Italy,30000.0,30000.0,kg,9204.315,9204.315,63000.0,26944.376,26944.376,ITA,0.594452,False,0.405548,0.408415,False,0.591585,0.429033,False,0.570967,0.336803,False,0.395838,False,0.386352,False,False,0.284013,0.005365,0.418406,ITALY
484453,,261510.0,org,2020,,,kg,894.0,ZAMBIA,710.0,ZAF,South Africa,33000.0,33000.0,kg,17740.061,17740.061,63000.0,26944.376,26944.376,ZAF,0.618231,False,0.381769,0.496676,False,0.503324,0.793097,False,0.206903,0.482972,False,0.527543,False,0.248088,False,False,0.436834,0.015393,0.498757,SOUTH AFRICA
484454,,261510.0,org,2020,,,,716.0,ZIMBABWE,710.0,ZAF,South Africa,0.0,0.0,,845145.046,845145.046,0.0,845145.046,845145.046,ZAF,0.618231,False,0.381769,0.496676,False,0.503324,0.793097,False,0.206903,0.482972,False,0.527543,False,0.248088,False,False,0.436834,0.015393,0.498757,SOUTH AFRICA


In [45]:
# Sanity check: production rows recorded for commodity code 810411.
df_production.loc[df_production["cmdCode"].eq(810411)]

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode


In [50]:
# Sanity check: import rows (with indicators) for commodity code 810411.
imports_egsehi_df.loc[imports_egsehi_df["cmdCode"].eq(810411)]

Unnamed: 0,Year,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,cmdCode,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
8310,2000,32,ARGENTINA,380,ITA,Italy,810411,16.000,16.000,kg,36.00,36.00,1061410.000,2149456.00,2149456.00,ITA,0.632931,False,0.367069,0.330560,False,0.669440,0.548051,False,0.451949,0.344865,False,0.387665,False,0.396503,False,False,0.200470,0.006078,0.427199,ITALY
8311,2000,32,ARGENTINA,156,CHN,China,810411,473229.000,473229.000,kg,831978.00,831978.00,1061410.000,2149456.00,2149456.00,CHN,0.554322,False,0.445678,0.558184,False,0.441816,0.798681,False,0.201319,0.539247,False,0.355488,False,0.298814,False,False,0.468786,0.014156,0.491837,CHINA
8312,2000,32,ARGENTINA,899,_X,"Areas, nes",810411,438068.000,438068.000,kg,977793.00,977793.00,1061410.000,2149456.00,2149456.00,,,,,,,,,,,,,,,,,,,,,
8313,2000,32,ARGENTINA,40,AUT,Austria,810411,10000.000,10000.000,kg,13913.00,13913.00,1061410.000,2149456.00,2149456.00,AUT,0.541398,False,0.458602,0.190698,False,0.809302,0.395252,False,0.604748,0.290711,False,0.345209,False,0.379297,True,False,0.222907,0.001553,0.340311,AUSTRIA
8314,2000,32,ARGENTINA,376,ISR,Israel,810411,20254.000,20254.000,kg,48937.00,48937.00,1061410.000,2149456.00,2149456.00,ISR,0.583569,False,0.416431,0.373849,False,0.626151,0.465082,False,0.534918,0.390940,False,0.419780,False,0.324151,False,False,0.667037,0.005398,0.418825,ISRAEL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
399221,2020,860,UZBEKISTAN,792,TUR,Türkiye,810411,19990.000,19990.000,kg,47468.00,47468.00,192540.000,495462.00,495462.00,TUR,0.512772,False,0.487228,0.583057,False,0.416943,0.451959,False,0.548041,0.446285,False,0.426331,False,0.304115,False,False,0.651507,0.007819,0.445508,TURKEY
399222,2020,704,VIET NAM,0,W00,World,810411,2117703.536,2117703.536,kg,4855659.39,4855659.39,2117703.536,4855659.39,4855659.39,,,,,,,,,,,,,,,,,,,,,
399223,2020,704,VIET NAM,156,CHN,China,810411,2040942.218,2040942.218,kg,4679654.20,4679654.20,2117703.536,4855659.39,4855659.39,CHN,0.405026,False,0.594974,0.517543,False,0.482457,0.418528,False,0.581472,0.515749,False,0.430819,False,0.302432,False,False,0.449072,0.005895,0.425030,CHINA
399224,2020,704,VIET NAM,392,JPN,Japan,810411,504.167,504.167,kg,1156.00,1156.00,2117703.536,4855659.39,4855659.39,JPN,0.374739,False,0.625261,0.223700,False,0.776300,0.332986,False,0.667014,0.470386,False,0.526285,False,0.436417,False,False,0.148827,0.003016,0.380101,JAPAN


## Join Production and EGSEHI (not useful — caused confusion; the code below is disabled)

In [229]:
# del df_indicators["Country"]
# df_production_egsehi = pd.merge(df_production, df_indicators, how="left", left_on=["Year", "reporterCode"], right_on=["Year", "reporterCode"])
# # del df_production_egsehi["Name"]

In [203]:
# df_production_egsehi

Unnamed: 0,Country,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root
0,ALBANIA,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0,,,,,,,,,,,,,,,,,,,,
1,ALGERIA,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0,,,,,,,,,,,,,,,,,,,,
2,ARGENTINA,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0,,,,,,,,,,,,,,,,,,,,
3,AUSTRALIA,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0,,,,,,,,,,,,,,,,,,,,
4,AUSTRIA,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105272,SPAIN,URANIUM (U3O8-CONT.),261210.0,org,2020,,,kg,724.0,ESP,0.501205,False,0.498795,0.353548,False,0.646452,0.537246,False,0.462754,0.281208,False,0.373402,False,0.335442,False,False,0.294127,0.003353,0.386880
105273,UKRAINE,URANIUM (U3O8-CONT.),261210.0,org,2020,8.770000e+05,877.0,kg,804.0,UKR,0.578567,False,0.421433,0.636512,False,0.363488,0.503279,False,0.496721,0.426281,False,0.469000,False,0.292860,False,False,0.646157,0.010852,0.470525
105274,UNITED STATES,URANIUM (U3O8-CONT.),261210.0,org,2020,7.000000e+03,7.0,kg,842.0,USA,0.386435,False,0.613565,0.306741,False,0.693259,0.346982,False,0.653018,0.424584,False,0.453989,False,0.275346,False,False,0.388336,0.002183,0.360170
105275,UZBEKISTAN,URANIUM (U3O8-CONT.),261210.0,org,2020,4.127000e+06,4127.0,kg,860.0,UZB,0.473302,False,0.526698,0.671251,False,0.328749,0.697873,False,0.302127,0.506797,False,0.422226,False,0.267437,False,False,0.481922,0.012688,0.482948


## Join HHI table with Production AND (Imports AND Indicators)

In [62]:
# Load the HHI master table (production and export HHI values, raw and scaled,
# per commodity code / year / unit / source) and display it.
df_hhi_master = pd.read_csv(filepath_or_buffer="../data/hhi_data/df_hhi_master.csv")
df_hhi_master

Unnamed: 0,Product_x,cmdCode,Year,qtyUnitAbbr,Source,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled
0,IRON ORE (FE-CONT.),2601.0,2000,kg,org,0.120238,0.075656,IRON,0.383618,0.342631
1,IRON ORE (FE-CONT.),2601.0,2001,kg,org,0.127200,0.083260,IRON,0.336855,0.291013
2,IRON ORE (FE-CONT.),2601.0,2002,kg,org,0.124491,0.080301,IRON,0.271638,0.219024
3,IRON ORE (FE-CONT.),2601.0,2003,kg,org,0.127726,0.083835,IRON,0.289253,0.238467
4,IRON ORE (FE-CONT.),2601.0,2004,kg,org,0.132830,0.089409,IRON,0.301543,0.252034
...,...,...,...,...,...,...,...,...,...,...
1445,LITHIUM-ION BATTERIES,850760.0,2019,kg,org,,,LITHIUM-ION BATTERIES,0.265269,0.211994
1446,MAGNESIUM,810411.0,2019,kg,org,,,MAGNESIUM,0.802419,0.804918
1447,LITHIUM-ION BATTERIES,850760.0,2020,u,org,,,LITHIUM-ION BATTERIES,0.352185,0.307934
1448,LITHIUM-ION BATTERIES,850760.0,2020,kg,org,,,LITHIUM-ION BATTERIES,0.352185,0.307934


In [52]:
# Sanity check: HHI rows that have no Source value.
df_hhi_master.loc[df_hhi_master["Source"].isnull()]

Unnamed: 0,Product_x,cmdCode,Year,qtyUnitAbbr,Source,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled


In [32]:
# Sanity check: HHI rows for commodity code 810411.
df_hhi_master.loc[df_hhi_master["cmdCode"].eq(810411)]

Unnamed: 0,Product_x,cmdCode,Year,qtyUnitAbbr,Source,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled
1408,MAGNESIUM,810411.0,2000,kg,org,,,MAGNESIUM,0.274901,0.222625
1410,MAGNESIUM,810411.0,2001,kg,org,,,MAGNESIUM,0.277231,0.225197
1412,MAGNESIUM,810411.0,2002,kg,org,,,MAGNESIUM,0.308439,0.259645
1414,MAGNESIUM,810411.0,2003,kg,org,,,MAGNESIUM,0.468259,0.436061
1416,MAGNESIUM,810411.0,2004,kg,org,,,MAGNESIUM,0.552427,0.528968
1417,MAGNESIUM,810411.0,2005,kg,org,,,MAGNESIUM,0.504034,0.47555
1418,MAGNESIUM,810411.0,2006,kg,org,,,MAGNESIUM,0.494587,0.465122
1419,MAGNESIUM,810411.0,2007,kg,org,,,MAGNESIUM,0.580603,0.56007
1420,MAGNESIUM,810411.0,2008,kg,org,,,MAGNESIUM,0.569467,0.547777
1421,MAGNESIUM,810411.0,2009,kg,org,,,MAGNESIUM,0.508803,0.480814


In [68]:
# Attach the HHI metrics to every (production JOIN imports JOIN indicators) row.
# The join key is (Year, cmdCode, Source, qtyUnitAbbr). A previous approach that
# also keyed on reporterCode (i.e. considered the country) was not correct and
# is no longer used.
# NOTE(review): in the displayed outputs the last row index goes from 484454
# before this join to 1342934 after it, so df_hhi_master apparently holds
# several rows per join key and this left join multiplies rows — confirm that
# this is intended.
df_production_imports_egsehi_hhi = df_production_imports_egsehi.merge(
    df_hhi_master,
    how="left",
    on=["Year", "cmdCode", "Source", "qtyUnitAbbr"],
)

# Rows with no Product name take the HHI table's Product_x label when one was
# matched; rows without an HHI match keep a missing Product.
df_production_imports_egsehi_hhi["Product"] = df_production_imports_egsehi_hhi["Product"].fillna(
    df_production_imports_egsehi_hhi["Product_x"]
)

df_production_imports_egsehi_hhi

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country,Product_x,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled
0,IRON ORE (FE-CONT.),2601.0,org,1984,4.395020e+08,439502.0,kg,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,IRON ORE (FE-CONT.),2601.0,org,1984,2.000000e+09,2000000.0,kg,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,IRON ORE (FE-CONT.),2601.0,org,1984,3.455760e+08,345576.0,kg,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,IRON ORE (FE-CONT.),2601.0,org,1984,5.688500e+10,56885000.0,kg,36.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,IRON ORE (FE-CONT.),2601.0,org,1984,1.137872e+09,1137872.0,kg,40.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1342931,ZIRCON (CONC.),261510.0,org,2020,,,kg,894.0,ZAMBIA,0.0,W00,World,63000.0,63000.0,kg,26944.376,26944.376,63000.0,26944.376,26944.376,,,,,,,,,,,,,,,,,,,,,,ZIRCON (CONC.),0.187614,0.149247,ZIRCONIUM,0.121045,0.052793
1342932,ZIRCON (CONC.),261510.0,org,2020,,,kg,894.0,ZAMBIA,380.0,ITA,Italy,30000.0,30000.0,kg,9204.315,9204.315,63000.0,26944.376,26944.376,ITA,0.594452,False,0.405548,0.408415,False,0.591585,0.429033,False,0.570967,0.336803,False,0.395838,False,0.386352,False,False,0.284013,0.005365,0.418406,ITALY,ZIRCON (CONC.),0.187614,0.149247,ZIRCONIUM,0.121045,0.052793
1342933,ZIRCON (CONC.),261510.0,org,2020,,,kg,894.0,ZAMBIA,710.0,ZAF,South Africa,33000.0,33000.0,kg,17740.061,17740.061,63000.0,26944.376,26944.376,ZAF,0.618231,False,0.381769,0.496676,False,0.503324,0.793097,False,0.206903,0.482972,False,0.527543,False,0.248088,False,False,0.436834,0.015393,0.498757,SOUTH AFRICA,ZIRCON (CONC.),0.187614,0.149247,ZIRCONIUM,0.121045,0.052793
1342934,,261510.0,org,2020,,,,716.0,ZIMBABWE,710.0,ZAF,South Africa,0.0,0.0,,845145.046,845145.046,0.0,845145.046,845145.046,ZAF,0.618231,False,0.381769,0.496676,False,0.503324,0.793097,False,0.206903,0.482972,False,0.527543,False,0.248088,False,False,0.436834,0.015393,0.498757,SOUTH AFRICA,,,,,,


In [64]:
# Sanity check: joined production/import rows for commodity code 810411.
df_production_imports_egsehi.loc[df_production_imports_egsehi["cmdCode"].eq(810411)]

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
246191,,810411.0,org,2000,,,kg,32.0,ARGENTINA,380.0,ITA,Italy,16.000,16.000,kg,36.00,36.00,1061410.000,2149456.00,2149456.00,ITA,0.632931,False,0.367069,0.330560,False,0.669440,0.548051,False,0.451949,0.344865,False,0.387665,False,0.396503,False,False,0.200470,0.006078,0.427199,ITALY
246192,,810411.0,org,2000,,,kg,32.0,ARGENTINA,156.0,CHN,China,473229.000,473229.000,kg,831978.00,831978.00,1061410.000,2149456.00,2149456.00,CHN,0.554322,False,0.445678,0.558184,False,0.441816,0.798681,False,0.201319,0.539247,False,0.355488,False,0.298814,False,False,0.468786,0.014156,0.491837,CHINA
246193,,810411.0,org,2000,,,kg,32.0,ARGENTINA,899.0,_X,"Areas, nes",438068.000,438068.000,kg,977793.00,977793.00,1061410.000,2149456.00,2149456.00,,,,,,,,,,,,,,,,,,,,,
246194,,810411.0,org,2000,,,kg,32.0,ARGENTINA,40.0,AUT,Austria,10000.000,10000.000,kg,13913.00,13913.00,1061410.000,2149456.00,2149456.00,AUT,0.541398,False,0.458602,0.190698,False,0.809302,0.395252,False,0.604748,0.290711,False,0.345209,False,0.379297,True,False,0.222907,0.001553,0.340311,AUSTRIA
246195,,810411.0,org,2000,,,kg,32.0,ARGENTINA,376.0,ISR,Israel,20254.000,20254.000,kg,48937.00,48937.00,1061410.000,2149456.00,2149456.00,ISR,0.583569,False,0.416431,0.373849,False,0.626151,0.465082,False,0.534918,0.390940,False,0.419780,False,0.324151,False,False,0.667037,0.005398,0.418825,ISRAEL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
481040,,810411.0,org,2020,,,kg,860.0,UZBEKISTAN,792.0,TUR,Türkiye,19990.000,19990.000,kg,47468.00,47468.00,192540.000,495462.00,495462.00,TUR,0.512772,False,0.487228,0.583057,False,0.416943,0.451959,False,0.548041,0.446285,False,0.426331,False,0.304115,False,False,0.651507,0.007819,0.445508,TURKEY
481041,,810411.0,org,2020,,,kg,704.0,VIET NAM,0.0,W00,World,2117703.536,2117703.536,kg,4855659.39,4855659.39,2117703.536,4855659.39,4855659.39,,,,,,,,,,,,,,,,,,,,,
481042,,810411.0,org,2020,,,kg,704.0,VIET NAM,156.0,CHN,China,2040942.218,2040942.218,kg,4679654.20,4679654.20,2117703.536,4855659.39,4855659.39,CHN,0.405026,False,0.594974,0.517543,False,0.482457,0.418528,False,0.581472,0.515749,False,0.430819,False,0.302432,False,False,0.449072,0.005895,0.425030,CHINA
481043,,810411.0,org,2020,,,kg,704.0,VIET NAM,392.0,JPN,Japan,504.167,504.167,kg,1156.00,1156.00,2117703.536,4855659.39,4855659.39,JPN,0.374739,False,0.625261,0.223700,False,0.776300,0.332986,False,0.667014,0.470386,False,0.526285,False,0.436417,False,False,0.148827,0.003016,0.380101,JAPAN


In [54]:
# Sanity check (repeated): HHI rows for commodity code 810411.
df_hhi_master.loc[df_hhi_master["cmdCode"].eq(810411)]

Unnamed: 0,Product_x,cmdCode,Year,qtyUnitAbbr,Source,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled
1408,MAGNESIUM,810411.0,2000,kg,org,,,MAGNESIUM,0.274901,0.222625
1410,MAGNESIUM,810411.0,2001,kg,org,,,MAGNESIUM,0.277231,0.225197
1412,MAGNESIUM,810411.0,2002,kg,org,,,MAGNESIUM,0.308439,0.259645
1414,MAGNESIUM,810411.0,2003,kg,org,,,MAGNESIUM,0.468259,0.436061
1416,MAGNESIUM,810411.0,2004,kg,org,,,MAGNESIUM,0.552427,0.528968
1417,MAGNESIUM,810411.0,2005,kg,org,,,MAGNESIUM,0.504034,0.47555
1418,MAGNESIUM,810411.0,2006,kg,org,,,MAGNESIUM,0.494587,0.465122
1419,MAGNESIUM,810411.0,2007,kg,org,,,MAGNESIUM,0.580603,0.56007
1420,MAGNESIUM,810411.0,2008,kg,org,,,MAGNESIUM,0.569467,0.547777
1421,MAGNESIUM,810411.0,2009,kg,org,,,MAGNESIUM,0.508803,0.480814


In [69]:
# Sanity check: HHI-enriched rows for commodity code 810411.
df_production_imports_egsehi_hhi.loc[df_production_imports_egsehi_hhi["cmdCode"].eq(810411)]

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country,Product_x,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled
1104671,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,380.0,ITA,Italy,16.000,16.000,kg,36.00,36.00,1061410.000,2149456.00,2149456.00,ITA,0.632931,False,0.367069,0.330560,False,0.669440,0.548051,False,0.451949,0.344865,False,0.387665,False,0.396503,False,False,0.200470,0.006078,0.427199,ITALY,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104672,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,156.0,CHN,China,473229.000,473229.000,kg,831978.00,831978.00,1061410.000,2149456.00,2149456.00,CHN,0.554322,False,0.445678,0.558184,False,0.441816,0.798681,False,0.201319,0.539247,False,0.355488,False,0.298814,False,False,0.468786,0.014156,0.491837,CHINA,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104673,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,899.0,_X,"Areas, nes",438068.000,438068.000,kg,977793.00,977793.00,1061410.000,2149456.00,2149456.00,,,,,,,,,,,,,,,,,,,,,,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104674,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,40.0,AUT,Austria,10000.000,10000.000,kg,13913.00,13913.00,1061410.000,2149456.00,2149456.00,AUT,0.541398,False,0.458602,0.190698,False,0.809302,0.395252,False,0.604748,0.290711,False,0.345209,False,0.379297,True,False,0.222907,0.001553,0.340311,AUSTRIA,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104675,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,376.0,ISR,Israel,20254.000,20254.000,kg,48937.00,48937.00,1061410.000,2149456.00,2149456.00,ISR,0.583569,False,0.416431,0.373849,False,0.626151,0.465082,False,0.534918,0.390940,False,0.419780,False,0.324151,False,False,0.667037,0.005398,0.418825,ISRAEL,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339520,MAGNESIUM,810411.0,org,2020,,,kg,860.0,UZBEKISTAN,792.0,TUR,Türkiye,19990.000,19990.000,kg,47468.00,47468.00,192540.000,495462.00,495462.00,TUR,0.512772,False,0.487228,0.583057,False,0.416943,0.451959,False,0.548041,0.446285,False,0.426331,False,0.304115,False,False,0.651507,0.007819,0.445508,TURKEY,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339521,MAGNESIUM,810411.0,org,2020,,,kg,704.0,VIET NAM,0.0,W00,World,2117703.536,2117703.536,kg,4855659.39,4855659.39,2117703.536,4855659.39,4855659.39,,,,,,,,,,,,,,,,,,,,,,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339522,MAGNESIUM,810411.0,org,2020,,,kg,704.0,VIET NAM,156.0,CHN,China,2040942.218,2040942.218,kg,4679654.20,4679654.20,2117703.536,4855659.39,4855659.39,CHN,0.405026,False,0.594974,0.517543,False,0.482457,0.418528,False,0.581472,0.515749,False,0.430819,False,0.302432,False,False,0.449072,0.005895,0.425030,CHINA,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339523,MAGNESIUM,810411.0,org,2020,,,kg,704.0,VIET NAM,392.0,JPN,Japan,504.167,504.167,kg,1156.00,1156.00,2117703.536,4855659.39,4855659.39,JPN,0.374739,False,0.625261,0.223700,False,0.776300,0.332986,False,0.667014,0.470386,False,0.526285,False,0.436417,False,False,0.148827,0.003016,0.380101,JAPAN,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985


In [72]:
# Sanity check: rows for commodity code 810411 that received an HHI_exports value.
df_production_imports_egsehi_hhi.loc[
    df_production_imports_egsehi_hhi["cmdCode"].eq(810411)
    & df_production_imports_egsehi_hhi["HHI_exports"].notna()
]

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country,Product_x,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled
1104671,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,380.0,ITA,Italy,16.000,16.000,kg,36.00,36.00,1061410.000,2149456.00,2149456.00,ITA,0.632931,False,0.367069,0.330560,False,0.669440,0.548051,False,0.451949,0.344865,False,0.387665,False,0.396503,False,False,0.200470,0.006078,0.427199,ITALY,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104672,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,156.0,CHN,China,473229.000,473229.000,kg,831978.00,831978.00,1061410.000,2149456.00,2149456.00,CHN,0.554322,False,0.445678,0.558184,False,0.441816,0.798681,False,0.201319,0.539247,False,0.355488,False,0.298814,False,False,0.468786,0.014156,0.491837,CHINA,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104673,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,899.0,_X,"Areas, nes",438068.000,438068.000,kg,977793.00,977793.00,1061410.000,2149456.00,2149456.00,,,,,,,,,,,,,,,,,,,,,,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104674,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,40.0,AUT,Austria,10000.000,10000.000,kg,13913.00,13913.00,1061410.000,2149456.00,2149456.00,AUT,0.541398,False,0.458602,0.190698,False,0.809302,0.395252,False,0.604748,0.290711,False,0.345209,False,0.379297,True,False,0.222907,0.001553,0.340311,AUSTRIA,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104675,MAGNESIUM,810411.0,org,2000,,,kg,32.0,ARGENTINA,376.0,ISR,Israel,20254.000,20254.000,kg,48937.00,48937.00,1061410.000,2149456.00,2149456.00,ISR,0.583569,False,0.416431,0.373849,False,0.626151,0.465082,False,0.534918,0.390940,False,0.419780,False,0.324151,False,False,0.667037,0.005398,0.418825,ISRAEL,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339520,MAGNESIUM,810411.0,org,2020,,,kg,860.0,UZBEKISTAN,792.0,TUR,Türkiye,19990.000,19990.000,kg,47468.00,47468.00,192540.000,495462.00,495462.00,TUR,0.512772,False,0.487228,0.583057,False,0.416943,0.451959,False,0.548041,0.446285,False,0.426331,False,0.304115,False,False,0.651507,0.007819,0.445508,TURKEY,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339521,MAGNESIUM,810411.0,org,2020,,,kg,704.0,VIET NAM,0.0,W00,World,2117703.536,2117703.536,kg,4855659.39,4855659.39,2117703.536,4855659.39,4855659.39,,,,,,,,,,,,,,,,,,,,,,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339522,MAGNESIUM,810411.0,org,2020,,,kg,704.0,VIET NAM,156.0,CHN,China,2040942.218,2040942.218,kg,4679654.20,4679654.20,2117703.536,4855659.39,4855659.39,CHN,0.405026,False,0.594974,0.517543,False,0.482457,0.418528,False,0.581472,0.515749,False,0.430819,False,0.302432,False,False,0.449072,0.005895,0.425030,CHINA,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339523,MAGNESIUM,810411.0,org,2020,,,kg,704.0,VIET NAM,392.0,JPN,Japan,504.167,504.167,kg,1156.00,1156.00,2117703.536,4855659.39,4855659.39,JPN,0.374739,False,0.625261,0.223700,False,0.776300,0.332986,False,0.667014,0.470386,False,0.526285,False,0.436417,False,False,0.148827,0.003016,0.380101,JAPAN,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985


In [66]:
# Sanity check: rows that received an HHI_exports value, limited to commodity code 810411.
df_production_imports_egsehi_hhi.loc[
    df_production_imports_egsehi_hhi["HHI_exports"].notna()
    & df_production_imports_egsehi_hhi["cmdCode"].eq(810411)
]

Unnamed: 0,Product,cmdCode,Source,Year,Domestic Production Value (P_AC),Value (Ton),qtyUnitAbbr,reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country,Product_x,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled
1104671,,810411.0,org,2000,,,kg,32.0,ARGENTINA,380.0,ITA,Italy,16.000,16.000,kg,36.00,36.00,1061410.000,2149456.00,2149456.00,ITA,0.632931,False,0.367069,0.330560,False,0.669440,0.548051,False,0.451949,0.344865,False,0.387665,False,0.396503,False,False,0.200470,0.006078,0.427199,ITALY,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104672,,810411.0,org,2000,,,kg,32.0,ARGENTINA,156.0,CHN,China,473229.000,473229.000,kg,831978.00,831978.00,1061410.000,2149456.00,2149456.00,CHN,0.554322,False,0.445678,0.558184,False,0.441816,0.798681,False,0.201319,0.539247,False,0.355488,False,0.298814,False,False,0.468786,0.014156,0.491837,CHINA,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104673,,810411.0,org,2000,,,kg,32.0,ARGENTINA,899.0,_X,"Areas, nes",438068.000,438068.000,kg,977793.00,977793.00,1061410.000,2149456.00,2149456.00,,,,,,,,,,,,,,,,,,,,,,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104674,,810411.0,org,2000,,,kg,32.0,ARGENTINA,40.0,AUT,Austria,10000.000,10000.000,kg,13913.00,13913.00,1061410.000,2149456.00,2149456.00,AUT,0.541398,False,0.458602,0.190698,False,0.809302,0.395252,False,0.604748,0.290711,False,0.345209,False,0.379297,True,False,0.222907,0.001553,0.340311,AUSTRIA,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
1104675,,810411.0,org,2000,,,kg,32.0,ARGENTINA,376.0,ISR,Israel,20254.000,20254.000,kg,48937.00,48937.00,1061410.000,2149456.00,2149456.00,ISR,0.583569,False,0.416431,0.373849,False,0.626151,0.465082,False,0.534918,0.390940,False,0.419780,False,0.324151,False,False,0.667037,0.005398,0.418825,ISRAEL,MAGNESIUM,,,MAGNESIUM,0.274901,0.222625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339520,,810411.0,org,2020,,,kg,860.0,UZBEKISTAN,792.0,TUR,Türkiye,19990.000,19990.000,kg,47468.00,47468.00,192540.000,495462.00,495462.00,TUR,0.512772,False,0.487228,0.583057,False,0.416943,0.451959,False,0.548041,0.446285,False,0.426331,False,0.304115,False,False,0.651507,0.007819,0.445508,TURKEY,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339521,,810411.0,org,2020,,,kg,704.0,VIET NAM,0.0,W00,World,2117703.536,2117703.536,kg,4855659.39,4855659.39,2117703.536,4855659.39,4855659.39,,,,,,,,,,,,,,,,,,,,,,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339522,,810411.0,org,2020,,,kg,704.0,VIET NAM,156.0,CHN,China,2040942.218,2040942.218,kg,4679654.20,4679654.20,2117703.536,4855659.39,4855659.39,CHN,0.405026,False,0.594974,0.517543,False,0.482457,0.418528,False,0.581472,0.515749,False,0.430819,False,0.302432,False,False,0.449072,0.005895,0.425030,CHINA,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985
1339523,,810411.0,org,2020,,,kg,704.0,VIET NAM,392.0,JPN,Japan,504.167,504.167,kg,1156.00,1156.00,2117703.536,4855659.39,4855659.39,JPN,0.374739,False,0.625261,0.223700,False,0.776300,0.332986,False,0.667014,0.470386,False,0.526285,False,0.436417,False,False,0.148827,0.003016,0.380101,JAPAN,MAGNESIUM,,,MAGNESIUM,0.750842,0.747985


In [252]:
# # All the rows have information
# df_hhi_production_egsehi[~df_hhi_production_egsehi["HHI_production"].isna()]

In [73]:
# # Join HHI table with (production JOIN Imports JOIN Indicators)
# df_production_imports_egsehi_hhi_1970and2020 = pd.merge(df_hhi_master, df_production_imports_egsehi, how="left", 
#     left_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"], 
#     right_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"])
# df_production_imports_egsehi_hhi_1970and2020


# Join HHI table with (production JOIN Imports JOIN Indicators).
# Left join: every row of df_hhi_master is kept; rows without a match on the
# four keys below get NaN in the columns contributed by the right-hand table.
# Note that reporterCode is not part of the join key.
HHI_JOIN_KEYS = ["Year", "cmdCode", "Source", "qtyUnitAbbr"]
df_production_imports_egsehi_hhi_1970and2020 = df_hhi_master.merge(
    df_production_imports_egsehi,
    how="left",
    on=HHI_JOIN_KEYS,
)

# Where "Product" is missing after the join, fall back to "Product_x".
df_production_imports_egsehi_hhi_1970and2020["Product"] = (
    df_production_imports_egsehi_hhi_1970and2020["Product"].fillna(
        df_production_imports_egsehi_hhi_1970and2020["Product_x"]
    )
)

df_production_imports_egsehi_hhi_1970and2020

Unnamed: 0,Product_x,cmdCode,Year,qtyUnitAbbr,Source,HHI_production,HHI_production_scaled,Product_y,HHI_exports,HHI_exports_scaled,Product,Domestic Production Value (P_AC),Value (Ton),reporterCode,reporterDesc,partnerCode_import,partnerISO_import,partnerDesc_import,qty_import,netWgt_import,qtyUnitAbbr_import,cifvalue_import,primaryValue_import,netWgt_total_import,cifvalue_total_import,primaryValue_total_import,ISO3,value_economic,no_value_economic,value_economic_original,value_governance,no_value_governance,value_governance_original,value_social,no_value_social,value_social_original,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,no_value_governance_isolated,value_governance_isolated,EGSEHI,EGSEHI_6root,Country
0,IRON ORE (FE-CONT.),2601.0,2000,kg,org,0.120238,0.075656,IRON,0.383618,0.342631,IRON ORE (FE-CONT.),5000000.0,5000.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,IRON ORE (FE-CONT.),2601.0,2000,kg,org,0.120238,0.075656,IRON,0.383618,0.342631,IRON ORE (FE-CONT.),720000000.0,720000.0,12.0,ALGERIA,724.0,ESP,Spain,4387.000,4387.000,kg,1481.00,1481.00,1.793200e+04,5.163000e+03,5.163000e+03,ESP,0.571520,False,0.428480,0.262886,False,0.737114,0.609579,False,0.390421,0.287537,False,0.366877,False,0.337134,False,False,0.308989,0.003257,0.385012,SPAIN
2,IRON ORE (FE-CONT.),2601.0,2000,kg,org,0.120238,0.075656,IRON,0.383618,0.342631,IRON ORE (FE-CONT.),720000000.0,720000.0,12.0,ALGERIA,528.0,NLD,Netherlands,13545.000,13545.000,kg,3682.00,3682.00,1.793200e+04,5.163000e+03,5.163000e+03,NLD,0.531779,False,0.468221,0.103672,False,0.896328,0.455831,False,0.544169,0.298018,False,0.412105,False,0.554108,False,False,0.000000,0.001710,0.345812,NETHERLANDS
3,IRON ORE (FE-CONT.),2601.0,2000,kg,org,0.120238,0.075656,IRON,0.383618,0.342631,IRON ORE (FE-CONT.),720000000.0,720000.0,12.0,ALGERIA,0.0,W00,World,17932.000,17932.000,kg,5163.00,5163.00,1.793200e+04,5.163000e+03,5.163000e+03,,,,,,,,,,,,,,,,,,,,,
4,IRON ORE (FE-CONT.),2601.0,2000,kg,org,0.120238,0.075656,IRON,0.383618,0.342631,IRON ORE (FE-CONT.),0.0,0.0,32.0,ARGENTINA,604.0,PER,Peru,294.000,294.000,kg,530.00,530.00,5.323026e+09,1.656972e+08,1.656972e+08,PER,0.605626,False,0.394374,0.571815,False,0.428185,0.827810,False,0.172190,0.439455,False,0.657424,False,0.342680,False,False,0.654439,0.028382,0.552298,PERU
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1283795,MAGNESIUM,810411.0,2020,kg,org,,,MAGNESIUM,0.750842,0.747985,MAGNESIUM,,,860.0,UZBEKISTAN,792.0,TUR,Türkiye,19990.000,19990.000,kg,47468.00,47468.00,1.925400e+05,4.954620e+05,4.954620e+05,TUR,0.512772,False,0.487228,0.583057,False,0.416943,0.451959,False,0.548041,0.446285,False,0.426331,False,0.304115,False,False,0.651507,0.007819,0.445508,TURKEY
1283796,MAGNESIUM,810411.0,2020,kg,org,,,MAGNESIUM,0.750842,0.747985,MAGNESIUM,,,704.0,VIET NAM,0.0,W00,World,2117703.536,2117703.536,kg,4855659.39,4855659.39,2.117704e+06,4.855659e+06,4.855659e+06,,,,,,,,,,,,,,,,,,,,,
1283797,MAGNESIUM,810411.0,2020,kg,org,,,MAGNESIUM,0.750842,0.747985,MAGNESIUM,,,704.0,VIET NAM,156.0,CHN,China,2040942.218,2040942.218,kg,4679654.20,4679654.20,2.117704e+06,4.855659e+06,4.855659e+06,CHN,0.405026,False,0.594974,0.517543,False,0.482457,0.418528,False,0.581472,0.515749,False,0.430819,False,0.302432,False,False,0.449072,0.005895,0.425030,CHINA
1283798,MAGNESIUM,810411.0,2020,kg,org,,,MAGNESIUM,0.750842,0.747985,MAGNESIUM,,,704.0,VIET NAM,392.0,JPN,Japan,504.167,504.167,kg,1156.00,1156.00,2.117704e+06,4.855659e+06,4.855659e+06,JPN,0.374739,False,0.625261,0.223700,False,0.776300,0.332986,False,0.667014,0.470386,False,0.526285,False,0.436417,False,False,0.148827,0.003016,0.380101,JAPAN


## Join HHI index table with production JOIN EGSEHI table (NOT USEFUL, CONFUSING)

In [35]:
# df_hhi_master = pd.read_csv("../data/hhi_data/df_hhi_master.csv")
# del df_hhi_master['Country']
# del df_hhi_master['Value (Ton)']
# # del df_hhi_master['Product']

# df_hhi_master

In [36]:
# # Join HHI table with (indicators JOIN production)
# df_hhi_production_egsehi = pd.merge(df_production_egsehi, df_hhi_master, how="left", 
#     left_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"], 
#     right_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"])
# df_hhi_production_egsehi

In [37]:
# # Get entries whose HHI indexes are empty
# # df_hhi_production_egsehi[df_hhi_production_egsehi["HHI_production"].isna() & df_hhi_production_egsehi["HHI_exports"].isna()]

# # All the rows have information
# df_hhi_production_egsehi[~df_hhi_production_egsehi["HHI_production"].isna()]

# # # In this case we have a lot of rows with HHI_exports empty because export data contains information from 1970 to 2020 whilst the rest of the data is from 2000 to 2020
# # df_hhi_production_egsehi[df_hhi_production_egsehi["HHI_exports"].isna()  & (df_hhi_production_egsehi["Year"] >= 2000)][["Year", "Country", "Product"]].drop_duplicates().iloc[100:120]

In [None]:
# df_hhi_master[(df_hhi_master["Year"]==2002) & (df_hhi_master["Country"] == "URUGUAY")]

In [38]:
# df_hhi_master[(df_hhi_master["Year"]==2009) & (df_hhi_master["Country"] == "CROATIA")]

In [206]:
# # FOR NULL DATA BUT FROM 1970 TO 2020 (df_hhi_master only contains data from 2000 to 2020)
# df_hhi_production_egsehi_1970and2020 = pd.merge(df_hhi_master, df_production_egsehi, how="left", 
#     left_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"], 
#     right_on=["Year", "reporterCode", "cmdCode", "Source", "qtyUnitAbbr"]
# )

In [39]:
# df_hhi_production_egsehi_1970and2020

In [40]:
# df_hhi_production_egsehi

## Save conformed tables

In [74]:
# df_hhi_production_egsehi.to_csv("../data/hhi_production_exports_egsehi_conformed_table.csv", index=False)
# Write df_production_imports_egsehi_hhi to CSV without the index column.
conformed_table_csv = "../data/production_imports_egsehi_hhi_exports_conformed_table.csv"
df_production_imports_egsehi_hhi.to_csv(path_or_buf=conformed_table_csv, index=False)

In [75]:
# df_hhi_production_egsehi_1970and2020.to_csv("../data/hhi_production_exports_egsehi_null_19702020_conformed_table.csv", index=False)
# Write df_production_imports_egsehi_hhi_1970and2020 to CSV without the index column.
conformed_null_table_csv = "../data/production_imports_egsehi_hhi_exports_null_19702020_conformed_table.csv"
df_production_imports_egsehi_hhi_1970and2020.to_csv(path_or_buf=conformed_null_table_csv, index=False)