In [38]:
import pandas as pd
import numpy as np

In [39]:
# Read all the 6 indicators (CSVs)
df_economic = pd.read_csv("../data/resources/readiness/melted_economic.csv")

df_governance = pd.read_csv("../data/resources/readiness/melted_governance.csv")
del df_governance["Name"]
df_social = pd.read_csv("../data/resources/readiness/melted_social.csv")
del df_social["Name"]

df_ecosystems = pd.read_csv("../data/resources/vulnerability/melted_ecosystems.csv")
del df_ecosystems["Name"]
df_habitat = pd.read_csv("../data/resources/vulnerability/melted_habitat.csv")
del df_habitat["Name"]
df_infrastructure = pd.read_csv("../data/resources/vulnerability/melted_infrastructure.csv")
del df_infrastructure["Name"]

In [45]:
df_indicators = pd.merge(df_economic, df_governance, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_social, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_ecosystems, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_habitat, how="outer", on=["ISO3", "Year"])
df_indicators = pd.merge(df_indicators, df_infrastructure, how="outer", on=["ISO3", "Year"])

In [51]:
# To armonize data, let's change "United states" as "USA"
invalid_rows = df_indicators["Name"] == "United States"
df_indicators.loc[invalid_rows, "Name"] = "USA"

Unnamed: 0,ISO3,Name,Year,value_economic,no_value_economic,value_governance,no_value_governance,value_social,no_value_social,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure
0,AFG,Afghanistan,1995,0.496497,False,0.138771,False,0.295982,False,0.516028,False,0.603153,False,0.383710,True
1,ALB,Albania,1995,0.393305,False,0.383518,False,0.225699,False,0.484339,False,0.509650,False,0.474276,False
2,DZA,Algeria,1995,0.416625,False,0.292892,False,0.194779,False,0.411816,False,0.467930,False,0.157379,False
3,AND,Andorra,1995,0.410408,True,0.501894,True,0.156764,False,0.477737,True,0.527740,True,0.383710,True
4,AGO,Angola,1995,0.289057,False,0.201718,False,0.083156,False,0.544357,False,0.648126,False,0.326570,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,"Venezuela, Bolivarian Republic o",2020,0.111244,False,0.127607,False,0.331404,False,0.351461,False,0.604549,False,0.208577,False
4988,VNM,Viet Nam,2020,0.490740,False,0.468500,False,0.316170,False,0.555078,False,0.433713,False,0.562195,False
4989,YEM,Yemen,2020,0.379586,False,0.120174,False,0.243894,False,0.570795,False,0.611543,False,0.340751,False
4990,ZMB,Zambia,2020,0.413829,False,0.399141,False,0.150245,False,0.433906,False,0.594931,False,0.560437,False


In [52]:
# Create geometric mean for these 6 indicators (EGSEHI)
df_indicators["EGSEHI"] = df_indicators["value_economic"] * df_indicators["value_governance"] * df_indicators["value_social"] * \
                            df_indicators["value_ecosystems"] * df_indicators["value_habitat"] * df_indicators["value_infrastructure"]

root = 6
df_indicators[f"EGSEHI_{root}root"] = np.power(df_indicators["EGSEHI"], root)

In [53]:
df_indicators["Name"] = df_indicators["Name"].str.upper()

In [54]:
df_indicators.to_csv("../data/indicators_conformed_file.csv", index=False)

In [55]:
df_indicators

Unnamed: 0,ISO3,Name,Year,value_economic,no_value_economic,value_governance,no_value_governance,value_social,no_value_social,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,EGSEHI,EGSEHI_6root
0,AFG,AFGHANISTAN,1995,0.496497,False,0.138771,False,0.295982,False,0.516028,False,0.603153,False,0.383710,True,0.002435,2.086886e-16
1,ALB,ALBANIA,1995,0.393305,False,0.383518,False,0.225699,False,0.484339,False,0.509650,False,0.474276,False,0.003986,4.008480e-15
2,DZA,ALGERIA,1995,0.416625,False,0.292892,False,0.194779,False,0.411816,False,0.467930,False,0.157379,False,0.000721,1.402677e-19
3,AND,ANDORRA,1995,0.410408,True,0.501894,True,0.156764,False,0.477737,True,0.527740,True,0.383710,True,0.003124,9.292030e-16
4,AGO,ANGOLA,1995,0.289057,False,0.201718,False,0.083156,False,0.544357,False,0.648126,False,0.326570,False,0.000559,3.039842e-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4987,VEN,"VENEZUELA, BOLIVARIAN REPUBLIC O",2020,0.111244,False,0.127607,False,0.331404,False,0.351461,False,0.604549,False,0.208577,False,0.000208,8.213285e-23
4988,VNM,VIET NAM,2020,0.490740,False,0.468500,False,0.316170,False,0.555078,False,0.433713,False,0.562195,False,0.009838,9.068718e-13
4989,YEM,YEMEN,2020,0.379586,False,0.120174,False,0.243894,False,0.570795,False,0.611543,False,0.340751,False,0.001323,5.370178e-18
4990,ZMB,ZAMBIA,2020,0.413829,False,0.399141,False,0.150245,False,0.433906,False,0.594931,False,0.560437,False,0.003590,2.142007e-15


## Production

In [56]:
df_production = pd.read_csv("../data/Production_one_file.csv")
df_production["Year"] = pd.to_numeric(df_production["Year"], downcast="integer")
df_production["Country"] = df_production["Country"].str.upper()
df_production["Product"] = df_production["Product"].str.upper()
df_production.rename(columns={"Value": "Domestic Production Value (P_AC)"}, inplace=True)

In [57]:
df_production

Unnamed: 0,Year,Country,Domestic Production Value (P_AC),Product
0,1970,ALBANIA,,ALUMINIUM
1,1971,ALBANIA,,ALUMINIUM
2,1972,ALBANIA,,ALUMINIUM
3,1973,ALBANIA,,ALUMINIUM
4,1974,ALBANIA,,ALUMINIUM
...,...,...,...,...
69530,2016,RUSSIAN FEDERATION,5.584791e+08,PETROLEUM
69531,2017,RUSSIAN FEDERATION,5.585467e+08,PETROLEUM
69532,2018,RUSSIAN FEDERATION,5.678849e+08,PETROLEUM
69533,2019,RUSSIAN FEDERATION,5.733888e+08,PETROLEUM


### Join Production and EGSEHI

In [58]:
df_production_egsehi = pd.merge(df_production, df_indicators, how="left", left_on=["Year", "Country"], right_on=["Year", "Name"])
del df_production_egsehi["Name"]

In [59]:
df_production_egsehi

Unnamed: 0,Year,Country,Domestic Production Value (P_AC),Product,ISO3,value_economic,no_value_economic,value_governance,no_value_governance,value_social,no_value_social,value_ecosystems,no_value_ecosystems,value_habitat,no_value_habitat,value_infrastructure,no_value_infrastructure,EGSEHI,EGSEHI_6root
0,1970,ALBANIA,,ALUMINIUM,,,,,,,,,,,,,,,
1,1971,ALBANIA,,ALUMINIUM,,,,,,,,,,,,,,,
2,1972,ALBANIA,,ALUMINIUM,,,,,,,,,,,,,,,
3,1973,ALBANIA,,ALUMINIUM,,,,,,,,,,,,,,,
4,1974,ALBANIA,,ALUMINIUM,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69530,2016,RUSSIAN FEDERATION,5.584791e+08,PETROLEUM,RUS,0.624176,False,0.358850,False,0.608937,False,0.414390,False,0.517701,False,0.158387,False,0.004634,9.908522e-15
69531,2017,RUSSIAN FEDERATION,5.585467e+08,PETROLEUM,RUS,0.642995,False,0.365093,False,0.582633,False,0.413218,False,0.513469,False,0.158387,False,0.004596,9.430271e-15
69532,2018,RUSSIAN FEDERATION,5.678849e+08,PETROLEUM,RUS,0.645651,False,0.370905,False,0.607808,False,0.412046,False,0.509131,False,0.158387,False,0.004836,1.279774e-14
69533,2019,RUSSIAN FEDERATION,5.733888e+08,PETROLEUM,RUS,0.670757,False,0.378865,False,0.599214,False,0.413524,False,0.512174,False,0.158387,False,0.005108,1.776735e-14


### Join HHI index table with production JOIN EGSEHI table

In [36]:
# TODO: Jion HHI table

### Save conformed table

In [60]:
df_production_egsehi.to_csv("../data/hhi_production_egsehi_conformed_table.csv", index=False)