In [1]:
import pandas as pd
import numpy as np

# Load the four cleaned input tables from the shared "clean" data folder.
consumption, income, inflation, saving = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/clean/consumption.csv",
        "../../data/clean/disposable_income.csv",
        "../../data/clean/inflation_hvpi.csv",
        "../../data/clean/saving.csv",
    )
)

In [2]:
# Index every table by (country, year) so they all share the same key
index_keys = ['country', 'year']
consumption_final, income_final, inflation_final, saving_final = (
    table.set_index(index_keys)
    for table in (consumption, income, inflation, saving)
)

In [3]:
# Inner-join the four tables on their common index; only index keys that
# appear in every table are kept.
database_indicators = (
    consumption_final
    .join(income_final, how='inner')
    .join(inflation_final, how='inner')
    .join(saving_final, how='inner')
)
database_indicators

Unnamed: 0_level_0,Unnamed: 1_level_0,consumptionMEUR,net_income_MEUR,hvpi_2015,inflation,saving_net,saving_ratio
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
EU,2012,231037,242021,98,2.6,15543,7.497778
EU,2013,232071,243075,99,1.3,15719,7.920000
EU,2014,235210,246891,99,0.4,16375,7.895926
EU,2015,241094,252831,100,0.1,16428,8.295926
EU,2016,246785,259480,100,0.2,17394,8.830000
...,...,...,...,...,...,...,...
SE,2019,211604,224413,106,1.7,36130,17.030000
SE,2020,208823,226954,107,0.7,36841,17.610000
SE,2021,233087,249979,110,2.7,40939,17.530000
SE,2022,244379,258773,119,8.1,36917,15.740000


In [None]:
# Reset Index and name columns
# The index levels become ordinary columns again, and the one column whose
# source label lacks an underscore ("consumptionMEUR") is renamed by label.
# Renaming by label instead of overwriting all labels positionally means a
# changed column order in the inputs can never silently mislabel the data.
expected_columns = ["country", "year", "consumption_MEUR", "net_income_MEUR", "hvpi_2015", "inflation", "saving_net", "saving_ratio"]
database_indicators = (
    database_indicators
    .reset_index()
    .rename(columns={"consumptionMEUR": "consumption_MEUR"})
)
# Fail loudly if the joined frame does not have exactly the expected layout
# (this also triggers when the cell is re-run on an already reset frame).
if list(database_indicators.columns) != expected_columns:
    raise ValueError(
        f"Unexpected columns after join: {list(database_indicators.columns)}"
    )
database_indicators

In [None]:
# Create column country names
# The two-letter code is kept as "country_ID"; "country" gets the spelled-out name.
# NOTE(review): Eurostat-style datasets usually code Greece as "EL" rather than
# "GR" -- confirm which code the cleaned CSVs contain.
country_names = {"EU":"European_Union", "BE":"Belgium", "BG":"Bulgaria", "DK":"Denmark", "DE":"Germany", "EE":"Estonia",
                 "FI":"Finland", "FR":"France", "GR":"Greece", "IE":"Ireland", "IT":"Italy",
                 "HR":"Croatia", "LV":"Latvia", "LT":"Lithuania", "LU":"Luxembourg", "MT":"Malta", "NL":"Netherlands",
                 "AT":"Austria", "PL":"Poland", "PT":"Portugal", "RO":"Romania", "SE":"Sweden",
                 "SK":"Slovakia", "SI":"Slovenia", "ES":"Spain", "CZ":"Czech_Republic", "HU":"Hungary",
                 "CY":"Cyprus"}
database_indicators["country_ID"] = database_indicators["country"]
# Exact-match replacement: the codes are whole cell values, so no regex
# substring substitution is wanted. Codes missing from the mapping stay unchanged.
database_indicators["country"] = database_indicators["country"].replace(country_names)
database_indicators

In [None]:
# Derive the year ID as the offset of the year from 2000 (e.g. 2012 -> 12)
database_indicators["year_ID"] = database_indicators["year"].sub(2000)
database_indicators

In [None]:
# Reshape from wide to long: the identifier columns go into the index, the
# remaining (KPI) columns are stacked into one "KPI_name"/"value" pair per row.
id_columns = ["country_ID", "country", "year_ID", "year"]
database_indicators = (
    database_indicators
    .set_index(id_columns)
    .rename_axis(columns="KPI_name")  # names the stacked index level
    .stack()
    .rename("value")                  # names the resulting value column
    .reset_index()
)
database_indicators

In [None]:
# Create column KPI ID
# Each KPI name is translated to its short ID by exact match; names missing
# from the mapping are carried over unchanged.
kpi_ids = {"consumption_MEUR":"CON", "net_income_MEUR":"INC", "hvpi_2015":"HVPI", "inflation":"INF", "saving_net":"SAVN", "saving_ratio":"SAVR"}
database_indicators["KPI_ID"] = database_indicators["KPI_name"].replace(kpi_ids)
database_indicators

In [None]:
# Build the indicator ID as <country_ID><year_ID>_<KPI_ID>
database_indicators["indicator_ID"] = (
    database_indicators["country_ID"]
    + database_indicators["year_ID"].astype(str)
    + "_"
    + database_indicators["KPI_ID"]
)
database_indicators

In [None]:
# Entity "country": one row per distinct (country_ID, country) pair
df_country = database_indicators[["country_ID", "country"]].drop_duplicates()
df_country

In [None]:
#df_country.to_csv("../../data/database/country.csv", index=False, encoding="utf-8", sep=";")

In [None]:
# Entity "year": one row per distinct (year_ID, year) pair
df_year = database_indicators[["year_ID", "year"]].drop_duplicates()
df_year

In [None]:
# Append extra year rows (2010, 2011, 2024) to the "year" entity.
# Presumably these years are not covered by the joined indicator data -- TODO confirm.
new_rows = pd.DataFrame({"year_ID":[10,11,24], "year":[2010,2011,2024]})

# De-duplicate after appending so the year table never holds the same
# (year_ID, year) pair twice, e.g. when this cell is executed more than once
# or one of the extra years already exists in the data.
df_year = (
    pd.concat([df_year, new_rows], ignore_index=True)
    .drop_duplicates(ignore_index=True)
)

df_year

In [None]:
#df_year.to_csv("../../data/database/year.csv", index=False, encoding="utf-8", sep=";")

In [None]:
# Entity "KPIs": one row per distinct (KPI_ID, KPI_name) pair
df_kpi = database_indicators[["KPI_ID", "KPI_name"]].drop_duplicates()
df_kpi

In [None]:
#df_kpi.to_csv("../../data/database/kpi.csv", index=False, encoding="utf-8", sep=";")

In [None]:
# Entity "indicators": the indicator ID, its three ID columns and the value
selected_columns = ['indicator_ID', 'country_ID', 'year_ID', 'KPI_ID', 'value']

df_indicators = database_indicators.loc[:, selected_columns]
df_indicators

In [None]:
#df_indicators.to_csv("../../data/database/indicators.csv", index=False, encoding="utf-8", sep=";")