In [116]:
# This notebook converts different dataframes from a Google Sheet used for analysis (Sources: Economic Data from IMF, Well-Being Indicators from OECD/ World Bank)

# Import all libaries 

import pandas as pd 
import plotly.express as px
import streamlit as st



In [117]:
# Import Data from Google Sheets, using a loop 

gids =["1952168269","1020209628","887547605", "1168409358", "1058701335", "917968550", "701342270", "1186064842"]
spreadsheet_id = "1BV0koOEqQs580tEPGv9bpZYUfY8q8UTfZGTcEoK_VtQ"
temp = {"IMF_data_df":"",
        "urban_population_df":"",
        "income_group_df":"",
        "life_expectancy_df":"",
        "gini_index_df":"",
        "poverty_rate_df":"",
        "birth_rate_df":"",
        "health_expenditure_df":""
        }

for gid, t in zip(gids, temp):
    
    temp[t]=pd.read_csv(f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}")


  temp[t]=pd.read_csv(f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}/export?format=csv&gid={gid}")


In [118]:
# I. IMF Dataframe Conversion

# Converting IMF dataframe to display years as rows and select specific indicators and countries 

# Selecting columns to keep and to melt 
columns_to_keep = ["COUNTRY", "INDICATOR", "UNIT"]
columns_to_melt = ["1980","1981","1982","1983","1984","1985","1986","1987", "1988", "1989", "1990","1991","1992", "1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023","2024","2025","2026","2027","2028","2029","2030" ]

# Melt years from columns to rows 
melted_IMF_df = pd.melt(temp["IMF_data_df"], id_vars=columns_to_keep, value_vars = columns_to_melt, var_name="YEAR", value_name="VALUE")
melted_IMF_df

# Select countries

df_countries = melted_IMF_df[melted_IMF_df['COUNTRY'].isin(['Germany', 'Denmark', 'Poland, Republic of','United States','Chile','Costa Rica','Japan','China','Indonesia','South Africa', 'Ghana',"Cote d'Ivoire"])] 

df_countries

# Select specific indicators: GDP per capita, Unemployment, Consumer Price Index (Inflation), Savings

df_countries_analysis = df_countries[df_countries['INDICATOR'].isin(['Gross domestic product (GDP), Current prices, Per capita, US dollar','Gross national savings, Percent of GDP','All Items, Consumer price index (CPI), End-of-period (EoP)','Unemployment rate'])]

df_countries_analysis


Unnamed: 0,COUNTRY,INDICATOR,UNIT,YEAR,VALUE
152,United States,"Gross national savings, Percent of GDP",Percent,1980,22.059
156,United States,Unemployment rate,,1980,7.175
213,United States,"Gross domestic product (GDP), Current prices, ...",US dollar,1980,12552.943
237,United States,"All Items, Consumer price index (CPI), End-of-...",Index,1980,86.750
273,Germany,Unemployment rate,,1980,3.359
...,...,...,...,...,...
415411,Ghana,"Gross national savings, Percent of GDP",Percent,2030,17.420
418098,"Poland, Republic of","Gross national savings, Percent of GDP",Percent,2030,16.977
418244,"Poland, Republic of","Gross domestic product (GDP), Current prices, ...",US dollar,2030,38590.703
418262,"Poland, Republic of",Unemployment rate,,2030,3.176


In [None]:
# Rename economic indicators in IMF Data Frameframe

indicator_map = {
'Gross domestic product (GDP), Current prices, Per capita, US dollar': 'GDP per capita',
'Gross national savings, Percent of GDP': 'National savings (% GDP)',
'All Items, Consumer price index (CPI), End-of-period (EoP)': 'Inflation (CPI, %))',
'Unemployment rate': 'Unemployment levels (%)'}

df_countries_analysis['INDICATOR'] = df_countries_analysis['INDICATOR'].map(indicator_map)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_countries_analysis['INDICATOR'] = df_countries_analysis['INDICATOR'].map(indicator_map)


In [120]:
# Rename columns in IMF DF to prepare for joining with other dfs 

df_countries_analysis = df_countries_analysis.rename(columns={
    'COUNTRY': 'Country Name',          # Renaming COUNTRY to Country for consistency
    'INDICATOR': 'Indicator Name',      # Renaming INDICATOR to Indicator
    'YEAR': 'Year',                # Renaming YEAR to Year
    'VALUE': 'Value'})
#df_countries_analysis = df_countries_analysis.drop(columns=["UNIT", "Unit"])
df_countries_analysis

Unnamed: 0,Country Name,Indicator Name,UNIT,Year,Value
152,United States,National savings (% GDP),Percent,1980,22.059
156,United States,Unemployment levels (%),,1980,7.175
213,United States,GDP per capita (adjusted for purchasing power ...,US dollar,1980,12552.943
237,United States,"Inflation (CPI, %))",Index,1980,86.750
273,Germany,Unemployment levels (%),,1980,3.359
...,...,...,...,...,...
415411,Ghana,National savings (% GDP),Percent,2030,17.420
418098,"Poland, Republic of",National savings (% GDP),Percent,2030,16.977
418244,"Poland, Republic of",GDP per capita (adjusted for purchasing power ...,US dollar,2030,38590.703
418262,"Poland, Republic of",Unemployment levels (%),,2030,3.176


In [121]:
# II. Urban population (UB) dataframe conversion 

# SELECTING COLUMNS TO KEEP AND TO MELT
columns_to_keep = ["Country Name", "Indicator Name",]
columns_to_melt = ["1980","1981","1982","1983","1984","1985","1986","1987", "1988", "1989", "1990","1991","1992", "1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023","2024" ]
# melting
melted_ub_df = pd.melt(temp["urban_population_df"], id_vars=columns_to_keep, value_vars = columns_to_melt, var_name="Year", value_name="Value")
melted_ub_df
# selecting indicator urban population and countries

df_countries_ub = melted_ub_df[melted_ub_df['Country Name'].isin(['Germany', 'Denmark', 'Poland','United States','Chile','Costa Rica','Japan','China','Indonesia','South Africa', 'Ghana',"Cote d'Ivoire"])] 

df_countries_ub.sort_values("Year")



Unnamed: 0,Country Name,Indicator Name,Year,Value
39,Chile,Urban population (% of total population),1980,81.243
263,South Africa,Urban population (% of total population),1980,48.425
251,United States,Urban population (% of total population),1980,73.738
190,Poland,Urban population (% of total population),1980,58.086
106,Indonesia,Urban population (% of total population),1980,22.104
...,...,...,...,...
11744,China,Urban population (% of total population),2024,65.544
11743,Chile,Urban population (% of total population),2024,88.116
11955,United States,Urban population (% of total population),2024,83.515
11759,Germany,Urban population (% of total population),2024,77.895


In [122]:
# III. Life Expectancy dataframe conversion 

# SELECTING COLUMNS TO KEEP AND TO MELT
columns_to_keep_ = ["Country Name", "Indicator Name",]
columns_to_melt_ = ["1980","1981","1982","1983","1984","1985","1986","1987", "1988", "1989", "1990","1991","1992", "1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023","2024" ]
# melting
melted_life_exp = pd.melt(temp["life_expectancy_df"], id_vars=columns_to_keep_, value_vars = columns_to_melt_, var_name="Year", value_name="Value")
melted_life_exp
# selecting indicator urban population and countries

df_countries_life_exp = melted_life_exp[melted_life_exp['Country Name'].isin(['Germany', 'Denmark', 'Poland','United States','Chile','Costa Rica','Japan','China','Indonesia','South Africa', 'Ghana',"Cote d'Ivoire"])] 

df_countries_life_exp.sort_values("Year")

Unnamed: 0,Country Name,Indicator Name,Year,Value
39,Chile,"Life expectancy at birth, total (years)",1980,69.475000
263,South Africa,"Life expectancy at birth, total (years)",1980,59.961000
251,United States,"Life expectancy at birth, total (years)",1980,73.609756
190,Poland,"Life expectancy at birth, total (years)",1980,70.097561
106,Indonesia,"Life expectancy at birth, total (years)",1980,58.701000
...,...,...,...,...
11744,China,"Life expectancy at birth, total (years)",2024,
11743,Chile,"Life expectancy at birth, total (years)",2024,
11955,United States,"Life expectancy at birth, total (years)",2024,
11759,Germany,"Life expectancy at birth, total (years)",2024,


In [123]:
# IV. GINI dataframe conversion 

# SELECTING COLUMNS TO KEEP AND TO MELT
columns_to_keep_ = ["Country Name", "Indicator Name",]
columns_to_melt_ = ["1980","1981","1982","1983","1984","1985","1986","1987", "1988", "1989", "1990","1991","1992", "1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023","2024" ]
# melting
melted_life_exp = pd.melt(temp["gini_index_df"], id_vars=columns_to_keep_, value_vars = columns_to_melt_, var_name="Year", value_name="Value")
melted_life_exp
# selecting indicator urban population and countries

df_gini_index = melted_life_exp[melted_life_exp['Country Name'].isin(['Germany', 'Denmark', 'Poland','United States','Chile','Costa Rica','Japan','China','Indonesia','South Africa', 'Ghana',"Cote d'Ivoire"])] 

df_gini_index.sort_values("Year")


Unnamed: 0,Country Name,Indicator Name,Year,Value
39,Chile,Gini index,1980,
263,South Africa,Gini index,1980,
251,United States,Gini index,1980,34.7
190,Poland,Gini index,1980,
106,Indonesia,Gini index,1980,
...,...,...,...,...
11744,China,Gini index,2024,
11743,Chile,Gini index,2024,
11955,United States,Gini index,2024,
11759,Germany,Gini index,2024,


In [124]:
# V. Health Expenditure Dataframe conversion

# SELECTING COLUMNS TO KEEP AND TO MELT
columns_to_keep_ = ["Country Name", "Indicator Name",]
columns_to_melt_ = ["1980","1981","1982","1983","1984","1985","1986","1987", "1988", "1989", "1990","1991","1992", "1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023","2024" ]
# melting
melted_health_exp = pd.melt(temp["health_expenditure_df"], id_vars=columns_to_keep_, value_vars = columns_to_melt_, var_name="Year", value_name="Value")
melted_health_exp
# selecting indicator urban population and countries

df_countries_health_exp = melted_health_exp[melted_health_exp['Country Name'].isin(['Germany', 'Denmark', 'Poland','United States','Chile','Costa Rica','Japan','China','Indonesia','South Africa', 'Ghana',"Cote d'Ivoire"])] 

df_countries_health_exp.sort_values("Year")

Unnamed: 0,Country Name,Indicator Name,Year,Value
39,Chile,Current health expenditure (% of GDP),1980,
263,South Africa,Current health expenditure (% of GDP),1980,
251,United States,Current health expenditure (% of GDP),1980,
190,Poland,Current health expenditure (% of GDP),1980,
106,Indonesia,Current health expenditure (% of GDP),1980,
...,...,...,...,...
11744,China,Current health expenditure (% of GDP),2024,
11743,Chile,Current health expenditure (% of GDP),2024,
11955,United States,Current health expenditure (% of GDP),2024,
11759,Germany,Current health expenditure (% of GDP),2024,


In [125]:
# VI. Poverty Rate dataframe Conversion 

# SELECTING COLUMNS TO KEEP AND TO MELT
columns_to_keep_ = ["Country Name", "Indicator Name",]
columns_to_melt_ = ["1980","1981","1982","1983","1984","1985","1986","1987", "1988", "1989", "1990","1991","1992", "1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023","2024" ]
# melting
melted_poverty_rate = pd.melt(temp["poverty_rate_df"], id_vars=columns_to_keep_, value_vars = columns_to_melt_, var_name="Year", value_name="Value")
melted_poverty_rate
# selecting indicator urban population and countries

df_countries_povery_rate = melted_poverty_rate[melted_poverty_rate['Country Name'].isin(['Germany', 'Denmark', 'Poland','United States','Chile','Costa Rica','Japan','China','Indonesia','South Africa', 'Ghana',"Cote d'Ivoire"])]  

df_countries_povery_rate.sort_values("Year")

Unnamed: 0,Country Name,Indicator Name,Year,Value
39,Chile,Poverty headcount ratio at national poverty li...,1980,
263,South Africa,Poverty headcount ratio at national poverty li...,1980,
251,United States,Poverty headcount ratio at national poverty li...,1980,
190,Poland,Poverty headcount ratio at national poverty li...,1980,
106,Indonesia,Poverty headcount ratio at national poverty li...,1980,
...,...,...,...,...
11744,China,Poverty headcount ratio at national poverty li...,2024,
11743,Chile,Poverty headcount ratio at national poverty li...,2024,
11955,United States,Poverty headcount ratio at national poverty li...,2024,
11759,Germany,Poverty headcount ratio at national poverty li...,2024,


In [126]:
# VII.  Birthrate dataframe Conversion 

# SELECTING COLUMNS TO KEEP AND TO MELT
columns_to_keep_ = ["Country Name", "Indicator Name",]
columns_to_melt_ = ["1980","1981","1982","1983","1984","1985","1986","1987", "1988", "1989", "1990","1991","1992", "1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023","2024" ]
# melting
melted_birth_rate = pd.melt(temp["birth_rate_df"], id_vars=columns_to_keep_, value_vars = columns_to_melt_, var_name="Year", value_name="Value")
melted_birth_rate
# selecting indicator urban population and countries

df_countries_birth_rate = melted_birth_rate[melted_birth_rate['Country Name'].isin(['Germany', 'Denmark', 'Poland','United States','Chile','Costa Rica','Japan','China','Indonesia','South Africa', 'Ghana',"Cote d'Ivoire"])] 

df_countries_birth_rate.sort_values("Year")

Unnamed: 0,Country Name,Indicator Name,Year,Value
39,Chile,"Birth rate, crude (per 1,000 people)",1980,23.234
263,South Africa,"Birth rate, crude (per 1,000 people)",1980,34.663
251,United States,"Birth rate, crude (per 1,000 people)",1980,15.900
190,Poland,"Birth rate, crude (per 1,000 people)",1980,19.600
106,Indonesia,"Birth rate, crude (per 1,000 people)",1980,33.686
...,...,...,...,...
11744,China,"Birth rate, crude (per 1,000 people)",2024,
11743,Chile,"Birth rate, crude (per 1,000 people)",2024,
11955,United States,"Birth rate, crude (per 1,000 people)",2024,
11759,Germany,"Birth rate, crude (per 1,000 people)",2024,


In [None]:
# VIII.  Concatenate dfs to final table 

df_countries_full = pd.concat([
    df_countries_analysis,  # Data from the first DataFrame (e.g., GINI data)
    df_countries_ub,        # Data from the second DataFrame (e.g., Urban Pop data)
    df_gini_index,
    df_countries_life_exp,
    df_countries_health_exp,
    df_countries_povery_rate,
    df_countries_birth_rate,
], ignore_index=True)    

df_countries_full.to_csv(
    "Full_country_list.csv", # 1. The name of your output file
    index=False               # 2. Crucial: Prevents writing the row index to the Excel file
)