In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the three raw input files (paths are relative to the working directory)
gdp = pd.read_excel(io="gdp.xls")
gdp_per_capita = pd.read_excel(io="gdp_per_capita.xls")
df_indicator = pd.read_csv(filepath_or_buffer="better_life_index.csv")

# Clean GDP

In [None]:
# Display every GDP row that contains at least one missing value
gdp.loc[gdp.isna().any(axis="columns")]

In [4]:
# Clean the GDP sheet in a single chain and reshape it to long format
gdp = (
  gdp
  # Drop every row that has at least one missing value
  .dropna(axis="index", how="any")
  # Give the first column a short name
  .rename(
    columns={"GDP, current prices (Billions of U.S. dollars)": "Country"})
  # Keep the first 196 rows; the rows after them are not countries
  .head(196)
  # Turn the 'no data' placeholder into NaN
  .map(lambda cell: np.nan if cell == "no data" else cell)
  # Wide -> long: one row per (Country, Year)
  .melt(
    id_vars=["Country"], var_name="Year", value_name="GDP ($USD Billions)")
)

# Clean GDP Per Capita

In [None]:
# Display every GDP-per-capita row that contains at least one missing value
gdp_per_capita.loc[gdp_per_capita.isna().any(axis="columns")]

In [6]:
# Clean the GDP-per-capita sheet in a single chain and reshape it to long format
gdp_per_capita = (
  gdp_per_capita
  # Drop every row that has at least one missing value
  .dropna(axis="index", how="any")
  # Give the first column a short name
  .rename(
    columns={
      "GDP per capita, current prices\n (U.S. dollars per capita)": "Country"
    }
  )
  # Keep the first 196 rows; the rows after them are not countries
  .head(196)
  # Turn the 'no data' placeholder into NaN
  .map(lambda cell: np.nan if cell == "no data" else cell)
  # Wide -> long: one row per (Country, Year)
  .melt(
    id_vars=["Country"], var_name="Year", value_name="GDP Per Capita ($USD)")
)

# Clean Better Life Index

In [7]:
# Columns holding a single distinct value (NaN counts as a value) carry no
# information, so they are dropped together with the code columns listed below.
col_to_remove = [
  name
  for name in df_indicator.columns
  if df_indicator[name].nunique(dropna=False) == 1
]

# Explicitly listed columns to remove as well
col_to_remove += ["LOCATION", "INDICATOR", "UNIT_MEASURE"]

# Drop all of them in one call
df_indicator = df_indicator.drop(columns=col_to_remove)

In [8]:
# Add a column with 'INEQUALITY' cleaned as 'Inequality'.
# The codes are translated with one vectorized lookup rather than a row-by-row
# loop: this does not depend on the index being 0..n-1, and the 'Inequality'
# column is always created. Codes missing from the mapping become NaN.
inequality_labels = {
  "TOT": "Total",
  "WMN": "Women",
  "MN": "Men",
  "LW": "Low",
  "HGH": "High",
}
df_indicator["Inequality"] = df_indicator["INEQUALITY"].map(inequality_labels)

In [9]:
# The raw "INEQUALITY" code column is dropped here
df_indicator = df_indicator.drop(labels=["INEQUALITY"], axis="columns")

# Merge dataframes to get only countries that are present in both dataframes

In [10]:
# Step 1: Merge gdp with gdp_per_capita on (Country, Year).
# Step 2: Inner join with the Better Life Index countries, so that only
# countries present in both dataframes remain.
#
# df_indicator repeats each country on many rows (it is pivoted per Country /
# Indicator later on), so its country list is de-duplicated before the join.
# Joining against the raw column would copy every GDP row once per indicator
# row, only for drop_duplicates() to discard the copies again.
# Rows and their order are the same as with the raw join; only the row labels
# differ, because no exploded copies are created and then removed.
indicator_countries = df_indicator["Country"].drop_duplicates()

df_gdp_cleaned = gdp.merge(
  gdp_per_capita,
  how="inner",
  on=["Country", "Year"]
).merge(
  indicator_countries,
  how="inner",
  on="Country"
).drop_duplicates()

In [11]:
# Countries that are not present in both dataframes.
# A plain outer join returns the union of the two country lists, i.e. every
# country. The merge indicator column ("_merge") records which side each name
# came from, so only names found on a single side are kept.
country_presence = df_indicator[["Country"]].drop_duplicates().merge(
  df_gdp_cleaned[["Country"]].drop_duplicates(),
  how="outer",
  on="Country",
  indicator=True
)
country_out = country_presence.loc[
  country_presence["_merge"] != "both", "Country"
].unique()

In [12]:
# Inner join Better Life Index countries with GDP countries. The goal is to have
# only countries present in both dataframes.
#
# df_gdp_cleaned has one row per country and year, so its country list is
# de-duplicated first. Joining against the raw column would copy every
# indicator row once per year before drop_duplicates() removes the copies.
gdp_countries = df_gdp_cleaned["Country"].drop_duplicates()

df_indicator_cleaned = df_indicator.merge(
  gdp_countries,
  how="inner",
  on="Country"
).drop_duplicates()

# Keep the "Total" rows only; the Inequality column is then constant and dropped
is_total = df_indicator_cleaned["Inequality"].eq("Total")
df_indicator_cleaned = (
  df_indicator_cleaned.loc[is_total].drop(columns=["Inequality"])
)

# Create unique Dataframe for 2024

In [13]:
# Restrict the merged GDP table to 2024; the Year column is then redundant
is_2024 = df_gdp_cleaned["Year"] == 2024
gdp2024 = df_gdp_cleaned.loc[is_2024].drop(columns=["Year"])

In [None]:
# One-column dataframe listing each distinct indicator name once,
# in order of first appearance
unique_indicators = df_indicator_cleaned["Indicator"].unique()
indicator = pd.DataFrame({"Indicator": unique_indicators})

In [23]:
# Attach the unit of measure to each indicator name
indicator_units = (
  df_indicator_cleaned.loc[:, ["Indicator", "Unit of Measures"]]
  .drop_duplicates()
)
indicator = pd.merge(indicator, indicator_units, how="inner", on="Indicator")

In [53]:
# Wide table: one row per country, one column per indicator, filled with
# OBS_VALUE. Repeated Country/Indicator pairs are combined by pivot_table's
# default aggregation (mean).
indicator_values = df_indicator_cleaned.drop(columns=["Unit of Measures"])
df_all = pd.pivot_table(
  indicator_values,
  index="Country",
  columns="Indicator",
  values="OBS_VALUE",
).reset_index()