In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import statsmodels.formula.api as smf
import scipy.stats as stats

# Directory holding the per-indicator annual CSV exports. The original local
# location remains the default; set GLOBALECONOMY_DATA_DIR to read from
# somewhere else without editing the notebook.
path = os.environ.get(
    "GLOBALECONOMY_DATA_DIR",
    "/Users/eddie/Downloads/theglobaleconomy data/annual/",
)

# Keep only CSV files (anything else in the folder is skipped) and sort them
# so the merge order, and therefore the resulting column order, is the same
# on every run.
files = sorted(f for f in os.listdir(path) if f.lower().endswith(".csv"))
if not files:
    raise FileNotFoundError(f"No CSV files found in {path!r}")

merged = None

for filename in files:
    _df = pd.read_csv(os.path.join(path, filename))
    if merged is None:
        merged = _df
    else:
        # Outer join: a country-year that appears in only some files is kept,
        # with NaN in the columns contributed by the other files.
        merged = pd.merge(merged, _df, on=["Country", "Year", "Code", "ContinentCode"], how="outer")

# Restrict to 2015-2024 (both ends inclusive). .copy() makes df an independent
# frame rather than a slice of merged.
df = merged[merged["Year"].between(2015, 2024)].copy()

In [None]:
def get_missing_data(data, group=None, plot=False, sort=False):
    """Measure the fraction of missing (null) values in a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect.
    group : column label or list of column labels, optional
        When given, missing fractions are computed separately for each
        group defined by these column(s). The grouping columns themselves
        are excluded from the result.
    plot : bool, default False
        Accepted for call compatibility; this function does not act on it.
    sort : bool, default False
        Ungrouped: sort the per-column fractions in ascending order.
        Grouped: order the columns by their overall missing fraction,
        ascending.

    Returns
    -------
    by_column, total
        Ungrouped: ``by_column`` is a Series (index = column names) and
        ``total`` is a scalar, the fraction of all cells that are null.
        Grouped: ``by_column`` is a DataFrame (rows = groups, columns =
        non-grouping columns) and ``total`` is a Series giving, per group,
        the fraction of null cells across those columns.
    """
    if group is None:
        by_column = data.isnull().sum() / len(data)
        if sort:
            by_column = by_column.sort_values()
        total = data.isnull().sum().sum() / data.size
        return by_column, total

    # Normalise `group` to a list of column labels.
    keys = list(group) if pd.api.types.is_list_like(group) else [group]
    key_values = [data[k] for k in keys]
    grouper = key_values[0] if len(key_values) == 1 else key_values

    # Group the boolean null-mask (not the raw data): the mean of a boolean
    # column within a group is exactly that group's missing fraction.
    is_missing = data.drop(columns=keys).isnull()
    by_column = is_missing.groupby(grouper).mean()
    if sort:
        by_column = by_column[is_missing.mean().sort_values().index]

    # Every column has the same number of rows within a group, so the mean of
    # the per-column fractions equals the fraction of null cells in the group.
    total = by_column.mean(axis=1)
    return by_column, total

# Display only the per-column missing fractions, sorted; the overall total
# (second element of the returned pair) is not shown here.
missing_by_column = get_missing_data(df, sort=True)[0]
missing_by_column

In [None]:
## Map the verbose source column headers to short identifiers that can be
## used directly as variable names in the OLS formula strings.

rename_dict = dict([
    ("Happiness Index 0 (unhappy) - 10 (happy)", "happiness"),
    ("Gross Domestic Product billions of U.S. dollars", "GDP"),
    ("GDP per capita current U.S. dollars", "GDP_per_capita"),
    ("GDP per capita Purchasing Power Parity", "GDPPPP"),
    ("Trade balance as percent of GDP", "Trade_balance_percent"),
    ("Government spending as percent of GDP", "Govt_spending_percent"),
])
df = df.rename(columns=rename_dict)

# Explanatory variables: every renamed column except the response "happiness",
# in the order listed above.
indicators = [short for short in rename_dict.values() if short != "happiness"]

In [71]:
print("INDIVIDUAL LINEAR REGRESSIONS:")
significance_level = 0.01

# Fit one simple regression (happiness ~ indicator) per indicator and report
# its R-squared together with whether the slope is significant at the level
# chosen above.
for indicator in indicators:
    model = smf.ols(f'happiness ~ {indicator}', data=df).fit()
    p_value = model.pvalues[indicator]
    if p_value < significance_level:
        print(model.rsquared.round(3),"\t", indicator, f"(p<{significance_level})")
    else:
        # Name the indicator and keep R-squared / p-value on the line so a
        # non-significant result can be attributed to a specific variable.
        print(model.rsquared.round(3), "\t", indicator,
              f"(not statistically significant, p={p_value:.3g})")

INDIVIDUAL LINEAR REGRESSIONS:
0.033 	 GDP (p<0.01)
0.505 	 GDP_per_capita (p<0.01)
0.538 	 GDPPPP (p<0.01)
0.196 	 Trade_balance_percent (p<0.01)
0.112 	 Govt_spending_percent (p<0.01)


In [72]:
print("MULTIPLE REGRESSION MODEL - ALL INDICATORS:")
# Regress happiness on all indicators jointly; the right-hand side of the
# formula is the indicator names joined with " + ".
model = smf.ols(
    formula="happiness ~ " + " + ".join(indicators),
    data=df,
).fit()
model.summary()

MULTIPLE REGRESSION MODEL - ALL INDICATORS:


0,1,2,3
Dep. Variable:,happiness,R-squared:,0.545
Model:,OLS,Adj. R-squared:,0.543
Method:,Least Squares,F-statistic:,304.6
Date:,"Mon, 15 Dec 2025",Prob (F-statistic):,1.5e-214
Time:,11:04:31,Log-Likelihood:,-1455.1
No. Observations:,1279,AIC:,2922.0
Df Residuals:,1273,BIC:,2953.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.3628,0.067,65.079,0.000,4.231,4.494
GDP,1.235e-05,9.24e-06,1.336,0.182,-5.79e-06,3.05e-05
GDP_per_capita,9.246e-06,3.04e-06,3.041,0.002,3.28e-06,1.52e-05
GDPPPP,2.186e-05,2.81e-06,7.785,0.000,1.64e-05,2.74e-05
Trade_balance_percent,0.0017,0.002,0.791,0.429,-0.002,0.006
Govt_spending_percent,0.0247,0.004,6.329,0.000,0.017,0.032

0,1,2,3
Omnibus:,69.03,Durbin-Watson:,0.373
Prob(Omnibus):,0.0,Jarque-Bera (JB):,84.74
Skew:,-0.531,Prob(JB):,3.97e-19
Kurtosis:,3.68,Cond. No.,144000.0


In [76]:
print("MULTIPLE REGRESSION MODEL - SELECTED INDICATORS:*\n*Selected by p-value")
# Reduced model with three of the indicators; GDP and Trade_balance_percent
# are left out of this formula.
model = smf.ols(
    formula="happiness ~ GDP_per_capita + GDPPPP + Govt_spending_percent",
    data=df,
).fit()
model.summary()

MULTIPLE REGRESSION MODEL - SELECTED INDICATORS:*
*Selected by p-value


0,1,2,3
Dep. Variable:,happiness,R-squared:,0.544
Model:,OLS,Adj. R-squared:,0.543
Method:,Least Squares,F-statistic:,506.8
Date:,"Mon, 15 Dec 2025",Prob (F-statistic):,9.49e-217
Time:,11:06:50,Log-Likelihood:,-1456.3
No. Observations:,1279,AIC:,2921.0
Df Residuals:,1275,BIC:,2941.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,4.3589,0.065,67.461,0.000,4.232,4.486
GDP_per_capita,9.722e-06,2.94e-06,3.311,0.001,3.96e-06,1.55e-05
GDPPPP,2.23e-05,2.56e-06,8.713,0.000,1.73e-05,2.73e-05
Govt_spending_percent,0.0238,0.004,6.241,0.000,0.016,0.031

0,1,2,3
Omnibus:,76.938,Durbin-Watson:,0.373
Prob(Omnibus):,0.0,Jarque-Bera (JB):,97.349
Skew:,-0.56,Prob(JB):,7.260000000000001e-22
Kurtosis:,3.758,Cond. No.,138000.0


In [None]:
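## TODO: ADD INTERACTION EFFECTS HERE

## [DEFINITION: An interaction effect occurs when the effect of one variable on the outcome depends on the value of another variable.]
## In a formula string, `a:b` adds only the interaction term, while `a*b` is shorthand for `a + b + a:b`.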