### **Regression Analysis and Hypothesis Testing of Life Expectancy against Economic Indicators**

In [115]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
import statsmodels.api as sma
import statsmodels.stats.weightstats as ssw

### Import data

In [None]:
# Load the economic-indicators workbook (path is relative to the notebook's
# working directory) and preview the first five rows.
source_path = r'EconomicIndicators2020.xlsx'
df = pd.read_excel(source_path)
df.head(n = 5)

### Data exploration

#### Descriptive Statistics

In [None]:
# Display pandas' summary statistics for df (with default arguments, describe()
# reports on the numeric columns when the frame contains any).
df.describe()

#### Scatter Plots of Each Variable against LifeExpect

In [None]:
# One scatter panel per explanatory variable, each plotted against LifeExpect.
# Identifier/label columns and the target itself are excluded from the x-axes.
df_plot = df.drop(['No', 'LifeExpect', 'Country', 'Continent'], axis = 1)
palette = ['#191970', '#00008B', '#0000CD', 'teal', 'darkcyan', 'c']

# Size the grid from the number of columns instead of assuming exactly six.
n_cols = 3
n_rows = max(1, -(-len(df_plot.columns) // n_cols))  # ceiling division

plt.figure(figsize = (12, 2.5 * n_rows))
for location, column in enumerate(df_plot.columns, start = 1):
    plt.subplot(n_rows, n_cols, location)
    plt.grid()
    # Cycle through the palette so that no column is silently dropped when
    # there are more columns than colours.
    color = palette[(location - 1) % len(palette)]
    plt.scatter(df[column], df['LifeExpect'], c = color, s = 10)
    plt.xlabel(column)
    plt.ylabel('LifeExpect')

# Adjust the layout and render once, after every panel has been drawn.
plt.tight_layout()
plt.show()

### Data analysis

#### t-test

In [61]:
# One-sample, two-sided t-test of the mean of LifeExpect against `value`.
confidence = 0.99
value = 70
LifeExpect = np.array(df['LifeExpect'])

t_score, p_value = scipy.stats.ttest_1samp(LifeExpect, value)
print(t_score, p_value)

# Two-sided critical t value, derived from the chosen confidence level and the
# actual sample size rather than hard-coded numbers, so it stays correct if
# either the data or `confidence` changes.
degrees_of_freedom = len(LifeExpect) - 1
scipy.stats.t.ppf((1 + confidence) / 2, degrees_of_freedom)

4.825784830727795 3.0106331354163777e-06


2.60405168637032

#### z-test

In [None]:
# One-sample, two-sided z-test of the mean of LifeExpect against `value`.
# The cell displays the tuple returned by statsmodels' ztest.
confidence = 0.99
value = 70
LifeExpect = np.array(df.loc[:, 'LifeExpect'])

z_test_result = ssw.ztest(x1 = LifeExpect, value = value, alternative = 'two-sided')
z_test_result

#### Scatter Plot

In [None]:
# GDPPC as a single-column 2-D array; the regression cells below reuse it in
# this shape as the feature matrix.
GDPPC = np.array(df['GDPPC']).reshape(-1, 1)

# Create the figure and axes together and draw through the axes object, so the
# grid, points and labels all target the same axes.
fig, ax = plt.subplots()
ax.grid()
ax.scatter(GDPPC, LifeExpect, c = 'navy', s = 15)
ax.set_xlabel('GDPPC')
ax.set_ylabel('LifeExpect')
plt.show()

#### Regression analysis

In [78]:
# Simple linear regression of LifeExpect on GDPPC with scikit-learn.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(GDPPC, LifeExpect)

r_square_3 = model.score(GDPPC, LifeExpect)
coefficent_3, intercept_3 = model.coef_, model.intercept_

# Print R^2, slope and intercept, one per line.
for fitted_value in (r_square_3, coefficent_3, intercept_3):
    print(fitted_value)

0.38432923064188507
[0.00018879]
69.53824853766625


In [92]:
# Same simple regression with statsmodels, which reports the full inference
# table. An explicit constant column is added to the design matrix so that the
# fitted model includes an intercept term.
GDPPC_c = sma.add_constant(GDPPC)
model_3 = sma.OLS(endog = LifeExpect, exog = GDPPC_c)
result_3 = model_3.fit()
result_3.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.384
Model:,OLS,Adj. R-squared:,0.381
Method:,Least Squares,F-statistic:,109.2
Date:,"Sat, 25 Nov 2023",Prob (F-statistic):,3.56e-20
Time:,15:21:45,Log-Likelihood:,-561.07
No. Observations:,177,AIC:,1126.0
Df Residuals:,175,BIC:,1133.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,69.5382,0.529,131.555,0.000,68.495,70.581
x1,0.0002,1.81e-05,10.452,0.000,0.000,0.000

0,1,2,3
Omnibus:,21.907,Durbin-Watson:,1.977
Prob(Omnibus):,0.0,Jarque-Bera (JB):,26.132
Skew:,-0.921,Prob(JB):,2.12e-06
Kurtosis:,3.389,Cond. No.,35500.0


In [111]:
# Predictor matrix for the multiple regression. The target 'LifeExpect' must be
# dropped together with the identifier/label columns; leaving it in would feed
# the response to the model as one of its own predictors (target leakage).
data = df.drop(['No', 'LifeExpect', 'Country', 'Continent'], axis = 1)

# Replace missing predictor values with the column mean.
imp = SimpleImputer(strategy = 'mean')
data_4 = pd.DataFrame(imp.fit_transform(data), columns = data.columns)

LifeExpect_4 = np.array(df['LifeExpect'])

# Use a fresh estimator so this cell does not depend on, or silently refit,
# the simple-regression model created earlier in the notebook.
model = LinearRegression()
model.fit(data_4, LifeExpect_4)

LinearRegression()

In [108]:
# Goodness of fit and fitted parameters of the multiple regression.
r_square_4 = model.score(data_4, LifeExpect_4)
coefficent_4, intercept_4 = model.coef_, model.intercept_

# Print R^2, the coefficient vector and the intercept, one per line.
for fitted_value in (r_square_4, coefficent_4, intercept_4):
    print(fitted_value)

0.46351181618109816
[ 1.06915547e-04 -5.51938968e-06  6.39391392e-05  1.90481979e-06
  1.43038046e-03 -4.62839489e-02]
69.8219848535168


In [97]:
# Multiple regression with statsmodels for the full inference table.
# A constant column is added to the predictors so the model has an intercept.
data_4_c = sma.add_constant(data_4)
model_4 = sma.OLS(endog = LifeExpect_4, exog = data_4_c)
model_results_4 = model_4.fit()
model_results_4.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.449
Model:,OLS,Adj. R-squared:,0.43
Method:,Least Squares,F-statistic:,23.13
Date:,"Sat, 25 Nov 2023",Prob (F-statistic):,7.23e-20
Time:,15:24:45,Log-Likelihood:,-551.18
No. Observations:,177,AIC:,1116.0
Df Residuals:,170,BIC:,1139.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,69.1624,0.540,128.130,0.000,68.097,70.228
GDPPC,0.0001,2.57e-05,4.215,0.000,5.76e-05,0.000
MfgMn$,-7.703e-06,5.4e-06,-1.428,0.155,-1.84e-05,2.95e-06
AgriMn$,3.245e-05,2.79e-05,1.164,0.246,-2.26e-05,8.75e-05
CO2kt,1.893e-06,3.05e-06,0.620,0.536,-4.13e-06,7.92e-06
HealthPC$,0.0015,0.000,4.118,0.000,0.001,0.002
Pop_mn,-0.0144,0.009,-1.557,0.121,-0.033,0.004

0,1,2,3
Omnibus:,15.367,Durbin-Watson:,2.076
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17.174
Skew:,-0.76,Prob(JB):,0.000187
Kurtosis:,3.136,Cond. No.,1280000.0
