# Supervised Machine Learning Model/Linear Regression for BRICS_US Macro Data

In [196]:

#https://matplotlib.org/gallery/color/named_colors.html
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
import math 

In [197]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

from sklearn.preprocessing import PolynomialFeatures

%matplotlib inline
from sklearn.svm import SVR

In [198]:
import statsmodels.api as sm
from statsmodels.stats import diagnostic as diag
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [199]:
from pandas_datareader import wb

In [200]:
import geopandas

# Part I: Regression for India Data

In [201]:
worldbank_indicators = ["NY.GDP.MKTP.KD.ZG", "NE.GDI.TOTL.KD.ZG","SL.EMP.TOTL.SP.ZS","TX.QTY.MRCH.XD.WD","SP.POP.GROW","EG.USE.PCAP.KG.OE", "BX.KLT.DINV.WD.GD.ZS"]
 
 
world_bank = wb.download (indicator =worldbank_indicators, country = ["IND"], start = 1995, end =2020)

world_bank = world_bank.reset_index(1)                        

In [202]:
world_bank_df = world_bank.rename(columns = {"NY.GDP.MKTP.KD.ZG":"GDP",\
                                               "NE.GDI.TOTL.KD.ZG":"Gross_Capital_Formation_Growth",\
                                               "SL.EMP.TOTL.SP.ZS":"Employment_Population_Ratio",\
                                            "TX.QTY.MRCH.XD.WD":"Exports",\
                                            "SP.POP.GROW":"Population_Growth",\
                                            "EG.USE.PCAP.KG.OE":"Energy_Use",\
                                            "BX.KLT.DINV.WD.GD.ZS":"Forign_Direct_Investment%GDP"})

In [203]:
world_bank_final = world_bank_df.loc[::-1]

In [205]:
world_bank_final.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Forign_Direct_Investment%GDP
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
India,1995,7.574492,7.780467,54.91,66.922085,1.918941,385.091857,0.594986
India,1996,7.549522,0.909082,54.77,75.840974,1.89522,389.426516,0.617479
India,1997,4.049821,14.880201,54.63,70.603934,1.869172,397.378186,0.860209
India,1998,6.184416,3.779184,54.49,73.737727,1.839659,399.498829,0.625286
India,1999,8.845756,17.634883,54.34,83.327389,1.80556,414.962493,0.472645


In [206]:
world_bank_final1 = world_bank_final.fillna({
    'Exports' : 500,
    'Energy_Use': 700,
    'Forign_Direct_Investment%GDP':1.6})

In [207]:
world_bank_final1.to_excel('India.xlsx', index = False)

In [208]:
world_bank_final1.describe()

Unnamed: 0,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Forign_Direct_Investment%GDP
count,26.0,26.0,26.0,26.0,26.0,26.0,26.0
mean,6.015853,7.858675,51.551923,265.855571,1.453925,528.203348,1.436854
std,3.201107,8.94863,3.465178,156.988108,0.310959,119.585799,0.746519
min,-7.251755,-10.107401,43.0,66.922085,0.989414,385.091857,0.472645
25%,4.928303,3.700722,48.63,113.007412,1.154981,418.283314,0.789253
50%,7.102806,7.627011,53.1,254.211635,1.487056,493.329632,1.511796
75%,7.907675,14.679947,54.475,419.327275,1.718967,628.877385,1.893804
max,8.845756,29.772987,54.91,500.0,1.918941,700.0,3.620522


In [209]:
X = world_bank_final1.drop("GDP", axis = 1)
Y = world_bank_final1[['GDP']]


In [210]:
Y.shape


(26, 1)

In [211]:
# Split X and y into X_
#X_train, X_test, y_train, y_test = train_test_split(X, Y)


In [212]:
X.shape

(26, 7)

In [213]:
X_train = X[:20].values
X_test = X[20:].values
y_train = Y[:20].values
y_test = Y[20:].values

In [214]:
X_test

array([['2015', 4.72952352097509, 48.08, 422.4718463, 1.11689591278599,
        700.0, 2.09211575768566],
       ['2016', 3.67456808127109, 47.64, 434.3545807, 1.09045932094686,
        700.0, 1.93736319812939],
       ['2017', 10.8362897885911, 47.21, 461.2043143, 1.06335942989132,
        700.0, 1.50731658089818],
       ['2018', 9.74553265704785, 46.79, 477.8606632, 1.03782784787162,
        700.0, 1.55926352274079],
       ['2019', 2.28274228197319, 46.74, 491.4029668, 1.01326124930492,
        700.0, 1.76312750821227],
       ['2020', -10.1074012471816, 43.0, 500.0, 0.989413800188014, 700.0,
        1.6]], dtype=object)

In [215]:
y_test

array([[ 7.99625379],
       [ 8.2563055 ],
       [ 6.79538342],
       [ 6.53298901],
       [ 4.04155419],
       [-7.25175478]])

In [216]:
# create a Linear Regression model object
regression_model_India = LinearRegression()


In [217]:
X_test

array([['2015', 4.72952352097509, 48.08, 422.4718463, 1.11689591278599,
        700.0, 2.09211575768566],
       ['2016', 3.67456808127109, 47.64, 434.3545807, 1.09045932094686,
        700.0, 1.93736319812939],
       ['2017', 10.8362897885911, 47.21, 461.2043143, 1.06335942989132,
        700.0, 1.50731658089818],
       ['2018', 9.74553265704785, 46.79, 477.8606632, 1.03782784787162,
        700.0, 1.55926352274079],
       ['2019', 2.28274228197319, 46.74, 491.4029668, 1.01326124930492,
        700.0, 1.76312750821227],
       ['2020', -10.1074012471816, 43.0, 500.0, 0.989413800188014, 700.0,
        1.6]], dtype=object)

In [218]:
# pass through the X_train & y_train data set
regression_model_India.fit(X_train, y_train)

LinearRegression()

In [219]:
# let's grab the coefficient of our model and the intercept
intercept = regression_model_India.intercept_[0]
coefficent = regression_model_India.coef_[0][0]

In [220]:
print("The intercept for our model is {:.4}".format(intercept))
print('-'*100)

The intercept for our model is 768.3
----------------------------------------------------------------------------------------------------


In [221]:
# loop through the dictionary and print the data
for coef in zip(X.columns, regression_model_India.coef_[0]):
    print("The Coefficient for {} is {:.2}".format(coef[0],coef[1]))

The Coefficient for year is -0.47
The Coefficient for Gross_Capital_Formation_Growth is 0.073
The Coefficient for Employment_Population_Ratio is 2.3
The Coefficient for Exports is 0.008
The Coefficient for Population_Growth is 3.8
The Coefficient for Energy_Use is 0.098
The Coefficient for Forign_Direct_Investment%GDP is -1.1


In [222]:
# Get multiple predictions
y_predict = regression_model_India.predict(X_test)

# Show the first 5 predictions
y_predict[:5]

array([[11.20787124],
       [ 9.82585048],
       [ 9.47424583],
       [ 7.95767352],
       [ 6.63260003]])

In [223]:
import math
# calculate the mean squared error
model_mse = mean_squared_error(y_test, y_predict)

# calculate the mean absolute error
model_mae = mean_absolute_error(y_test, y_predict)

# calulcate the root mean squared error
model_rmse =  math.sqrt(model_mse)

# display the output
print("MSE {:.3}".format(model_mse))
print("MAE {:.3}".format(model_mae))
print("RMSE {:.3}".format(model_rmse))

MSE 7.72
MAE 2.61
RMSE 2.78


In [224]:
y_test

array([[ 7.99625379],
       [ 8.2563055 ],
       [ 6.79538342],
       [ 6.53298901],
       [ 4.04155419],
       [-7.25175478]])

In [225]:
y_predict

array([[11.20787124],
       [ 9.82585048],
       [ 9.47424583],
       [ 7.95767352],
       [ 6.63260003],
       [-3.0559243 ]])

In [226]:
regression_model_India.score(X_train, y_train)

0.573574686091411

In [227]:
r2_score_India = r2_score(y_test, y_predict)
r2_score_India

0.7338682390419914

# Regression for USA Data

In [228]:
worldbank_indicators = ["NY.GDP.MKTP.KD.ZG", "NE.GDI.TOTL.KD.ZG","SL.EMP.TOTL.SP.ZS","TX.QTY.MRCH.XD.WD","SP.POP.GROW","EG.USE.PCAP.KG.OE"]
 
 
world_bank = wb.download (indicator =worldbank_indicators, country = ["USA"], start = 1995, end =2020)

world_bank = world_bank.reset_index(1)          

In [229]:
world_bank_df = world_bank.rename(columns = {"NY.GDP.MKTP.KD.ZG":"GDP",\
                                               "NE.GDI.TOTL.KD.ZG":"Gross_Capital_Formation_Growth",\
                                               "SL.EMP.TOTL.SP.ZS":"Employment_Population_Ratio",\
                                            "TX.QTY.MRCH.XD.WD":"Exports",\
                                            "SP.POP.GROW":"Population_Growth",\
                                            "EG.USE.PCAP.KG.OE":"Energy_Use"})

In [230]:
world_bank_final = world_bank_df.loc[::-1]

In [231]:
world_bank_final.head(32)

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
United States,1995,2.684217,2.864561,62.11,69.331525,1.190787,7763.755106
United States,1996,3.772566,7.591728,62.4,73.153759,1.163412,7844.468266
United States,1997,4.381775,9.481793,63.01,80.10629,1.20396,7828.581096
United States,1998,4.481408,8.660396,63.31,84.165066,1.165715,7803.697605
United States,1999,4.753236,7.951288,63.56,87.531198,1.14834,7923.223893
United States,2000,4.127484,6.097963,63.77,100.0,1.112769,8056.86385
United States,2001,0.998341,-3.992445,62.92,93.997048,0.989741,7827.886325
United States,2002,1.741695,0.472452,61.9,90.266183,0.927797,7843.344849
United States,2003,2.861211,4.131583,61.39,92.970344,0.859482,7794.23553
United States,2004,3.798891,7.82486,61.43,100.593525,0.925484,7881.578642


In [232]:
world_bank_final1 = world_bank_final.fillna({
    "Gross_Capital_Formation_Growth":12.0, 
    'Exports' :160.0,
    'Energy_Use': 6800.0})
world_bank_final1.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
United States,1995,2.684217,2.864561,62.11,69.331525,1.190787,7763.755106
United States,1996,3.772566,7.591728,62.4,73.153759,1.163412,7844.468266
United States,1997,4.381775,9.481793,63.01,80.10629,1.20396,7828.581096
United States,1998,4.481408,8.660396,63.31,84.165066,1.165715,7803.697605
United States,1999,4.753236,7.951288,63.56,87.531198,1.14834,7923.223893


In [233]:
world_bank_final1.to_excel('USA.xlsx', index = False)

In [234]:
world_bank_final1.describe()

Unnamed: 0,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use
count,26.0,26.0,26.0,26.0,26.0,26.0
mean,2.251511,3.058375,60.544231,121.970683,0.864981,7390.327178
std,1.941325,5.863005,2.210808,32.303388,0.229266,479.723261
min,-3.642014,-16.595895,56.31,69.331525,0.350911,6800.0
25%,1.766792,1.07048,58.4525,93.22702,0.72843,6880.420121
50%,2.54487,4.30647,61.3,122.504765,0.899182,7592.867228
75%,3.403789,7.218287,62.1325,154.051194,0.98337,7828.407404
max,4.753236,10.457543,63.77,167.697222,1.20396,8056.86385


In [235]:
X = world_bank_final1.drop("GDP", axis = 1)
Y = world_bank_final1[['GDP']]


In [236]:
X_train = X[:20].values
X_test = X[20:].values
y_train = Y[:20].values
y_test = Y[20:].values

In [237]:
X_test

array([['2015', 5.04799124347824, 58.73, 155.0967981, 0.73621730882542,
        6803.99660728499],
       ['2016', -0.473252554625759, 59.13, 154.7712254,
        0.724676067451429, 6800.0],
       ['2017', 3.74679542893868, 59.58, 161.0373425, 0.63264399508256,
        6800.0],
       ['2018', 5.10467700795469, 59.89, 167.6972224, 0.526435395564053,
        6800.0],
       ['2019', 3.32735318928567, 60.27, 166.9141484, 0.455381285963537,
        6800.0],
       ['2020', -3.95644835328591, 56.31, 160.0, 0.350911063312921,
        6800.0]], dtype=object)

In [238]:
# create a Linear Regression model object
regression_model_USA = LinearRegression()

In [239]:
# pass through the X_train & y_train data set
regression_model_USA.fit(X_train, y_train)

LinearRegression()

In [240]:
# let's grab the coefficient of our model and the intercept
intercept = regression_model_USA.intercept_[0]
coefficent = regression_model_USA.coef_[0][0]

In [241]:
print("The intercept for our model is {:.4}".format(intercept))
print('-'*100)

The intercept for our model is -420.4
----------------------------------------------------------------------------------------------------


In [242]:
# loop through the dictionary and print the data
for coef in zip(X.columns, regression_model_USA.coef_[0]):
    print("The Coefficient for {} is {:.2}".format(coef[0],coef[1]))

The Coefficient for year is 0.2
The Coefficient for Gross_Capital_Formation_Growth is 0.24
The Coefficient for Employment_Population_Ratio is 0.47
The Coefficient for Exports is -0.027
The Coefficient for Population_Growth is 1.8
The Coefficient for Energy_Use is -0.0005


In [243]:
y_test

array([[ 3.07551465],
       [ 1.71142677],
       [ 2.3326794 ],
       [ 2.99646435],
       [ 2.16117652],
       [-3.64201388]])

In [244]:
# Get multiple predictions
y_predict = regression_model_USA.predict(X_test)

# Show the first 5 predictions
y_predict[:6]

array([[ 2.70940662],
       [ 1.73682139],
       [ 2.84120157],
       [ 3.14515422],
       [ 2.98152012],
       [-0.44568975]])

In [245]:
import math
# calculate the mean squared error
model_mse = mean_squared_error(y_test, y_predict)

# calculate the mean absolute error
model_mae = mean_absolute_error(y_test, y_predict)

# calulcate the root mean squared error
model_rmse =  math.sqrt(model_mse)

# display the output
print("MSE {:.3}".format(model_mse))
print("MAE {:.3}".format(model_mae))
print("RMSE {:.3}".format(model_rmse))

MSE 1.88
MAE 0.844
RMSE 1.37


In [246]:
regression_model_USA.score(X_train, y_train)

0.975923999233812

In [247]:
r2_score_USA = r2_score(y_test, y_predict)
r2_score_USA

0.6501675179594838

# Regression for China Data

In [248]:
worldbank_indicators = ["NY.GDP.MKTP.KD.ZG", "NE.GDI.TOTL.KD.ZG","SL.EMP.TOTL.SP.ZS","TX.QTY.MRCH.XD.WD","SP.POP.GROW","EG.USE.PCAP.KG.OE", "BX.KLT.DINV.WD.GD.ZS"]
 
 
world_bank = wb.download (indicator =worldbank_indicators, country = ["CHN"], start = 1990, end =2020)

world_bank = world_bank.reset_index(1)        

In [249]:
world_bank_df = world_bank.rename(columns = {"NY.GDP.MKTP.KD.ZG":"GDP",\
                                               "NE.GDI.TOTL.KD.ZG":"Gross_Capital_Formation_Growth",\
                                               "SL.EMP.TOTL.SP.ZS":"Employment_Population_Ratio",\
                                            "TX.QTY.MRCH.XD.WD":"Exports",\
                                            "SP.POP.GROW":"Population_Growth",\
                                            "EG.USE.PCAP.KG.OE":"Energy_Use",\
                                            "BX.KLT.DINV.WD.GD.ZS":"Forign_Direct_Investment%GDP"})

In [250]:
world_bank_final = world_bank_df.loc[::-1]

In [251]:
world_bank_final.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Forign_Direct_Investment%GDP
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
China,1990,3.920251,,,25.686424,1.467303,766.995329,0.966308
China,1991,9.262786,,77.16,29.444891,1.364434,736.851801,1.138838
China,1992,14.22453,,77.11,34.084662,1.225536,752.628663,2.613162
China,1993,13.883729,,76.83,37.953573,1.149619,788.128724,6.186882
China,1994,13.036807,,76.6,48.5572,1.130261,816.16289,5.987156


In [253]:
world_bank_final1 = world_bank_final.fillna({
    'Employment_Population_Ratio':76,
    'Gross_Capital_Formation_Growth' : 8,
    'Exports' : 750,
    'Energy_Use': 1800,
    'Forign_Direct_Investment%GDP':01.0})
world_bank_final1.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Forign_Direct_Investment%GDP
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
China,1990,3.920251,8.0,76.0,25.686424,1.467303,766.995329,0.966308
China,1991,9.262786,8.0,77.16,29.444891,1.364434,736.851801,1.138838
China,1992,14.22453,8.0,77.11,34.084662,1.225536,752.628663,2.613162
China,1993,13.883729,8.0,76.83,37.953573,1.149619,788.128724,6.186882
China,1994,13.036807,8.0,76.6,48.5572,1.130261,816.16289,5.987156


In [254]:
world_bank_final1.to_excel('China.xlsx', index = False)

In [255]:
Y.shape

(26, 1)

In [256]:
world_bank_final1.describe()

Unnamed: 0,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Forign_Direct_Investment%GDP
count,31.0,31.0,31.0,31.0,31.0,31.0,31.0
mean,9.115567,10.508647,71.123226,341.368676,0.748794,1381.82094,3.287395
std,2.774183,5.417067,4.338448,280.587882,0.303926,516.444531,1.42127
min,2.347514,3.560256,63.48,25.686424,0.225948,736.851801,0.966308
25%,7.543708,7.258222,67.46,69.400851,0.552416,875.14043,2.375708
50%,9.23678,8.0,70.24,283.644193,0.630326,1393.691324,3.487403
75%,10.374746,13.461574,75.73,622.129726,0.9915,1800.0,4.418271
max,14.230861,23.790644,77.16,784.492573,1.467303,2224.354898,6.186882


In [257]:
X = world_bank_final1.drop("GDP", axis = 1)
Y = world_bank_final1[['GDP']]


In [258]:
X_train = X[:28].values
X_test = X[28:].values
y_train = Y[:28].values
y_test = Y[28:].values

In [259]:
X_test

array([['2018', 6.75917007998336, 65.76, 769.440752, 0.467672053461959,
        1800.0, 1.69390529382545],
       ['2019', 3.98543867617695, 65.1, 784.4925731, 0.354740890170827,
        1800.0, 1.31071878144875],
       ['2020', 5.21683592750635, 63.48, 750.0, 0.225947587112011,
        1800.0, 1.0]], dtype=object)

In [260]:
y_test

array([[6.74977383],
       [5.94971423],
       [2.34751357]])

In [261]:
# create a Linear Regression model object
regression_model_China = LinearRegression()

In [262]:
# pass through the X_train & y_train data set
regression_model_China.fit(X_train, y_train)

LinearRegression()

In [263]:
# Get multiple predictions
y_predict = regression_model_China.predict(X_test)

# Show the first 5 predictions
y_predict[:4]

array([[8.83669115],
       [9.09432693],
       [6.90775935]])

In [264]:
# let's grab the coefficient of our model and the intercept
intercept = regression_model_China.intercept_[0]
coefficent = regression_model_China.coef_[0][0]

In [265]:
print("The intercept for our model is {:.4}".format(intercept))
print('-'*100)

The intercept for our model is 2.802e+03
----------------------------------------------------------------------------------------------------


In [266]:
# loop through the dictionary and print the data
for coef in zip(X.columns, regression_model_China.coef_[0]):
    print("The Coefficient for {} is {:.2}".format(coef[0],coef[1]))

The Coefficient for year is -1.4
The Coefficient for Gross_Capital_Formation_Growth is 0.099
The Coefficient for Employment_Population_Ratio is 0.87
The Coefficient for Exports is 0.048
The Coefficient for Population_Growth is -1.9e+01
The Coefficient for Energy_Use is -0.0034
The Coefficient for Forign_Direct_Investment%GDP is 0.8


In [267]:
import math
# calculate the mean squared error
model_mse = mean_squared_error(y_test, y_predict)

# calculate the mean absolute error
model_mae = mean_absolute_error(y_test, y_predict)

# calulcate the root mean squared error
model_rmse =  math.sqrt(model_mse)

# display the output
print("MSE {:.3}".format(model_mse))
print("MAE {:.3}".format(model_mae))
print("RMSE {:.3}".format(model_rmse))

MSE 11.7
MAE 3.26
RMSE 3.42


In [268]:
regression_model_China.score(X_train, y_train)

0.7444923763640225

In [269]:
r2_score_China =r2_score(y_test, y_predict)
r2_score_China


-2.1858247090844594

# Regression for Brazil Data

In [270]:
worldbank_indicators = ["NY.GDP.MKTP.KD.ZG", "NE.GDI.TOTL.KD.ZG","SL.EMP.TOTL.SP.ZS","TX.QTY.MRCH.XD.WD","SP.POP.GROW","EG.USE.PCAP.KG.OE","BX.KLT.DINV.WD.GD.ZS"] 
 
world_bank = wb.download (indicator =worldbank_indicators, country = ["BRA"], start = 1995, end =2020)

world_bank = world_bank.reset_index(1)       

In [271]:
world_bank_df = world_bank.rename(columns = {"NY.GDP.MKTP.KD.ZG":"GDP",\
                                               "NE.GDI.TOTL.KD.ZG":"Gross_Capital_Formation_Growth",\
                                               "SL.EMP.TOTL.SP.ZS":"Employment_Population_Ratio",\
                                            "TX.QTY.MRCH.XD.WD":"Exports",\
                                            "SP.POP.GROW":"Population_Growth",\
                                            "EG.USE.PCAP.KG.OE":"Energy_Use",\
                                            "BX.KLT.DINV.WD.GD.ZS":"Foreign Direct Investment"})

In [272]:
world_bank_final = world_bank_df.loc[::-1]

In [273]:
world_bank_final.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Brazil,1995,4.223794,7.290004,59.68,72.158558,1.60971,994.289849,0.631586
Brazil,1996,2.208864,0.798463,57.88,74.083146,1.588838,1030.668072,1.475965
Brazil,1997,3.394846,8.418114,57.93,80.843315,1.563732,1066.186951,2.150452
Brazil,1998,0.338098,-0.163841,57.32,82.890311,1.528962,1075.199081,3.340887
Brazil,1999,0.467938,-8.875302,57.34,88.936536,1.48111,1083.403984,4.73377


In [274]:
world_bank_final1 = world_bank_final.fillna({
    "Gross_Capital_Formation_Growth":2.8, 
    'Exports' :230.0,
    'Energy_Use': 800.0,
    'Foreign Direct Investment':3.0})
world_bank_final1.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Brazil,1995,4.223794,7.290004,59.68,72.158558,1.60971,994.289849,0.631586
Brazil,1996,2.208864,0.798463,57.88,74.083146,1.588838,1030.668072,1.475965
Brazil,1997,3.394846,8.418114,57.93,80.843315,1.563732,1066.186951,2.150452
Brazil,1998,0.338098,-0.163841,57.32,82.890311,1.528962,1075.199081,3.340887
Brazil,1999,0.467938,-8.875302,57.34,88.936536,1.48111,1083.403984,4.73377


In [275]:
world_bank_final1.to_excel('Brazil.xlsx', index = False)

In [276]:
world_bank_final1.describe()

Unnamed: 0,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
count,26.0,26.0,26.0,26.0,26.0,26.0,26.0
mean,2.120676,2.062374,58.372308,164.349065,1.106152,1105.185877,3.103603
std,2.853383,7.374636,2.038651,55.36371,0.29327,216.416964,1.069403
min,-4.059048,-13.9465,51.14,72.158558,0.712873,800.0,0.631586
25%,0.663174,-1.96135,57.4425,111.820248,0.860248,1003.384405,2.291222
50%,2.06502,1.630388,58.795,185.784523,1.016088,1086.876916,3.29592
75%,3.971314,6.790732,59.7225,196.584015,1.353404,1239.735885,3.748725
max,7.528226,17.853921,60.82,241.30419,1.60971,1495.541141,5.033917


In [277]:
X = world_bank_final1.drop("GDP", axis = 1)
Y = world_bank_final1[['GDP']]

In [278]:
X_train = X[:20].values
X_test = X[20:].values
y_train = Y[:20].values
y_test = Y[20:].values

In [279]:
X_test

array([['2015', -13.9465002050587, 58.65, 209.4169305, 0.838838953278811,
        800.0, 3.59214973105335],
       ['2016', -12.1298264348974, 56.39, 216.1630581, 0.823752152538353,
        800.0, 4.13737853873766],
       ['2017', -2.55735555667499, 55.84, 230.8527606, 0.807145245445964,
        800.0, 3.33826028428024],
       ['2018', 5.23175830926645, 56.15, 241.3041896, 0.783844259734468,
        800.0, 4.07748708411665],
       ['2019', 3.36410575283553, 56.76, 236.3478881, 0.751550842096973,
        800.0, 3.68375319892769],
       ['2020', -0.776685637163737, 51.14, 230.0, 0.712872791114979,
        800.0, 3.0]], dtype=object)

In [280]:
y_test

array([[-3.54576339],
       [-3.27591691],
       [ 1.32286905],
       [ 1.78366676],
       [ 1.41115299],
       [-4.05904827]])

In [281]:
# create a Linear Regression model object
regression_model_Brazil = LinearRegression()

In [282]:
# pass through the X_train & y_train data set
regression_model_Brazil.fit(X_train, y_train)

LinearRegression()

In [283]:
# Get multiple predictions
y_predict = regression_model_Brazil.predict(X_test)

# Show the first 5 predictions
y_predict[:6]

array([[2.07911175],
       [2.00993812],
       [4.68740903],
       [7.81267596],
       [7.2869438 ],
       [2.33087347]])

In [284]:
# let's grab the coefficient of our model and the intercept
intercept = regression_model_Brazil.intercept_[0]
coefficent = regression_model_Brazil.coef_[0][0]

In [285]:
print("The intercept for our model is {:.4}".format(intercept))
print('-'*100)

The intercept for our model is -996.0
----------------------------------------------------------------------------------------------------


In [286]:
# loop through the dictionary and print the data
for coef in zip(X.columns, regression_model_Brazil.coef_[0]):
    print("The Coefficient for {} is {:.2}".format(coef[0],coef[1]))

The Coefficient for year is 0.47
The Coefficient for Gross_Capital_Formation_Growth is 0.28
The Coefficient for Employment_Population_Ratio is 0.57
The Coefficient for Exports is 0.032
The Coefficient for Population_Growth is 1.5e+01
The Coefficient for Energy_Use is -0.006
The Coefficient for Foreign Direct Investment is 0.45


In [287]:
import math
# calculate the mean squared error
model_mse = mean_squared_error(y_test, y_predict)

# calculate the mean absolute error
model_mae = mean_absolute_error(y_test, y_predict)

# calulcate the root mean squared error
model_rmse =  math.sqrt(model_mse)

# display the output
print("MSE {:.3}".format(model_mse))
print("MAE {:.3}".format(model_mae))
print("RMSE {:.3}".format(model_rmse))

MSE 30.4
MAE 5.43
RMSE 5.52


In [288]:
regression_model_Brazil.score(X_train, y_train)

0.8889221249403891

In [289]:
r2_score_Brazil =r2_score(y_test, y_predict)
r2_score_Brazil


-3.5702905263488756

# Regression for South Africa Data

In [290]:
worldbank_indicators = ["NY.GDP.MKTP.KD.ZG", "NE.GDI.TOTL.KD.ZG","SL.EMP.TOTL.SP.ZS","TX.QTY.MRCH.XD.WD","SP.POP.GROW","EG.USE.PCAP.KG.OE","BX.KLT.DINV.WD.GD.ZS"] 
 
world_bank = wb.download (indicator =worldbank_indicators, country = ["ZAF"], start = 1995, end =2020)

world_bank = world_bank.reset_index(1)       

In [291]:
world_bank_df = world_bank.rename(columns = {"NY.GDP.MKTP.KD.ZG":"GDP",\
                                               "NE.GDI.TOTL.KD.ZG":"Gross_Capital_Formation_Growth",\
                                               "SL.EMP.TOTL.SP.ZS":"Employment_Population_Ratio",\
                                            "TX.QTY.MRCH.XD.WD":"Exports",\
                                            "SP.POP.GROW":"Population_Growth",\
                                            "EG.USE.PCAP.KG.OE":"Energy_Use",\
                                            "BX.KLT.DINV.WD.GD.ZS":"Foreign Direct Investment"})

In [292]:
world_bank_final = world_bank_df.loc[::-1]

In [293]:
world_bank_final.head(32)

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
South Africa,1995,3.1,14.229407,39.34,75.526461,2.126182,2498.893625,0.726948
South Africa,1996,4.3,1.027631,39.2,84.748864,1.924718,2504.903115,0.500126
South Africa,1997,2.6,2.944509,39.09,91.578779,1.751687,2527.940174,2.255071
South Africa,1998,0.5,3.340042,39.01,85.363103,1.603369,2471.477036,0.359739
South Africa,1999,2.4,-3.129856,38.87,89.073329,1.491248,2464.32336,0.99219
South Africa,2000,4.2,3.512802,38.82,100.0,1.409022,2424.881292,0.638425
South Africa,2001,2.7,0.478587,38.3,101.860087,1.333277,2461.026477,5.368357
South Africa,2002,3.700374,8.895938,36.8,99.883164,1.263923,2384.137081,1.146352
South Africa,2003,2.949075,11.441033,37.31,98.312123,1.223854,2518.332793,0.39749
South Africa,2004,4.55456,15.976655,38.88,103.802453,1.217762,2716.293461,0.2742


In [295]:
world_bank_final1 = world_bank_final.fillna({
    "Gross_Capital_Formation_Growth":1.6, 
    'Exports' :100.0,
    'Energy_Use': 2300.0,
    'Foreign Direct Investment':3.0})
world_bank_final1.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
South Africa,1995,3.1,14.229407,39.34,75.526461,2.126182,2498.893625,0.726948
South Africa,1996,4.3,1.027631,39.2,84.748864,1.924718,2504.903115,0.500126
South Africa,1997,2.6,2.944509,39.09,91.578779,1.751687,2527.940174,2.255071
South Africa,1998,0.5,3.340042,39.01,85.363103,1.603369,2471.477036,0.359739
South Africa,1999,2.4,-3.129856,38.87,89.073329,1.491248,2464.32336,0.99219


In [296]:
world_bank_final1.to_excel('SouthAfrica.xlsx', index = False)

In [297]:
world_bank_final1.describe()

Unnamed: 0,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
count,26.0,26.0,26.0,26.0,26.0,26.0,26.0
mean,2.296737,2.81917,39.631923,114.306491,1.461051,2541.316674,1.397003
std,2.470958,8.517843,1.453638,21.606507,0.219257,190.031628,1.166651
min,-6.431975,-24.730526,36.69,75.526461,1.217762,2300.0,0.205124
25%,1.344853,-0.624441,38.9125,99.912373,1.297697,2394.323134,0.564362
50%,2.65,3.158226,39.545,110.369759,1.412985,2511.617954,1.028556
75%,3.573042,8.756069,40.6375,131.951093,1.569076,2691.267888,2.034578
max,5.603806,15.976655,43.1,147.420926,2.126182,2950.15361,5.368357


In [298]:
X = world_bank_final1.drop("GDP", axis = 1)
Y = world_bank_final1[['GDP']]

In [299]:
X_train = X[:20].values
X_test = X[20:].values
y_train = Y[:20].values
y_test = Y[20:].values

In [300]:
X_test

array([['2015', 4.00533113024204, 41.45, 142.8619585, 1.53224277341692,
        2300.0, 0.438735792056359],
       ['2016', -8.38581264256115, 40.79, 141.7059987, 1.47193342939738,
        2300.0, 0.684612553455795],
       ['2017', 2.97641067709678, 40.99, 144.2538, 1.41694725378631,
        2300.0, 0.539673956759456],
       ['2018', -0.69657782263657, 40.93, 147.4209256, 1.36370321600461,
        2300.0, 1.37571218998329],
       ['2019', -0.408032219323644, 40.12, 142.4716299, 1.31629200967216,
        2300.0, 1.31880445443166],
       ['2020', -24.7305255204144, 36.69, 100.0, 1.27335626330668,
        2300.0, 3.0]], dtype=object)

In [301]:
y_test

array([[ 1.32186224],
       [ 0.66455231],
       [ 1.15794695],
       [ 1.48761737],
       [ 0.1130537 ],
       [-6.43197483]])

In [302]:
# create a Linear Regression model object
regression_model_SouthAfrica = LinearRegression()

In [303]:
# pass through the X_train & y_train data set
regression_model_SouthAfrica.fit(X_train, y_train)

LinearRegression()

In [304]:
# Get multiple predictions
y_predict = regression_model_SouthAfrica.predict(X_test)

# Show the first 5 predictions
y_predict[:6]

array([[ 2.03319433],
       [ 0.6035058 ],
       [ 1.46430163],
       [ 1.02394865],
       [ 0.16821014],
       [-8.02045567]])

In [305]:
# let's grab the coefficient of our model and the intercept
intercept = regression_model_SouthAfrica.intercept_[0]
coefficent = regression_model_SouthAfrica.coef_[0][0]

In [306]:
print("The intercept for our model is {:.4}".format(intercept))
print('-'*100)

The intercept for our model is 967.0
----------------------------------------------------------------------------------------------------


In [307]:
# loop through the dictionary and print the data
for coef in zip(X.columns, regression_model_SouthAfrica.coef_[0]):
    print("The Coefficient for {} is {:.2}".format(coef[0],coef[1]))

The Coefficient for year is -0.48
The Coefficient for Gross_Capital_Formation_Growth is 0.082
The Coefficient for Employment_Population_Ratio is -0.39
The Coefficient for Exports is 0.16
The Coefficient for Population_Growth is -0.99
The Coefficient for Energy_Use is 0.0026
The Coefficient for Foreign Direct Investment is -0.27


In [308]:
import math
# calculate the mean squared error
model_mse = mean_squared_error(y_test, y_predict)

# calculate the mean absolute error
model_mae = mean_absolute_error(y_test, y_predict)

# calulcate the root mean squared error
model_rmse =  math.sqrt(model_mse)

# display the output
print("MSE {:.3}".format(model_mse))
print("MAE {:.3}".format(model_mae))
print("RMSE {:.3}".format(model_rmse))

MSE 0.557
MAE 0.531
RMSE 0.747


In [309]:
regression_model_SouthAfrica.score(X_train, y_train)

0.6734329573667853

In [310]:
r2_score_SouthAfrica =r2_score(y_test, y_predict)
r2_score_SouthAfrica

0.9283009195549904

# Regression for Russian Federation Data

In [311]:
worldbank_indicators = ["NY.GDP.MKTP.KD.ZG", "NE.GDI.TOTL.KD.ZG","SL.EMP.TOTL.SP.ZS","TX.QTY.MRCH.XD.WD","SP.POP.GROW","EG.USE.PCAP.KG.OE","BX.KLT.DINV.WD.GD.ZS"] 
 
world_bank = wb.download (indicator =worldbank_indicators, country = ["RUS"], start = 1995, end =2020)

world_bank = world_bank.reset_index(1)       

In [312]:
world_bank_df = world_bank.rename(columns = {"NY.GDP.MKTP.KD.ZG":"GDP",\
                                               "NE.GDI.TOTL.KD.ZG":"Gross_Capital_Formation_Growth",\
                                               "SL.EMP.TOTL.SP.ZS":"Employment_Population_Ratio",\
                                            "TX.QTY.MRCH.XD.WD":"Exports",\
                                            "SP.POP.GROW":"Population_Growth",\
                                            "EG.USE.PCAP.KG.OE":"Energy_Use",\
                                            "BX.KLT.DINV.WD.GD.ZS":"Foreign Direct Investment"})

In [313]:
world_bank_final = world_bank_df.loc[::-1]

In [314]:
world_bank_final.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Russian Federation,1995,-4.143528,-10.8,55.62,,-0.021649,4290.693467,0.522257
Russian Federation,1996,-3.755069,-6.888909,54.51,,-0.145452,4252.611619,0.658454
Russian Federation,1997,1.399916,-4.100188,51.95,,-0.165342,4069.687292,1.201359
Russian Federation,1998,-5.299962,-45.199827,50.31,,-0.165486,3981.499468,1.019119
Russian Federation,1999,6.399915,-6.600615,53.18,,-0.309278,4136.751144,1.662283


In [315]:
world_bank_final1 = world_bank_final.fillna({
    "Gross_Capital_Formation_Growth":1.6, 
    'Exports' :150.0,
    'Energy_Use': 4000.0,
    'Foreign Direct Investment':1.0})
world_bank_final1.head()

Unnamed: 0_level_0,year,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Russian Federation,1995,-4.143528,-10.8,55.62,150.0,-0.021649,4290.693467,0.522257
Russian Federation,1996,-3.755069,-6.888909,54.51,150.0,-0.145452,4252.611619,0.658454
Russian Federation,1997,1.399916,-4.100188,51.95,150.0,-0.165342,4069.687292,1.201359
Russian Federation,1998,-5.299962,-45.199827,50.31,150.0,-0.165486,3981.499468,1.019119
Russian Federation,1999,6.399915,-6.600615,53.18,150.0,-0.309278,4136.751144,1.662283


In [316]:
world_bank_final1.to_excel('Russia.xlsx', index = False)

In [317]:
world_bank_final1.describe()

Unnamed: 0,GDP,Gross_Capital_Formation_Growth,Employment_Population_Ratio,Exports,Population_Growth,Energy_Use,Foreign Direct Investment
count,26.0,26.0,26.0,26.0,26.0,26.0,26.0
mean,2.563569,3.774255,56.781154,155.628475,-0.113188,4416.831743,1.93431
std,4.645481,22.28703,2.490902,26.248143,0.229201,387.010604,1.16149
min,-7.799994,-45.199827,50.31,100.0,-0.460024,3981.499468,0.502608
25%,0.329334,-6.077411,55.29,150.0,-0.322904,4017.421823,1.008932
50%,3.415666,1.307718,57.455,154.280139,-0.09751,4289.538049,1.828235
75%,6.099928,13.775086,58.975,167.483361,0.06971,4704.481421,2.670814
max,10.000067,75.201129,59.39,203.728337,0.217642,5167.010353,4.502699


In [318]:
X = world_bank_final1.drop("GDP", axis = 1)
Y = world_bank_final1[['GDP']]

In [319]:
X_train = X[:20].values
X_test = X[20:].values
y_train = Y[:20].values
y_test = Y[20:].values

In [320]:
X_test

array([['2015', -11.74313147216, 59.14, 180.5162551, 0.192557946508244,
        4000.0, 0.502608373795545],
       ['2016', -0.639176708239958, 59.28, 187.5903055,
        0.170245238698228, 4000.0, 2.54849873389468],
       ['2017', 6.40962100599796, 59.08, 194.7974436, 0.106870569429971,
        4000.0, 1.8140929436605],
       ['2018', -1.59119425116916, 59.23, 203.7283367,
        -0.0130668924933287, 4000.0, 0.530060761129424],
       ['2019', 3.25461218970347, 58.66, 197.6251167,
        -0.0495686654330689, 4000.0, 1.89485898487827],
       ['2020', -1.97334879957343, 57.62, 150.0, -0.209476796881133,
        4000.0, 1.0]], dtype=object)

In [321]:
y_test

array([[-1.97271923],
       [ 0.19369007],
       [ 1.82579006],
       [ 2.80724541],
       [ 2.03298274],
       [-2.9512739 ]])

In [322]:
# create a Linear Regression model object
regression_model_Russia = LinearRegression()

In [323]:
# pass through the X_train & y_train data set
regression_model_Russia.fit(X_train, y_train)

LinearRegression()

In [324]:
# Get multiple predictions
y_predict = regression_model_Russia.predict(X_test)

# Show the first 6 predictions
y_predict[:6]

array([[-6.49192668],
       [-4.07355472],
       [-1.18551245],
       [ 0.77930679],
       [ 2.15612091],
       [ 1.73519581]])

In [325]:
# let's grab the coefficient of our model and the intercept
intercept = regression_model_Russia.intercept_[0]
coefficent = regression_model_Russia.coef_[0][0]

In [326]:
print("The intercept for our model is {:.4}".format(intercept))
print('-'*100)

The intercept for our model is -419.2
----------------------------------------------------------------------------------------------------


In [327]:
# loop through the dictionary and print the data
for coef in zip(X.columns, regression_model_Russia.coef_[0]):
    print("The Coefficient for {} is {:.2}".format(coef[0],coef[1]))

The Coefficient for year is 0.2
The Coefficient for Gross_Capital_Formation_Growth is 0.12
The Coefficient for Employment_Population_Ratio is -0.64
The Coefficient for Exports is 0.075
The Coefficient for Population_Growth is -1.8e+01
The Coefficient for Energy_Use is 0.0075
The Coefficient for Foreign Direct Investment is 0.0016


In [328]:
import math
# calculate the mean squared error
model_mse = mean_squared_error(y_test, y_predict)

# calculate the mean absolute error
model_mae = mean_absolute_error(y_test, y_predict)

# calulcate the root mean squared error
model_rmse =  math.sqrt(model_mse)

# display the output
print("MSE {:.3}".format(model_mse))
print("MAE {:.3}".format(model_mae))
print("RMSE {:.3}".format(model_rmse))

MSE 12.3
MAE 3.11
RMSE 3.51


In [329]:
regression_model_Russia.score(X_train, y_train)

0.8913331821704882

In [330]:
r2_score_Russia =r2_score(y_test, y_predict)
r2_score_Russia

-1.6968702527367112