# Imports

In [1]:
import pickle
import wbgapi as wb
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

from sklearn.preprocessing import MinMaxScaler

from sklearn.multioutput import MultiOutputRegressor

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoLars

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split




# Models

In [10]:
# Candidate regressors benchmarked by get_the_best_model, keyed by the display
# name printed during the benchmark.
# Every estimator that accepts a seed gets random_state=42 so that re-running
# the notebook reproduces the same ranking.
# The former "LinearRegression" entry was dropped: it was an exact duplicate of
# "Linear regression", could never beat it under the strict "<" selection rule,
# and only added an extra fit plus 30 cross-validation fits per call.
MODELS = {
    # NOTE(review): `normalize` is deprecated/removed in recent scikit-learn
    # releases - confirm against the installed version before upgrading.
    "LassoLars": LassoLars(alpha=.1, normalize=False),
    "Linear regression": LinearRegression(),
    "Ridge": RidgeCV(),
    "Lasso": Lasso(),
    "ElasticNet": ElasticNet(random_state=42),
    "RandomForestRegressor": RandomForestRegressor(max_depth=5, random_state=42),
    "Decision Tree Regressor": DecisionTreeRegressor(max_depth=5, random_state=42),
    "GradientBoostingRegressor": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "AdaBoostRegressor": AdaBoostRegressor(n_estimators=100, random_state=42),
    "XGBRegressor": XGBRegressor(max_depth=3, n_estimators=100, n_jobs=3,
                                 objective='reg:squarederror',
                                 random_state=42, learning_rate=0.03),
    "K-nn": KNeighborsRegressor(),
    "CatBoostRegressor": CatBoostRegressor(loss_function='RMSE', silent=True, depth=5),
    "LGBMRegressor": LGBMRegressor(),
}

# Functions

In [2]:
def read_data(codes, economies=None, years=None, db=2):
    """Download World Bank series through wbgapi and return them as a DataFrame.

    Parameters
    ----------
    codes : list of str
        Indicator (series) codes to request.
    economies : iterable of str, optional
        Economy codes to request. Defaults to the 19 economies this notebook
        has always used.
    years : iterable of int, optional
        Years to request. Defaults to ``range(1990, 2020, 1)``.
    db : int, optional
        Value assigned to ``wb.db`` before the request (default 2, as before).

    Returns
    -------
    The object returned by ``wb.data.DataFrame``.

    Notes
    -----
    ``wb.db`` is assigned at module level, exactly like the original
    implementation, so the setting persists after this call.
    """
    if economies is None:
        economies = ['DEU', 'AUS', 'CAN', 'USA', 'FRA', 'ITA', 'JPN', 'GBR', 'ZAF', 'SAU',
                     'ARG', 'BRA', 'CHN', 'KOR', 'IND', 'IDN', 'MEX', 'RUS', 'TUR']
    if years is None:
        years = range(1990, 2020, 1)

    wb.db = db
    return wb.data.DataFrame(codes, economies, years)

In [3]:
def read_data1(codes):
    """Issue the same request as read_data, but with ``wb.db`` set to 1.

    ``codes`` is the list of series codes; the economies and the 1990-2019
    year range are fixed. Returns whatever ``wb.data.DataFrame`` returns.
    """
    wb.db = 1
    economies = ['DEU', 'AUS', 'CAN', 'USA', 'FRA', 'ITA', 'JPN', 'GBR', 'ZAF', 'SAU',
                 'ARG', 'BRA', 'CHN', 'KOR', 'IND', 'IDN', 'MEX', 'RUS', 'TUR']
    years = range(1990, 2020, 1)
    return wb.data.DataFrame(codes, economies, years)

In [4]:
def get_data(data):
    """Flatten a wide frame (one column per period) into one column per series.

    Values are collected column by column, and row by row within each column.

    * When ``data`` has a ``pd.MultiIndex``, the second index level is used as
      the output column name, so every distinct level-1 key becomes a column.
    * Otherwise all values go into a single column named ``"coluna"``.

    The index type decides the layout. The previous ``len(row) > 19`` test only
    worked for exactly 19 economies: a multi-indexed frame with 19 rows or fewer
    lost its per-series columns, and a flat frame with more than 19 rows indexed
    into the label string.

    Parameters
    ----------
    data : pd.DataFrame

    Returns
    -------
    pd.DataFrame
        Empty when ``data`` has no cells.
    """
    has_series_level = isinstance(data.index, pd.MultiIndex)
    collected = {}

    for _, column_values in data.items():
        for row_key, value in column_values.items():
            target = row_key[1] if has_series_level else "coluna"
            collected.setdefault(target, []).append(value)

    return pd.DataFrame(collected)

In [5]:
def get_the_best_model(input,output):
    """Benchmark every estimator in MODELS and return the best one with its scaler.

    Both frames are flattened with ``get_data``, concatenated column-wise,
    NaN-filled with 0 and scaled to [-1, 1] by a single ``MinMaxScaler`` fitted
    on inputs and outputs together. The scaled data is split 80/20; each
    candidate is fitted on the training part and scored on the hold-out part
    (MSE, RMSE, R-squared). MAE comes from a repeated 10-fold cross-validation
    over the full scaled data.

    A candidate becomes the new best only when its RMSE and MSE are lower and
    its R-squared is higher than the current best, and R-squared is below 1.

    Each candidate is fitted on a clone of the MODELS entry. Previously the
    shared instances were fitted in place, so an estimator returned by one call
    was silently re-fitted by the next call.

    Parameters
    ----------
    input : pd.DataFrame
        Wide frame of input series, as produced by ``read_data``.
    output : pd.DataFrame
        Wide frame of output values. The target is flattened with ``np.ravel``,
        so this assumes ``get_data(output)`` yields a single column.

    Returns
    -------
    list
        ``[estimator, normalizer]``. ``estimator`` is ``None`` when no
        candidate met the selection rule.
    """
    # Function-scope import keeps this cell independent of the imports cell.
    from sklearn.base import clone

    data_input = get_data(input)
    data_output = get_data(output)
    len_in = len(data_input.columns)
    len_out = len(data_output.columns)

    normalizer = MinMaxScaler(feature_range=(-1, 1))
    data = pd.concat([data_input, data_output], axis=1)
    # NOTE(review): the scaler is fitted before the train/test split, so test
    # rows influence the scaling - confirm this is acceptable.
    data = normalizer.fit_transform(np.nan_to_num(data.values))

    x = data[:, 0:len_in]
    y = np.ravel(data[:, len_in:len_in + len_out])
    print("x-shape: ", x.shape)
    print("y-shape: ", y.shape)

    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                        random_state=42)

    the_best = {"RMSE": 10, "MSE": 10, "MAE": 10, "R_squared": -1, 'model': dict()}
    # Integer seed: the same folds are regenerated for every candidate.
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=42)

    for name, prototype in MODELS.items():
        print("Model name: ", name)
        try:
            # Fit a fresh copy so the shared MODELS entry stays unfitted.
            estimator = clone(prototype)
            estimator.fit(X_train, y_train)
        except Exception as e:
            # Best effort: report the failure and move on to the next candidate.
            print(f'error-> {e}')
            print(X_train.dtype)
            print(y_train.dtype)
            continue

        y_pred = estimator.predict(X_test)

        # cross_val_score returns negated MAE values; take the magnitude.
        n_scores = cross_val_score(estimator, x, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=3)
        n_scores = np.absolute(n_scores)

        MSE = mean_squared_error(y_test, y_pred)
        RMSE = np.sqrt(MSE)
        R_squared = r2_score(y_test, y_pred, multioutput="variance_weighted")
        MAE = np.mean(n_scores)
        print("RMSE: ", np.round(RMSE, 3))
        print("MSE:" , np.round(MSE,3))
        print("R-Squared: ", np.round(R_squared, 3))
        print(f"MAE: {np.round(MAE,3)} ({np.round(np.std(n_scores),3)})")
        print()

        lower_error = RMSE < the_best['RMSE'] and MSE < the_best['MSE']
        better_fit = (1 - R_squared) > 0 and R_squared > the_best['R_squared']
        if lower_error and better_fit:
            print("New best->", name)
            the_best['RMSE'] = RMSE
            the_best['MSE'] = MSE
            the_best['MAE'] = MAE
            the_best['R_squared'] = R_squared
            the_best['model'] = {'name': name, 'estimator': estimator}

    print('The best model',the_best.get('model').get('name'))

    return [the_best.get('model').get('estimator'), normalizer]

In [6]:
def save(nome, model, normalizer):
    """Pickle a model and its normalizer into the ``models/`` directory.

    Writes ``models/normalizer_<nome>.pkl`` and ``models/model_<nome>.pkl``
    relative to the current working directory, then prints ``salvo``.
    The directory is created when missing; previously a fresh checkout without
    ``models/`` raised ``FileNotFoundError``.

    Parameters
    ----------
    nome : str
        Suffix used in both file names.
    model : object
        Picklable model.
    normalizer : object
        Picklable scaler that belongs to ``model``.
    """
    # Function-scope import keeps this cell independent of the imports cell.
    import os

    os.makedirs('models', exist_ok=True)
    with open(f'models/normalizer_{nome}.pkl','wb') as f:
        pickle.dump(normalizer,f)
    with open(f'models/model_{nome}.pkl','wb') as f:
        pickle.dump(model,f)
    print('salvo')

In [8]:
def to_negative(df, ouputs_negative):
    """Flip the sign of every row whose ``series`` index level is a listed code.

    ``df`` is modified in place and also returned. Codes that match no row
    leave the frame unchanged.
    """
    for code in ouputs_negative:
        selected = df.index.get_level_values('series') == code
        flipped = df.loc[selected] * -1
        df.loc[selected] = flipped
    return df

# Ministérios

## Agricultura e desenvolvimento rural

## Inputs

In [9]:
# Input series for the agriculture model.
inputs_agricultura = ['NV.AGR.TOTL.CD']
df_inputs_agricultura = read_data(inputs_agricultura)
# Fill missing cells with the mean of their column.
df_inputs_agricultura = df_inputs_agricultura.fillna(df_inputs_agricultura.mean())

## Outputs

In [10]:
# Output series for the agriculture model.
outputs_agricultura = ['SL.AGR.EMPL.ZS', 'SP.RUR.TOTL.ZS', 'AG.PRD.FOOD.XD']
df_outputs_agricultura = read_data(outputs_agricultura)
# Fill gaps with the column mean, then sum all series per first index level.
df_outputs_agricultura = (
    df_outputs_agricultura
    .fillna(df_outputs_agricultura.mean())
    .groupby(level=0)
    .sum()
)

## Model

In [11]:
# Benchmark the candidates and keep the winner together with its scaler.
model_agricultura, normalizer_agricultura = get_the_best_model(
    input=df_inputs_agricultura,
    output=df_outputs_agricultura,
)

x-shape:  (570, 1)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.349
MSE: 0.122
R-Squared:  -0.007
MAE: 0.238 (0.028)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.318
MSE: 0.101
R-Squared:  0.165
MAE: 0.219 (0.029)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.319
MSE: 0.102
R-Squared:  0.161
MAE: 0.219 (0.029)

Model name:  Lasso
RMSE:  0.349
MSE: 0.122
R-Squared:  -0.007
MAE: 0.238 (0.028)

Model name:  ElasticNet
RMSE:  0.349
MSE: 0.122
R-Squared:  -0.007
MAE: 0.238 (0.028)

Model name:  RandomForestRegressor
RMSE:  0.266
MSE: 0.071
R-Squared:  0.415
MAE: 0.205 (0.026)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.258
MSE: 0.067
R-Squared:  0.45
MAE: 0.214 (0.029)

New best-> Decision Tree Regressor
Model name:  GradientBoostingRegressor
RMSE:  0.267
MSE: 0.071
R-Squared:  0.411
MAE: 0.21 (0.029)

Model name:  AdaBoostRegressor
RMSE:  0.287
MSE: 0.082
R-Squared:  0.32
MAE: 0.212 (0.025)

Model name:  XGBRegressor
RMSE

In [12]:
# Persist the selected model and its scaler.
save(nome='agricultura', model=model_agricultura, normalizer=normalizer_agricultura)

salvo


## Educação

## Inputs

In [13]:
# Input series for the education model.
inputs_educacao = ["SE.XPD.PRIM.ZS", "SE.XPD.SECO.ZS", "SE.XPD.TERT.ZS"]
df_inputs_educacao = read_data(inputs_educacao)
# Fill missing cells with the mean of their column.
df_inputs_educacao = df_inputs_educacao.fillna(df_inputs_educacao.mean())

## Outputs Negative

In [14]:
# Series codes whose sign is flipped by to_negative.
ouputs_negative_educacao = [
    'SE.SEC.CMPT.LO.ZS',
    'SE.SEC.UNER.LO.ZS',
    'SE.PRM.UNER.ZS',
]

## Outputs

In [15]:
# Output series for the education model.
outputs_educacao = ["SE.ADT.LITR.ZS","SE.ADT.1524.LT.ZS","SE.PRM.CMPT.ZS"]
df_outputs_educacao = read_data(outputs_educacao)
# Fill missing cells with the mean of their column.
df_outputs_educacao.fillna(df_outputs_educacao.mean(), inplace=True)
# NOTE(review): none of the codes in ouputs_negative_educacao appear in
# outputs_educacao, so this call appears to flip nothing - confirm whether
# those series were meant to be fetched as well.
df_outputs_educacao = to_negative(df_outputs_educacao, ouputs_negative_educacao)
# Sum all series per first index level to get a single target.
df_outputs_educacao=df_outputs_educacao.groupby(level=0).sum()

## Model

In [16]:
# Benchmark the candidates and keep the winner together with its scaler.
model_educacao, normalizer_educacao = get_the_best_model(
    input=df_inputs_educacao,
    output=df_outputs_educacao,
)

x-shape:  (570, 3)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.205
MSE: 0.042
R-Squared:  -0.0
MAE: 0.126 (0.016)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.203
MSE: 0.041
R-Squared:  0.02
MAE: 0.124 (0.016)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.203
MSE: 0.041
R-Squared:  0.02
MAE: 0.123 (0.016)

New best-> Ridge
Model name:  Lasso
RMSE:  0.205
MSE: 0.042
R-Squared:  -0.0
MAE: 0.126 (0.016)

Model name:  ElasticNet
RMSE:  0.205
MSE: 0.042
R-Squared:  -0.0
MAE: 0.126 (0.016)

Model name:  RandomForestRegressor
RMSE:  0.203
MSE: 0.041
R-Squared:  0.027
MAE: 0.102 (0.017)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.205
MSE: 0.042
R-Squared:  0.001
MAE: 0.112 (0.017)

Model name:  GradientBoostingRegressor
RMSE:  0.204
MSE: 0.041
R-Squared:  0.016
MAE: 0.096 (0.017)

Model name:  AdaBoostRegressor
RMSE:  0.225
MSE: 0.051
R-Squared:  -0.198
MAE: 0.128 (0.019)

Model name:  XGBRegressor
RMSE:  0.201
MSE: 0.041
R-

In [17]:
# Persist the selected model and its scaler.
save(nome='educacao', model=model_educacao, normalizer=normalizer_educacao)

salvo


## Mudanças climáticas e meio ambiente

## Inputs

In [7]:
# Input series for the climate and environment model.
inputs_ambiente = ["EN.ATM.METH.AG.ZS", "EN.ATM.NOXE.AG.ZS", "EN.ATM.CO2E.GF.ZS"]
df_inputs_ambiente = read_data(inputs_ambiente)
# Fill missing cells with the mean of their column.
df_inputs_ambiente = df_inputs_ambiente.fillna(df_inputs_ambiente.mean())

## Outputs

In [8]:
# Output series for the climate and environment model.
outputs_ambiente = ["EG.ELC.ACCS.ZS", "EG.ELC.HYRO.ZS", "EG.ELC.RNWX.ZS"]
df_outputs_ambiente = read_data(outputs_ambiente)
# Fill gaps with the column mean, then sum all series per first index level.
df_outputs_ambiente = (
    df_outputs_ambiente
    .fillna(df_outputs_ambiente.mean())
    .groupby(level=0)
    .sum()
)

## Model

In [11]:
# Benchmark the candidates and keep the winner together with its scaler.
model_ambiente, normalizer_ambiente = get_the_best_model(
    input=df_inputs_ambiente,
    output=df_outputs_ambiente,
)

x-shape:  (570, 3)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.444
MSE: 0.197
R-Squared:  0.267
MAE: 0.34 (0.031)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.394
MSE: 0.155
R-Squared:  0.423
MAE: 0.338 (0.033)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.394
MSE: 0.155
R-Squared:  0.423
MAE: 0.337 (0.033)

Model name:  Lasso
RMSE:  0.519
MSE: 0.269
R-Squared:  -0.003
MAE: 0.376 (0.04)

Model name:  ElasticNet
RMSE:  0.519
MSE: 0.269
R-Squared:  -0.003
MAE: 0.376 (0.04)

Model name:  RandomForestRegressor
RMSE:  0.125
MSE: 0.016
R-Squared:  0.942
MAE: 0.096 (0.01)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.147
MSE: 0.022
R-Squared:  0.919
MAE: 0.097 (0.009)

Model name:  GradientBoostingRegressor
RMSE:  0.112
MSE: 0.012
R-Squared:  0.954
MAE: 0.083 (0.01)

New best-> GradientBoostingRegressor
Model name:  AdaBoostRegressor
RMSE:  0.152
MSE: 0.023
R-Squared:  0.914
MAE: 0.127 (0.015)

Model name:  XGBRegressor
RMSE:

In [12]:
# Persist the selected model and its scaler.
save(nome='ambiente', model=model_ambiente, normalizer=normalizer_ambiente)

salvo


## Saúde

## Input

In [22]:
# Input series for the health model.
inputs_saude = ['SH.XPD.CHEX.GD.ZS']
df_inputs_saude = read_data(inputs_saude)
# Fill missing cells with the mean of their column.
df_inputs_saude = df_inputs_saude.fillna(df_inputs_saude.mean())

## Outputs

In [23]:
# Output series for the health model.
outputs_saude = ['SH.STA.BASS.ZS', 'SH.MED.BEDS.ZS', 'SH.MED.NUMW.P3', 'SH.MED.PHYS.ZS']
df_outputs_saude = read_data(outputs_saude)
# Fill gaps with the column mean, then sum all series per first index level.
df_outputs_saude = (
    df_outputs_saude
    .fillna(df_outputs_saude.mean())
    .groupby(level=0)
    .sum()
)

## Model

In [24]:
# Benchmark the candidates and keep the winner together with its scaler.
model_saude, normalizer_saude = get_the_best_model(
    input=df_inputs_saude,
    output=df_outputs_saude,
)

x-shape:  (570, 1)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.41
MSE: 0.168
R-Squared:  -0.006
MAE: 0.306 (0.034)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.354
MSE: 0.126
R-Squared:  0.248
MAE: 0.208 (0.035)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.354
MSE: 0.126
R-Squared:  0.248
MAE: 0.209 (0.035)

Model name:  Lasso
RMSE:  0.41
MSE: 0.168
R-Squared:  -0.006
MAE: 0.307 (0.035)

Model name:  ElasticNet
RMSE:  0.41
MSE: 0.168
R-Squared:  -0.006
MAE: 0.307 (0.035)

Model name:  RandomForestRegressor
RMSE:  0.356
MSE: 0.127
R-Squared:  0.242
MAE: 0.187 (0.033)

Model name:  Decision Tree Regressor
RMSE:  0.356
MSE: 0.127
R-Squared:  0.24
MAE: 0.187 (0.032)

Model name:  GradientBoostingRegressor
RMSE:  0.357
MSE: 0.128
R-Squared:  0.235
MAE: 0.189 (0.031)

Model name:  AdaBoostRegressor
RMSE:  0.358
MSE: 0.128
R-Squared:  0.232
MAE: 0.191 (0.036)

Model name:  XGBRegressor
RMSE:  0.363
MSE: 0.132
R-Squared:  0.211
MAE: 0.214 (0.031)

Model name:

In [25]:
# Persist the selected model and its scaler.
save(nome='saude', model=model_saude, normalizer=normalizer_saude)

salvo


## Infraestrutura, Ciência e Tecnologia

## Input

In [26]:
# Input series for the infrastructure, science and technology model.
inputs_ciencia = ['GB.XPD.RSDV.GD.ZS']
df_inputs_ciencia = read_data(inputs_ciencia)
# Fill missing cells with the mean of their column.
df_inputs_ciencia = df_inputs_ciencia.fillna(df_inputs_ciencia.mean())

## Output

In [27]:
# Output series for the infrastructure, science and technology model.
outputs_ciencia = [
    'IT.NET.SECR.P6',
    'EP.PMP.SGAS.CD',
    'IP.JRN.ARTC.SC',
    'IP.IDS.NRCT',
    'IP.IDS.RSCT',
    'IS.RRS.GOOD.MT.K6',
]
df_outputs_ciencia = read_data(outputs_ciencia)
# Fill gaps with the column mean, then sum all series per first index level.
df_outputs_ciencia = (
    df_outputs_ciencia
    .fillna(df_outputs_ciencia.mean())
    .groupby(level=0)
    .sum()
)

## Model

In [28]:
# Benchmark the candidates, then persist the winner and its scaler.
model_ciencia, normalizer_ciencia = get_the_best_model(
    input=df_inputs_ciencia,
    output=df_outputs_ciencia,
)
save(nome='ciencia', model=model_ciencia, normalizer=normalizer_ciencia)

x-shape:  (570, 1)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.354
MSE: 0.125
R-Squared:  -0.02
MAE: 0.259 (0.039)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.334
MSE: 0.112
R-Squared:  0.092
MAE: 0.251 (0.038)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.334
MSE: 0.112
R-Squared:  0.092
MAE: 0.251 (0.038)

New best-> Ridge
Model name:  Lasso
RMSE:  0.354
MSE: 0.125
R-Squared:  -0.02
MAE: 0.259 (0.039)

Model name:  ElasticNet
RMSE:  0.354
MSE: 0.125
R-Squared:  -0.02
MAE: 0.259 (0.039)

Model name:  RandomForestRegressor
RMSE:  0.276
MSE: 0.076
R-Squared:  0.38
MAE: 0.206 (0.048)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.265
MSE: 0.07
R-Squared:  0.429
MAE: 0.217 (0.053)

New best-> Decision Tree Regressor
Model name:  GradientBoostingRegressor
RMSE:  0.276
MSE: 0.076
R-Squared:  0.38
MAE: 0.216 (0.053)

Model name:  AdaBoostRegressor
RMSE:  0.319
MSE: 0.102
R-Squared:  0.172
MAE: 0.255 (0.043)

Model name:  XGB

## Desenvolvimento 

## Input

In [29]:
# Input series for the development model.
inputs_desenvolvimento = ['SI.POV.GINI']
df_inputs_desenvolvimento = read_data(inputs_desenvolvimento)
# Fill missing cells with the mean of their column.
df_inputs_desenvolvimento = df_inputs_desenvolvimento.fillna(df_inputs_desenvolvimento.mean())

## Negative Output

In [30]:
# Series codes whose sign is flipped by to_negative.
negative_ouputs_desenvolvimento = [
    'SI.DST.50MD',
]

## Output

In [31]:
# Output series for the development model.
outputs_desenvolvimento = ['SL.EMP.VULN.ZS', 'SL.UEM.TOTL.NE.ZS', 'SI.DST.50MD']
df_outputs_desenvolvimento = read_data(outputs_desenvolvimento)
# Fill missing cells with the mean of their column.
df_outputs_desenvolvimento = df_outputs_desenvolvimento.fillna(df_outputs_desenvolvimento.mean())
# Flip the sign of the listed series, then sum all series per first index level.
df_outputs_desenvolvimento = (
    to_negative(df_outputs_desenvolvimento, negative_ouputs_desenvolvimento)
    .groupby(level=0)
    .sum()
)

In [32]:
# Display the aggregated outputs (one row per economy) before modelling.
df_outputs_desenvolvimento

Unnamed: 0_level_0,YR1990,YR1991,YR1992,YR1993,YR1994,YR1995,YR1996,YR1997,YR1998,YR1999,...,YR2010,YR2011,YR2012,YR2013,YR2014,YR2015,YR2016,YR2017,YR2018,YR2019
economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARG,7.06,13.03,11.1,15.380001,16.04,21.74,19.140001,16.26,12.930001,14.319999,...,5.16,5.1,6.05,6.59,7.25,20.43,15.165958,10.36,10.54,11.25
AUS,6.93,5.042564,6.25175,6.00122,5.530233,10.55,3.072927,3.60675,2.492273,1.647143,...,6.44,1.488696,1.6162,1.736458,6.93,2.35,1.834042,1.976087,1.858222,1.26675
BRA,-15.148,33.800001,15.1,14.609999,33.320001,15.33,13.13,15.289999,16.98,18.25,...,28.209999,11.94,10.289999,10.399999,10.349999,12.950001,15.98,17.300001,16.63,16.36
CAN,8.13,7.39,2.61175,2.641219,8.1,1.381951,1.742927,7.25,6.5,1.257143,...,5.65,3.248695,3.73,3.07,3.89,3.84,5.02,3.88,2.458222,1.99675
CHN,2.5,53.142563,51.741748,50.161219,50.310233,48.791951,47.652927,47.616749,47.002271,46.527142,...,38.233922,37.198697,36.896201,36.126458,35.5251,45.409999,44.959999,34.076089,33.328221,33.18675
DEU,4.89,3.29,-5.71825,-4.948781,5.4,4.76,-3.597073,-1.73325,7.98,-2.972857,...,4.68,4.01,2.5,2.01,0.8,-0.25,-1.59,-4.763913,-5.051778,-6.01325
FRA,9.36,2.602564,3.05175,2.641219,11.74,3.211951,2.922927,3.43675,2.312273,2.057143,...,5.6,6.1,6.4,7.1,7.55,6.83,7.18,6.73,6.45,1.78675
GBR,6.97,4.74,2.14175,2.001219,6.54,3.8,0.092927,-0.61325,-1.967728,0.96,...,7.53,7.6,8.08,6.55,5.71,6.55,5.12,3.31,2.978222,2.59675
IDN,2.54,54.522566,53.731751,52.36122,54.150234,65.579998,51.232928,50.09675,53.332273,52.887144,...,48.033923,44.438697,41.396198,40.056458,39.285099,37.439998,37.164042,37.296088,38.64822,37.346749
IND,7.062,83.059999,83.039999,83.050001,68.210232,83.190001,83.310001,83.559998,83.690002,83.65,...,69.673924,80.670001,67.436202,78.520001,77.829998,77.080001,76.26,75.419998,65.788222,64.556751


## Model

In [33]:
# Benchmark the candidates, then persist the winner and its scaler.
model_desenvolvimento, normalizer_desenvolvimento = get_the_best_model(
    input=df_inputs_desenvolvimento,
    output=df_outputs_desenvolvimento,
)
save(nome='desenvolvimento', model=model_desenvolvimento, normalizer=normalizer_desenvolvimento)

x-shape:  (570, 1)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.014
MAE: 0.308 (0.044)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.012
MAE: 0.31 (0.044)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.012
MAE: 0.31 (0.044)

Model name:  Lasso
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.014
MAE: 0.308 (0.044)

Model name:  ElasticNet
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.014
MAE: 0.308 (0.044)

Model name:  RandomForestRegressor
RMSE:  0.434
MSE: 0.189
R-Squared:  -0.062
MAE: 0.306 (0.048)

Model name:  Decision Tree Regressor
RMSE:  0.451
MSE: 0.203
R-Squared:  -0.143
MAE: 0.316 (0.05)

Model name:  GradientBoostingRegressor
RMSE:  0.44
MSE: 0.193
R-Squared:  -0.089
MAE: 0.311 (0.049)

Model name:  AdaBoostRegressor
RMSE:  0.451
MSE: 0.204
R-Squared:  -0.146
MAE: 0.341 (0.051)

Model name:  XGBRegressor
RMSE:  0.436
MSE: 0.19
R-Squared:  -0.071
MAE: 0.321 (0.047)

Model name

## Banco Central

## Input

In [34]:
# Input series for the central bank model.
inputs_banco = [
    'FR.INR.DPST',
]

In [35]:
df_inputs_banco = read_data(inputs_banco)
# Fill missing cells with the mean of their column.
df_inputs_banco = df_inputs_banco.fillna(df_inputs_banco.mean())

## Outputs

In [36]:
# Output series for the central bank model.
outputs_banco = [
    'FM.LBL.BMNY.GD.ZS',
    'FM.LBL.BMNY.ZG',
    'FP.CPI.TOTL.ZG',
    'NY.GDP.DEFL.KD.ZG.AD',
    'PA.NUS.FCRF',
]
df_outputs_banco = read_data(outputs_banco)
# Fill gaps with the column mean, then sum all series per first index level.
df_outputs_banco = (
    df_outputs_banco
    .fillna(df_outputs_banco.mean())
    .groupby(level=0)
    .sum()
)

## Model

In [37]:
# Benchmark the candidates, then persist the winner and its scaler.
model_banco, normalizer_banco = get_the_best_model(
    input=df_inputs_banco,
    output=df_outputs_banco,
)
save(nome='banco', model=model_banco, normalizer=normalizer_banco)

x-shape:  (570, 1)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.259
MSE: 0.067
R-Squared:  -0.003
MAE: 0.135 (0.036)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.251
MSE: 0.063
R-Squared:  0.059
MAE: 0.13 (0.036)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.247
MSE: 0.061
R-Squared:  0.086
MAE: 0.13 (0.036)

New best-> Ridge
Model name:  Lasso
RMSE:  0.259
MSE: 0.067
R-Squared:  -0.003
MAE: 0.135 (0.036)

Model name:  ElasticNet
RMSE:  0.259
MSE: 0.067
R-Squared:  -0.003
MAE: 0.135 (0.036)

Model name:  RandomForestRegressor
RMSE:  0.23
MSE: 0.053
R-Squared:  0.205
MAE: 0.125 (0.037)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.265
MSE: 0.07
R-Squared:  -0.055
MAE: 0.13 (0.035)

Model name:  GradientBoostingRegressor
RMSE:  0.245
MSE: 0.06
R-Squared:  0.104
MAE: 0.126 (0.04)

Model name:  AdaBoostRegressor
RMSE:  0.289
MSE: 0.084
R-Squared:  -0.254
MAE: 0.188 (0.044)

Model name:  XGBRegressor
RMSE:  0.244
MSE: 0.06
R

## Economia

## Input

In [38]:
# Input series for the economy model.
inputs_economia = ['NV.IND.TOTL.CD', 'NV.IND.MANF.CD', 'NV.SRV.TOTL.CD']
df_inputs_economia = read_data(inputs_economia)
# Fill missing cells with the mean of their column.
df_inputs_economia = df_inputs_economia.fillna(df_inputs_economia.mean())

## Output

In [39]:
# Output series for the economy model.
outputs_economia = ['NY.GDS.TOTL.CD', 'NE.RSB.GNFS.CD', 'NE.CON.TOTL.CD']
df_outputs_economia = read_data(outputs_economia)
# Fill gaps with the column mean, then sum all series per first index level.
df_outputs_economia = (
    df_outputs_economia
    .fillna(df_outputs_economia.mean())
    .groupby(level=0)
    .sum()
)


## Model

In [40]:
# Benchmark the candidates, then persist the winner and its scaler.
model_economia, normalizer_economia = get_the_best_model(
    input=df_inputs_economia,
    output=df_outputs_economia,
)
save(nome='economia', model=model_economia, normalizer=normalizer_economia)

x-shape:  (570, 3)
y-shape:  (570,)
Model name:  LassoLars
RMSE:  0.238
MSE: 0.057
R-Squared:  -0.015
MAE: 0.181 (0.03)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.09
MSE: 0.008
R-Squared:  0.855
MAE: 0.025 (0.008)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.09
MSE: 0.008
R-Squared:  0.855
MAE: 0.025 (0.008)

New best-> Ridge
Model name:  Lasso
RMSE:  0.238
MSE: 0.057
R-Squared:  -0.015
MAE: 0.181 (0.03)

Model name:  ElasticNet
RMSE:  0.238
MSE: 0.057
R-Squared:  -0.015
MAE: 0.181 (0.03)

Model name:  RandomForestRegressor
RMSE:  0.079
MSE: 0.006
R-Squared:  0.889
MAE: 0.029 (0.007)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.085
MSE: 0.007
R-Squared:  0.87
MAE: 0.037 (0.008)

Model name:  GradientBoostingRegressor
RMSE:  0.091
MSE: 0.008
R-Squared:  0.852
MAE: 0.029 (0.01)

Model name:  AdaBoostRegressor
RMSE:  0.1
MSE: 0.01
R-Squared:  0.823
MAE: 0.061 (0.011)

Model name:  XGBRegressor
RMSE:  0.102
MSE: 0.01
R-Squa