# Imports

In [2]:
import pickle
import wbgapi as wb
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

from sklearn.preprocessing import MinMaxScaler

from sklearn.multioutput import MultiOutputRegressor

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoLars

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split




# Models

In [3]:
# Candidate regressors compared by get_the_best_model. Keys are the display
# names printed during the comparison; the "LGBMRegressor" key is also matched
# by name inside get_the_best_model, so it must not be renamed.
# Every estimator that accepts a seed is seeded so repeated runs are comparable.
MODELS = {
    # NOTE(review): recent scikit-learn releases no longer accept `normalize`
    # on LassoLars — confirm against the installed version before upgrading.
    "LassoLars": LassoLars(alpha=.1, normalize=False),
    "Linear regression": LinearRegression(),
    "Ridge": RidgeCV(),
    "Lasso": Lasso(),
    "ElasticNet":ElasticNet(random_state=42),
    "RandomForestRegressor": MultiOutputRegressor(RandomForestRegressor(max_depth=5, random_state=42)),
    "Decision Tree Regressor": DecisionTreeRegressor(max_depth=5, random_state=42),
    "MultiO/P GBR" :MultiOutputRegressor(GradientBoostingRegressor(n_estimators=5, random_state=42)),
    "MultiO/P AdaB" :MultiOutputRegressor( AdaBoostRegressor(n_estimators=5, random_state=42)),
    "XGBRegressor": XGBRegressor(max_depth=3, n_estimators=100, n_jobs=3,
                           objective='reg:squarederror',
                           random_state=42, learning_rate=0.03),
    "K-nn": KNeighborsRegressor(),
    # Same configuration as "Linear regression" above; kept so the set of
    # evaluated names stays unchanged.
    "LinearRegression":LinearRegression(),
    "CatBoostRegressor":CatBoostRegressor(loss_function='RMSE',silent=True, depth=5),
    "LGBMRegressor":LGBMRegressor()
    
}

In [4]:
# Series codes whose values get_data multiplies by -1; starts out empty.
negative_outputs = list()

# Functions

In [5]:
def read_data(codes):
    """Query World Bank data for a fixed list of 19 economies, years 1990-2019.

    Assigns 2 to ``wb.db`` before querying; this is a module attribute, so the
    value stays set after the call returns.

    Parameters
    ----------
    codes : series identifier(s), passed unchanged to ``wb.data.DataFrame``.

    Returns
    -------
    The object returned by ``wb.data.DataFrame``.
    """
    economies = ['DEU', 'AUS', 'CAN', 'USA', 'FRA', 'ITA', 'JPN', 'GBR', 'ZAF', 'SAU',
                 'ARG', 'BRA', 'CHN', 'KOR', 'IND', 'IDN', 'MEX', 'RUS', 'TUR']
    years = range(1990, 2020)
    wb.db = 2
    return wb.data.DataFrame(codes, economies, years)

In [6]:
def read_data1(codes):
    """Query World Bank data for a fixed list of 19 economies, years 1990-2019.

    Identical to ``read_data`` except that it assigns 1 (not 2) to ``wb.db``
    before querying; the assignment is to a module attribute, so it stays set
    after the call returns.

    Parameters
    ----------
    codes : series identifier(s), passed unchanged to ``wb.data.DataFrame``.

    Returns
    -------
    The object returned by ``wb.data.DataFrame``.
    """
    economies = ['DEU', 'AUS', 'CAN', 'USA', 'FRA', 'ITA', 'JPN', 'GBR', 'ZAF', 'SAU',
                 'ARG', 'BRA', 'CHN', 'KOR', 'IND', 'IDN', 'MEX', 'RUS', 'TUR']
    years = range(1990, 2020)
    wb.db = 1
    return wb.data.DataFrame(codes, economies, years)

In [7]:
def get_data(data):
    """Flatten a DataFrame column by column into a new DataFrame of value lists.

    Each column of ``data`` is visited in order. If the column holds more than
    19 entries, every value is appended to a list keyed by the second element
    of its row label, and is multiplied by -1 first when that key is listed in
    the module-level ``negative_outputs``. Otherwise every value is appended to
    a single list stored under the key "coluna".

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to flatten. NOTE(review): the 19-row threshold presumably
        separates single-series frames (one row per economy) from multi-series
        frames with tuple row labels — confirm against the callers.

    Returns
    -------
    pandas.DataFrame built from the collected lists.
    """
    collected = {}

    for _label, column in data.items():
        has_many_rows = len(column) > 19
        for key, value in column.items():
            if has_many_rows:
                series_code = key[1]
                bucket = collected.setdefault(series_code, [])
                if series_code in negative_outputs:
                    value = value * -1
                bucket.append(value)
            else:
                collected.setdefault("coluna", []).append(value)
    return pd.DataFrame(collected)

In [8]:
def get_the_best_model(input,output):
    """Fit every estimator in MODELS and return the selected one with its scaler.

    Both frames are flattened with ``get_data``, concatenated column-wise,
    NaNs are replaced via ``np.nan_to_num`` and everything is scaled to
    [-1, 1] with one ``MinMaxScaler``. The scaled data is split 80/20; each
    model is fitted on the training part and scored on the test part
    (MSE, RMSE, variance-weighted R-squared). The reported MAE comes from a
    repeated 10-fold cross-validation over the full scaled data.

    A model becomes the new best when its RMSE and MSE are below the current
    best and its R-squared is above the current best and below 1. The starting
    thresholds are RMSE 10, MSE 10 and R-squared 0. If no model passes them,
    the fitted model with the lowest test RMSE is returned instead and a
    warning is printed, so callers never silently receive (and pickle) ``None``
    while at least one model could be fitted.

    Parameters
    ----------
    input : pandas.DataFrame
        Feature data, in the layout ``get_data`` accepts. The name shadows the
        builtin but is kept for backward compatibility.
    output : pandas.DataFrame
        Target data, in the layout ``get_data`` accepts.

    Returns
    -------
    list
        ``[estimator, normalizer]``. ``estimator`` is ``None`` only if every
        model failed to fit. ``normalizer`` is the fitted ``MinMaxScaler``,
        covering input columns followed by output columns.
    """
    data_input = get_data(input)
    data_output = get_data(output)

    normalizer = MinMaxScaler(feature_range = (-1, 1))

    data = pd.concat([data_input,data_output],axis=1)

    len_in = len(data_input.columns)
    len_out = len(data_output.columns)

    # NOTE(review): the scaler is fitted on all rows before the train/test
    # split, so test-set ranges influence the scaling. Left unchanged because
    # the fitted scaler is returned and persisted by callers.
    data = normalizer.fit_transform(np.nan_to_num(data.values))

    x = data[:,0:len_in]
    y = data[:,len_in:len_in+len_out]
    print("x-shape: ", x.shape)
    print("y-shape: ", y.shape)

    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2,
                                                    random_state=42)

    the_best ={"RMSE":10, "MSE": 10, "MAE": 10, "R_squared": 0, 'model':dict() }
    # Lowest-RMSE fitted model, used only when nothing passes the thresholds.
    fallback = None

    # The splitter is seeded with an int, so one instance yields the same
    # folds for every model.
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=42)

    for name, estimator in MODELS.items():
        print("Model name: ", name)
        try:
            if(name =='LGBMRegressor'):
                # This model is fitted on a flattened 1-D target.
                estimator.fit(X_train, np.ravel(y_train))
            else:
                estimator.fit(X_train, y_train)
        except Exception as e:
            # Best effort: report the failure and move on to the next model.
            print(f'error-> {e}')
            print(X_train.dtype)
            print(y_train.dtype)
            continue

        # Predict once and reuse the result for every test-set metric.
        predictions = estimator.predict(X_test)

        n_scores = cross_val_score(estimator, x, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=3)
        n_scores = np.absolute(n_scores)

        MSE = mean_squared_error(y_test, predictions)
        RMSE = np.sqrt(MSE)
        R_squared = r2_score(y_test, predictions,multioutput="variance_weighted")
        MAE = np.mean(n_scores)
        print("RMSE: ", np.round(RMSE, 3))
        print("MSE:" , np.round(MSE,3))
        print("R-Squared: ", np.round(R_squared, 3))
        print(f"MAE: {np.round(MAE,3)} ({np.round(np.std(n_scores),3)})")
        print()

        if fallback is None or RMSE < fallback['RMSE']:
            fallback = {'name': name, 'estimator': estimator, 'RMSE': RMSE}

        if(RMSE < the_best.get('RMSE') and MSE<the_best.get('MSE') and ( (1-R_squared)>0 and R_squared> the_best.get('R_squared') ) ):
            print("New best->", name)
            the_best['RMSE'] = RMSE
            the_best['MSE'] = MSE
            the_best['MAE'] = MAE
            the_best['R_squared']= R_squared
            the_best['model']={'name':name, 'estimator': estimator}

    if not the_best['model']:
        if fallback is None:
            print('WARNING: no model could be fitted; returning None as the estimator')
        else:
            print(f"WARNING: no model passed the selection thresholds (RMSE < 10, 0 < R-squared < 1); "
                  f"falling back to the lowest-RMSE model -> {fallback['name']}")
            the_best['model'] = {'name': fallback['name'], 'estimator': fallback['estimator']}

    print('The best model',the_best.get('model').get('name'))

    return [the_best.get('model').get('estimator'), normalizer]

In [9]:
def save(nome, model, normalizer):
    """Pickle a model and its scaler under the relative ``models/`` directory.

    Writes ``models/normalizer_{nome}.pkl`` and ``models/model_{nome}.pkl``,
    creating ``models/`` first if it does not exist, then prints 'salvo'.

    Parameters
    ----------
    nome : str
        Suffix used in both file names.
    model : object
        Object pickled to the ``model_`` file.
    normalizer : object
        Object pickled to the ``normalizer_`` file.
    """
    import os  # local import: the notebook's import cell does not bring in os

    # Without this, a fresh checkout fails with FileNotFoundError on open().
    os.makedirs('models', exist_ok=True)
    with open(f'models/normalizer_{nome}.pkl','wb') as f:
        pickle.dump(normalizer,f)
    with open(f'models/model_{nome}.pkl','wb') as f:
        pickle.dump(model,f)
    print('salvo')

In [10]:
def to_negative(df, ouputs_negative):
    """Flip the sign of every row whose 'series' index level matches a given code.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index has a level named 'series'.
    ouputs_negative : iterable
        Series codes whose rows are multiplied by -1, one pass per code.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, after the sign flips.
    """
    for code in ouputs_negative:
        selected = df.index.get_level_values('series') == code
        df.loc[selected] = df.loc[selected] * -1
    return df

# Ministérios

## Agricultura e desenvolvimento rural

## Inputs

In [11]:
# Series code used as the model input for this section.
inputs_agricultura = ['NV.AGR.TOTL.CD']
df_inputs_agricultura = read_data(inputs_agricultura)
# Fill missing values with the mean of their column.
df_inputs_agricultura = df_inputs_agricultura.fillna(df_inputs_agricultura.mean())

## Outputs

In [12]:
# Series codes combined into the model target for this section.
outputs_agricultura = [
    'SL.AGR.EMPL.ZS',
    'SP.RUR.TOTL.ZS',
    'AG.PRD.FOOD.XD',
]
df_outputs_agricultura = read_data(outputs_agricultura)
# Fill missing values with the mean of their column.
df_outputs_agricultura = df_outputs_agricultura.fillna(df_outputs_agricultura.mean())
# Sum the rows that share the same first index level.
df_outputs_agricultura = df_outputs_agricultura.groupby(level=0).sum()

## Model

In [13]:
# Compare all candidate models; keep the selected estimator and its scaler.
model_agricultura, normalizer_agricultura = get_the_best_model(
    df_inputs_agricultura,
    df_outputs_agricultura,
)

x-shape:  (570, 1)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.349
MSE: 0.122
R-Squared:  -0.007
MAE: 0.238 (0.028)

Model name:  Linear regression
RMSE:  0.318
MSE: 0.101
R-Squared:  0.165
MAE: 0.219 (0.029)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.319
MSE: 0.102
R-Squared:  0.161
MAE: 0.219 (0.029)

Model name:  Lasso
RMSE:  0.349
MSE: 0.122
R-Squared:  -0.007
MAE: 0.238 (0.028)

Model name:  ElasticNet
RMSE:  0.349
MSE: 0.122
R-Squared:  -0.007
MAE: 0.238 (0.028)

Model name:  RandomForestRegressor
RMSE:  0.266
MSE: 0.071
R-Squared:  0.415
MAE: 0.205 (0.026)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.258
MSE: 0.067
R-Squared:  0.45
MAE: 0.214 (0.029)

New best-> Decision Tree Regressor
Model name:  MultiO/P GBR
RMSE:  0.311
MSE: 0.097
R-Squared:  0.201
MAE: 0.218 (0.026)

Model name:  MultiO/P AdaB
RMSE:  0.281
MSE: 0.079
R-Squared:  0.351
MAE: 0.21 (0.027)

Model name:  XGBRegressor
RMSE:  0.276
MSE: 0.076
R-Squared:  0.3

In [14]:
# Persist the selected estimator and its scaler.
save(nome='agricultura', model=model_agricultura, normalizer=normalizer_agricultura)

salvo


## Educação

## Inputs

In [15]:
# Series codes used as the model inputs for this section.
inputs_educacao = [
    "SE.XPD.PRIM.ZS",
    "SE.XPD.SECO.ZS",
    "SE.XPD.TERT.ZS",
]
df_inputs_educacao = read_data(inputs_educacao)
# Fill missing values with the mean of their column.
df_inputs_educacao = df_inputs_educacao.fillna(df_inputs_educacao.mean())

## Outputs Negative

In [16]:
# Output series whose sign is flipped (via to_negative) before aggregation.
ouputs_negative_educacao = [
    'SE.SEC.CMPT.LO.ZS',
    'SE.SEC.UNER.LO.ZS',
    'SE.PRM.UNER.ZS',
]

## Outputs

In [17]:
# Series codes combined into the model target for this section.
outputs_educacao = [
    "SE.ADT.LITR.ZS",
    "SE.ADT.1524.LT.ZS",
    "SE.PRM.CMPT.ZS",
    'SE.SEC.CMPT.LO.ZS',
    'SE.SEC.UNER.LO.ZS',
    'SE.PRM.UNER.ZS',
]
df_outputs_educacao = read_data(outputs_educacao)
# Fill missing values with the mean of their column.
df_outputs_educacao = df_outputs_educacao.fillna(df_outputs_educacao.mean())
# Flip the sign of the listed series, then sum rows sharing the first index level.
df_outputs_educacao = to_negative(df_outputs_educacao, ouputs_negative_educacao)
df_outputs_educacao = df_outputs_educacao.groupby(level=0).sum()

## Model

In [18]:
# Compare all candidate models; keep the selected estimator and its scaler.
model_educacao, normalizer_educacao = get_the_best_model(
    df_inputs_educacao,
    df_outputs_educacao,
)

x-shape:  (570, 3)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.429
MSE: 0.184
R-Squared:  -0.001
MAE: 0.363 (0.032)

Model name:  Linear regression
RMSE:  0.425
MSE: 0.181
R-Squared:  0.015
MAE: 0.35 (0.03)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.425
MSE: 0.181
R-Squared:  0.017
MAE: 0.35 (0.03)

New best-> Ridge
Model name:  Lasso
RMSE:  0.429
MSE: 0.184
R-Squared:  -0.001
MAE: 0.363 (0.032)

Model name:  ElasticNet
RMSE:  0.429
MSE: 0.184
R-Squared:  -0.001
MAE: 0.363 (0.032)

Model name:  RandomForestRegressor
RMSE:  0.384
MSE: 0.147
R-Squared:  0.198
MAE: 0.323 (0.028)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.411
MSE: 0.169
R-Squared:  0.081
MAE: 0.334 (0.038)

Model name:  MultiO/P GBR
RMSE:  0.415
MSE: 0.173
R-Squared:  0.061
MAE: 0.348 (0.031)

Model name:  MultiO/P AdaB
RMSE:  0.418
MSE: 0.175
R-Squared:  0.049
MAE: 0.344 (0.026)

Model name:  XGBRegressor
RMSE:  0.388
MSE: 0.15
R-Squared:  0.182
MAE: 0.331 (0.026)

In [19]:
# Persist the selected estimator and its scaler.
save(nome='educacao', model=model_educacao, normalizer=normalizer_educacao)

salvo


## Mudanças climáticas e meio ambiente

## Inputs

In [20]:
# Series codes used as the model inputs for this section.
inputs_ambiente = [
    'EN.ATM.METH.AG.ZS',
    'EN.ATM.METH.AG.KT.CE',
    'EN.ATM.NOXE.AG.ZS',
    'EN.ATM.CO2E.KD.GD',
]
df_inputs_ambiente = read_data(inputs_ambiente)
# Fill missing values with the mean of their column.
df_inputs_ambiente = df_inputs_ambiente.fillna(df_inputs_ambiente.mean())

## Outputs

In [21]:
# Series codes combined into the model target for this section.
outputs_ambiente = [
    "EG.ELC.ACCS.ZS",
    "EG.ELC.HYRO.ZS",
    "EG.ELC.RNWX.ZS",
]
df_outputs_ambiente = read_data(outputs_ambiente)
# Fill missing values with the mean of their column.
df_outputs_ambiente = df_outputs_ambiente.fillna(df_outputs_ambiente.mean())
# Sum the rows that share the same first index level.
df_outputs_ambiente = df_outputs_ambiente.groupby(level=0).sum()

## Model

In [22]:
# Scale the aggregated target by 100.
# NOTE: this rebinds the same name, so re-running the cell multiplies again.
df_outputs_ambiente = df_outputs_ambiente.mul(100)

In [23]:
# Compare all candidate models; keep the selected estimator and its scaler.
model_ambiente, normalizer_ambiente = get_the_best_model(
    df_inputs_ambiente,
    df_outputs_ambiente,
)

x-shape:  (570, 4)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.491
MSE: 0.241
R-Squared:  0.101
MAE: 0.36 (0.035)

New best-> LassoLars
Model name:  Linear regression
RMSE:  0.465
MSE: 0.216
R-Squared:  0.194
MAE: 0.355 (0.028)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.473
MSE: 0.224
R-Squared:  0.167
MAE: 0.342 (0.03)

Model name:  Lasso
RMSE:  0.519
MSE: 0.269
R-Squared:  -0.003
MAE: 0.376 (0.04)

Model name:  ElasticNet
RMSE:  0.519
MSE: 0.269
R-Squared:  -0.003
MAE: 0.376 (0.04)

Model name:  RandomForestRegressor
RMSE:  0.117
MSE: 0.014
R-Squared:  0.949
MAE: 0.081 (0.011)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.127
MSE: 0.016
R-Squared:  0.94
MAE: 0.084 (0.012)

Model name:  MultiO/P GBR
RMSE:  0.36
MSE: 0.129
R-Squared:  0.519
MAE: 0.274 (0.031)

Model name:  MultiO/P AdaB
RMSE:  0.183
MSE: 0.034
R-Squared:  0.875
MAE: 0.161 (0.021)

Model name:  XGBRegressor
RMSE:  0.163
MSE: 0.027
R-Squared:  0.9
MAE: 0.135 (0.014)

In [24]:
# Persist the selected estimator and its scaler.
save(nome='ambiente', model=model_ambiente, normalizer=normalizer_ambiente)

salvo


## Saúde

## Input

In [25]:
# Series code used as the model input for this section.
inputs_saude = ['SH.XPD.CHEX.GD.ZS']
df_inputs_saude = read_data(inputs_saude)
# Fill missing values with the mean of their column.
df_inputs_saude = df_inputs_saude.fillna(df_inputs_saude.mean())

## Outputs

In [26]:
# Series codes combined into the model target for this section.
outputs_saude = [
    'SH.STA.BASS.ZS',
    'SH.MED.BEDS.ZS',
    'SH.MED.NUMW.P3',
    'SH.MED.PHYS.ZS',
]
df_outputs_saude = read_data(outputs_saude)
# Fill missing values with the mean of their column.
df_outputs_saude = df_outputs_saude.fillna(df_outputs_saude.mean())
# Sum the rows that share the same first index level.
df_outputs_saude = df_outputs_saude.groupby(level=0).sum()

## Model

In [27]:
# Compare all candidate models; keep the selected estimator and its scaler.
model_saude, normalizer_saude = get_the_best_model(
    df_inputs_saude,
    df_outputs_saude,
)

x-shape:  (570, 1)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.41
MSE: 0.168
R-Squared:  -0.006
MAE: 0.306 (0.034)

Model name:  Linear regression
RMSE:  0.354
MSE: 0.126
R-Squared:  0.248
MAE: 0.208 (0.035)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.354
MSE: 0.126
R-Squared:  0.248
MAE: 0.209 (0.035)

Model name:  Lasso
RMSE:  0.41
MSE: 0.168
R-Squared:  -0.006
MAE: 0.307 (0.035)

Model name:  ElasticNet
RMSE:  0.41
MSE: 0.168
R-Squared:  -0.006
MAE: 0.307 (0.035)

Model name:  RandomForestRegressor
RMSE:  0.356
MSE: 0.127
R-Squared:  0.242
MAE: 0.187 (0.033)

Model name:  Decision Tree Regressor
RMSE:  0.356
MSE: 0.127
R-Squared:  0.24
MAE: 0.187 (0.032)

Model name:  MultiO/P GBR
RMSE:  0.375
MSE: 0.141
R-Squared:  0.155
MAE: 0.245 (0.035)

Model name:  MultiO/P AdaB
RMSE:  0.363
MSE: 0.132
R-Squared:  0.211
MAE: 0.187 (0.036)

Model name:  XGBRegressor
RMSE:  0.363
MSE: 0.132
R-Squared:  0.211
MAE: 0.214 (0.031)

Model name:  K-nn
RMSE:  0.367
MSE: 0.135
R-Squ

In [28]:
# Persist the selected estimator and its scaler.
save(nome='saude', model=model_saude, normalizer=normalizer_saude)

salvo


## Infraestrutura, Ciência e Tecnologia

## Input

In [29]:
# Series code used as the model input for this section.
inputs_ciencia = ['GB.XPD.RSDV.GD.ZS']
df_inputs_ciencia = read_data(inputs_ciencia)
# Fill missing values with the mean of their column.
df_inputs_ciencia = df_inputs_ciencia.fillna(df_inputs_ciencia.mean())

## Output

In [30]:
# Series codes combined into the model target for this section.
outputs_ciencia = [
    'IT.NET.SECR.P6',
    'EP.PMP.SGAS.CD',
    'IP.JRN.ARTC.SC',
    'IP.IDS.NRCT',
    'IP.IDS.RSCT',
    'IS.RRS.GOOD.MT.K6',
]
df_outputs_ciencia = read_data(outputs_ciencia)
# Fill missing values with the mean of their column.
df_outputs_ciencia = df_outputs_ciencia.fillna(df_outputs_ciencia.mean())
# Sum the rows that share the same first index level.
df_outputs_ciencia = df_outputs_ciencia.groupby(level=0).sum()

## Model

In [31]:
# Compare all candidate models, then persist the selected estimator and scaler.
model_ciencia, normalizer_ciencia = get_the_best_model(
    df_inputs_ciencia,
    df_outputs_ciencia,
)
save(nome='ciencia', model=model_ciencia, normalizer=normalizer_ciencia)

x-shape:  (570, 1)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.354
MSE: 0.125
R-Squared:  -0.02
MAE: 0.259 (0.039)

Model name:  Linear regression
RMSE:  0.334
MSE: 0.112
R-Squared:  0.092
MAE: 0.251 (0.038)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.334
MSE: 0.112
R-Squared:  0.092
MAE: 0.251 (0.038)

New best-> Ridge
Model name:  Lasso
RMSE:  0.354
MSE: 0.125
R-Squared:  -0.02
MAE: 0.259 (0.039)

Model name:  ElasticNet
RMSE:  0.354
MSE: 0.125
R-Squared:  -0.02
MAE: 0.259 (0.039)

Model name:  RandomForestRegressor
RMSE:  0.276
MSE: 0.076
R-Squared:  0.38
MAE: 0.206 (0.048)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.265
MSE: 0.07
R-Squared:  0.429
MAE: 0.217 (0.053)

New best-> Decision Tree Regressor
Model name:  MultiO/P GBR
RMSE:  0.308
MSE: 0.095
R-Squared:  0.226
MAE: 0.233 (0.04)

Model name:  MultiO/P AdaB
RMSE:  0.323
MSE: 0.105
R-Squared:  0.148
MAE: 0.243 (0.046)

Model name:  XGBRegressor
RMSE:  0.286
MSE: 0.082
R-

## Desenvolvimento 

## Input

In [32]:
# Series code used as the model input for this section.
inputs_desenvolvimento = ['SI.POV.GINI']
df_inputs_desenvolvimento = read_data(inputs_desenvolvimento)
# Fill missing values with the mean of their column.
df_inputs_desenvolvimento = df_inputs_desenvolvimento.fillna(df_inputs_desenvolvimento.mean())

## Negative Output

In [33]:
# Output series whose sign is flipped (via to_negative) before aggregation.
negative_ouputs_desenvolvimento = [
    'SI.DST.50MD',
]

## Output

In [34]:
# Series codes combined into the model target for this section.
outputs_desenvolvimento = [
    'SL.EMP.VULN.ZS',
    'SL.UEM.TOTL.NE.ZS',
    'SI.DST.50MD',
]
df_outputs_desenvolvimento = read_data(outputs_desenvolvimento)
# Fill missing values with the mean of their column.
df_outputs_desenvolvimento = df_outputs_desenvolvimento.fillna(df_outputs_desenvolvimento.mean())
# Flip the sign of the listed series, then sum rows sharing the first index level.
df_outputs_desenvolvimento = to_negative(df_outputs_desenvolvimento, negative_ouputs_desenvolvimento)
df_outputs_desenvolvimento = df_outputs_desenvolvimento.groupby(level=0).sum()

## Model

In [35]:
# Compare all candidate models, then persist the selected estimator and scaler.
model_desenvolvimento, normalizer_desenvolvimento = get_the_best_model(
    df_inputs_desenvolvimento,
    df_outputs_desenvolvimento,
)
save(nome='desenvolvimento', model=model_desenvolvimento, normalizer=normalizer_desenvolvimento)

x-shape:  (570, 1)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.014
MAE: 0.308 (0.044)

Model name:  Linear regression
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.012
MAE: 0.31 (0.044)

Model name:  Ridge
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.012
MAE: 0.31 (0.044)

Model name:  Lasso
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.014
MAE: 0.308 (0.044)

Model name:  ElasticNet
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.014
MAE: 0.308 (0.044)

Model name:  RandomForestRegressor
RMSE:  0.434
MSE: 0.189
R-Squared:  -0.062
MAE: 0.306 (0.048)

Model name:  Decision Tree Regressor
RMSE:  0.451
MSE: 0.203
R-Squared:  -0.143
MAE: 0.316 (0.05)

Model name:  MultiO/P GBR
RMSE:  0.424
MSE: 0.18
R-Squared:  -0.012
MAE: 0.306 (0.045)

Model name:  MultiO/P AdaB
RMSE:  0.443
MSE: 0.196
R-Squared:  -0.102
MAE: 0.335 (0.047)

Model name:  XGBRegressor
RMSE:  0.436
MSE: 0.19
R-Squared:  -0.071
MAE: 0.321 (0.047)

Model name:  K-nn
RMSE:  0.481
MSE: 0.232
R-Squared:  -0.303
MAE: 0.327 (0.

## Banco Central

## Input

In [36]:
# Series code used as the model input for this section.
inputs_banco = [
    'FR.INR.DPST',
]

In [37]:
df_inputs_banco = read_data(inputs_banco)
# Fill missing values with the mean of their column.
df_inputs_banco = df_inputs_banco.fillna(df_inputs_banco.mean())

## Outputs

In [38]:
# Series codes combined into the model target for this section.
outputs_banco = [
    'FM.LBL.BMNY.GD.ZS',
    'FM.LBL.BMNY.ZG',
    'FP.CPI.TOTL.ZG',
    'NY.GDP.DEFL.KD.ZG.AD',
    'PA.NUS.FCRF',
]
df_outputs_banco = read_data(outputs_banco)
# Fill missing values with the mean of their column.
df_outputs_banco = df_outputs_banco.fillna(df_outputs_banco.mean())
# Sum the rows that share the same first index level.
df_outputs_banco = df_outputs_banco.groupby(level=0).sum()

## Model

In [39]:
# Compare all candidate models, then persist the selected estimator and scaler.
model_banco, normalizer_banco = get_the_best_model(
    df_inputs_banco,
    df_outputs_banco,
)
save(nome='banco', model=model_banco, normalizer=normalizer_banco)

x-shape:  (570, 1)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.259
MSE: 0.067
R-Squared:  -0.003
MAE: 0.135 (0.036)

Model name:  Linear regression
RMSE:  0.251
MSE: 0.063
R-Squared:  0.059
MAE: 0.13 (0.036)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.247
MSE: 0.061
R-Squared:  0.086
MAE: 0.13 (0.036)

New best-> Ridge
Model name:  Lasso
RMSE:  0.259
MSE: 0.067
R-Squared:  -0.003
MAE: 0.135 (0.036)

Model name:  ElasticNet
RMSE:  0.259
MSE: 0.067
R-Squared:  -0.003
MAE: 0.135 (0.036)

Model name:  RandomForestRegressor
RMSE:  0.23
MSE: 0.053
R-Squared:  0.205
MAE: 0.125 (0.037)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.265
MSE: 0.07
R-Squared:  -0.055
MAE: 0.13 (0.035)

Model name:  MultiO/P GBR
RMSE:  0.245
MSE: 0.06
R-Squared:  0.102
MAE: 0.131 (0.036)

Model name:  MultiO/P AdaB
RMSE:  0.302
MSE: 0.091
R-Squared:  -0.369
MAE: 0.175 (0.057)

Model name:  XGBRegressor
RMSE:  0.244
MSE: 0.06
R-Squared:  0.106
MAE: 0.177 (0.033)

## Economia

## Input

In [40]:
# Series codes used as the model inputs for this section.
inputs_economia = [
    'NV.IND.TOTL.CD',
    'NV.IND.MANF.CD',
    'NV.SRV.TOTL.CD',
]
df_inputs_economia = read_data(inputs_economia)
# Fill missing values with the mean of their column.
df_inputs_economia = df_inputs_economia.fillna(df_inputs_economia.mean())

## Output

In [41]:
# Series codes combined into the model target for this section.
outputs_economia = [
    'NY.GDS.TOTL.CD',
    'NE.RSB.GNFS.CD',
    'NE.CON.TOTL.CD',
]
df_outputs_economia = read_data(outputs_economia)
# Fill missing values with the mean of their column.
df_outputs_economia = df_outputs_economia.fillna(df_outputs_economia.mean())
# Sum the rows that share the same first index level.
df_outputs_economia = df_outputs_economia.groupby(level=0).sum()


## Model

In [42]:
# Compare all candidate models, then persist the selected estimator and scaler.
model_economia, normalizer_economia = get_the_best_model(
    df_inputs_economia,
    df_outputs_economia,
)
save(nome='economia', model=model_economia, normalizer=normalizer_economia)

x-shape:  (570, 3)
y-shape:  (570, 1)
Model name:  LassoLars
RMSE:  0.238
MSE: 0.057
R-Squared:  -0.015
MAE: 0.181 (0.03)

Model name:  Linear regression
RMSE:  0.09
MSE: 0.008
R-Squared:  0.855
MAE: 0.025 (0.008)

New best-> Linear regression
Model name:  Ridge
RMSE:  0.09
MSE: 0.008
R-Squared:  0.855
MAE: 0.025 (0.008)

New best-> Ridge
Model name:  Lasso
RMSE:  0.238
MSE: 0.057
R-Squared:  -0.015
MAE: 0.181 (0.03)

Model name:  ElasticNet
RMSE:  0.238
MSE: 0.057
R-Squared:  -0.015
MAE: 0.181 (0.03)

Model name:  RandomForestRegressor
RMSE:  0.079
MSE: 0.006
R-Squared:  0.889
MAE: 0.029 (0.007)

New best-> RandomForestRegressor
Model name:  Decision Tree Regressor
RMSE:  0.085
MSE: 0.007
R-Squared:  0.87
MAE: 0.037 (0.008)

Model name:  MultiO/P GBR
RMSE:  0.156
MSE: 0.024
R-Squared:  0.566
MAE: 0.116 (0.019)

Model name:  MultiO/P AdaB
RMSE:  0.09
MSE: 0.008
R-Squared:  0.854
MAE: 0.057 (0.01)

Model name:  XGBRegressor
RMSE:  0.102
MSE: 0.01
R-Squared:  0.816
MAE: 0.081 (0.007)

Mo