<a href="https://colab.research.google.com/github/Idalen/enem-score-predictor/blob/main/notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trabalho De ML

In [2]:
import numpy as np
import pandas as pd

import json

import plotly.express as px
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error as RMSE

from pathlib import Path
from google.colab import drive

# Redução do uso da memória

Devido ao alto consumo de memória do nosso dataset, aplicamos algumas estratégias para reduzir o uso de memória pelo Pandas.
Primeiro, convertemos o tipo de dado de cada coluna para formatos que ocupam menos bytes; depois, salvamos o arquivo no formato `.parquet`, que tem melhor suporte à compressão de dados.

In [None]:
def reduce_mem_usage(df):
    """Downcast the columns of ``df`` in place to reduce its memory footprint.

    Rules applied per column:

    * signed / unsigned integers -> smallest integer dtype of the same
      signedness whose (inclusive) range holds the column's min and max;
    * float64 (or wider) -> float32 when min and max lie inside the float32
      range (this trades precision for memory);
    * object / string columns -> ``category``;
    * every other dtype (bool, datetime, timedelta, categorical, nullable
      extension dtypes, ...) is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimise. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    signed_types = (np.int8, np.int16, np.int32, np.int64)
    unsigned_types = (np.uint8, np.uint16, np.uint32, np.uint64)
    float32_limits = np.finfo(np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        # Pandas extension dtypes: only plain string columns are converted;
        # categoricals and nullable numerics are already compact or may hold NA.
        if not isinstance(col_type, np.dtype):
            is_text = pd.api.types.is_string_dtype(col_type)
            if is_text and not isinstance(col_type, pd.CategoricalDtype):
                df[col] = df[col].astype('category')
            continue

        kind = col_type.kind
        if kind == 'O':
            df[col] = df[col].astype('category')
        elif kind in ('i', 'u'):
            if len(df[col]) == 0:
                continue  # min()/max() are undefined for an empty column
            # Python ints avoid mixed signed/unsigned numpy comparisons.
            c_min, c_max = int(df[col].min()), int(df[col].max())
            for candidate in (signed_types if kind == 'i' else unsigned_types):
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    if np.dtype(candidate) != col_type:
                        df[col] = df[col].astype(candidate)
                    break
        elif kind == 'f' and col_type.itemsize > 4:
            c_min, c_max = df[col].min(), df[col].max()
            # All-NaN columns fail both comparisons and therefore stay as they are.
            if c_min > float32_limits.min and c_max < float32_limits.max:
                df[col] = df[col].astype(np.float32)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard against a zero-sized frame so the percentage never divides by zero.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return df

# Leitura dos arquivos

In [3]:
# Dataset directory inside the mounted Drive; Model.load reads train.parquet
# and test.parquet from it.
path = Path("/content/drive/MyDrive/datasets/dados-enem/")
# Mount Google Drive at the same root that `path` points into.
drive.mount('/content/drive')

Mounted at /content//drive


## Anotações:
* Testar: se vale a pena eliminar quem está ausente plotando o gráfico pra ver a nota desse grupo de pessoas
* Como fazer a conexão do Jupyter com o SSH
* https://python.plainenglish.io/how-to-create-a-interative-map-using-plotly-express-geojson-to-brazil-in-python-fb5527ae38fc

## 1) Tratar dados
* EDA Inicial
* Tratar nulos (lembre-se de discutir e avaliar as melhores estratégias)
* Mapear os valores e OneHotEncoding 

## 2) Preprocessamento
* Remover colunas (correlacionadas [>80%], baixa variância, semântica)
* (Opcional) Aplicar PCA 
* (Opcional) Feature Engineering
* Standardize/Normalize
* Tratar dados desbalanceados

## 3) Modelo
* Regressão linear<br>
a. Realizar análise dos pesos<br>
b. Aplicar técnicas de regularização<br> 

* Árvore de Decisão <br>
a. Profundidade <br>
b. Avaliar os cortes (impureza de gini / entropia) <br>

* Naive Bayes <br>
a. Quais features afetam significativamente P(nota|feature)<br>
b. GaussianNaiveBayes x BernoulliNaiveBayes<br>

* SVM<br>
a. Avaliar o hiperplano gerado/ onde o corte é realizado <br>
b. Avaliar diferentes kernels <br>

In [5]:
class Model:
  """Preparation and model-selection pipeline for the ENEM score data.

  Intended call order: ``load`` -> ``prepare`` -> ``tune`` -> ``ranking`` /
  ``to_json``. The train and test frames live on the instance as ``train_df``
  and ``test_df`` and are transformed side by side by the private steps.
  """

  # Candidate estimators and the hyper-parameter grid searched for each one.
  _algorithms = {

      'ElasticNet': {
          'estimator':ElasticNet(),
          'parameters':{
              'alpha':[0.001, 0.5, 1.0],
              'l1_ratio': [0, 0.5, 1.0]
          }},

      'DecisionTree': {
          'estimator':DecisionTreeRegressor(),
          'parameters':{
              'max_depth':[100, 90, 80, 70],
              'min_samples_leaf':[1, 10, 20, 50, 100]
          }},

      # Disabled candidates, kept for reference; uncomment to include them.
      #
      # 'RandomForest': {
      #     'estimator':RandomForestRegressor(),
      #     'parameters':{
      #         'n_estimators':[11, 31, 51],
      #         'max_depth':[100, 90, 80,],
      #         'min_samples_leaf':[1, 20, 100],
      #     }},

      # 'KNN': {
      #     'estimator':KNeighborsRegressor(),
      #     'parameters':{
      #         'n_neighbors':[5, 23, 47, 83],
      #         'weights':['uniform', 'distance'],
      #         'p':[1, 1.5, 2]
      #     }},

      # 'SVM': {
      #     'estimator':SVR(),
      #     'parameters':{
      #         'kernel':['rbf', 'poly'],
      #         'gamma':[0.01, 0.5, 1.0],
      #         'C':[10, 100, 1000]
      #     }}

  }

  # Columns with more than this percentage of nulls are discarded.
  _NULL_PERCENTAGE_LIMIT = 30
  # Columns whose standard deviation is below this value are discarded.
  _STD_THRESHOLD = 0.05
  # A feature is discarded when its absolute correlation with an earlier
  # feature exceeds this value.
  _CORRELATION_LIMIT = 0.9
  # Upper bound on the number of melted rows drawn for the box plot.
  _MAX_PLOT_ROWS = 1000000

  def __init__(self, verbose=True):
    """Create an empty pipeline.

    ``verbose`` is accepted for backward compatibility and is not used; every
    public method takes its own ``verbose`` flag.
    """
    self.train_df = None
    self.test_df = None
    self._targets = []
    self._results = {}

  def load(self, path, verbose=True, n_train=40000, n_test=10000, random_state=None):
    """Read ``train.parquet`` / ``test.parquet`` from ``path`` and sample them.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory containing both parquet files.
    verbose : bool
        Print the number of sampled rows.
    n_train, n_test : int
        Number of rows to sample from each file; capped at the file's size so
        a small file no longer raises.
    random_state : int or None
        Seed forwarded to ``DataFrame.sample``; ``None`` keeps the sampling
        non-deterministic.

    Side effects: sets ``train_df`` and ``test_df`` (indexed by
    ``NU_INSCRICAO``) and ``_targets`` (train columns containing ``NU_NOTA``).
    """
    directory = Path(path)
    train = pd.read_parquet(directory / 'train.parquet')
    test = pd.read_parquet(directory / 'test.parquet')

    self.train_df = train.sample(min(n_train, len(train)), random_state=random_state)
    self.test_df = test.sample(min(n_test, len(test)), random_state=random_state)

    if verbose:
      print("Quantidade inicial de elementos no treino:", len(self.train_df))
      print("Quantidade inicial de elementos no teste:", len(self.test_df))

    self.train_df.set_index("NU_INSCRICAO", inplace=True)
    self.test_df.set_index("NU_INSCRICAO", inplace=True)

    self._targets = [col for col in self.train_df.columns if "NU_NOTA" in col]

  def prepare(self,verbose=True):
    """Run every preparation step, in order, on the train and test frames."""
    steps = [
        ("Mapeando valores...", self._map_values),
        ("Criando novas colunas...", self._create_features),
        ("Eliminando colunas...", self._clear_cols),
        ("Aplicando get dummies...", self._create_dummies),
        ("Selecionando features mais importantes", self._feature_selection),
    ]
    for message, step in steps:
      if verbose:
        print(message)
      step(verbose)

  def tune(self, random_state=0, verbose=True):
    """Grid-search every configured algorithm for every target column.

    Parameters
    ----------
    random_state : int
        Seed applied to each estimator that exposes a ``random_state``
        parameter, so repeated runs give the same scores.
    verbose : bool
        Print the algorithm names and enable GridSearchCV's per-fit log.

    Returns
    -------
    GridSearchCV or None
        The last fitted search (``None`` when there is nothing to fit). Best
        parameters and scores (negative RMSE) of every search are stored in
        ``self._results[algorithm][target]``.
    """
    from sklearn.base import clone  # only needed by this method

    X = self.train_df.drop(columns=self._targets)
    Y = self.train_df[self._targets]

    self._results = {}
    gscv = None

    for name, algorithm in self._algorithms.items():
      if verbose:
        print(name)

      # Work on a copy so the shared class-level estimator is never mutated.
      estimator = clone(algorithm['estimator'])
      if 'random_state' in estimator.get_params():
        estimator.set_params(random_state=random_state)

      self._results[name] = {}

      for target in self._targets:
        gscv = GridSearchCV(estimator, algorithm['parameters'],
                            verbose=3 if verbose else 0,
                            scoring='neg_root_mean_squared_error',
                            return_train_score=True)
        gscv.fit(X, Y[target])

        self._results[name][target] = {
            'best_params': gscv.best_params_,
            # Plain float keeps the results JSON-serialisable.
            'best_score': float(gscv.best_score_),
        }

    return gscv

  def to_json(self, path='data.json'):
    """Write the tuning results to ``path`` (default ``data.json``) as JSON."""
    with open(path, 'w') as fp:
      json.dump(self._results, fp)

  def ranking(self, verbose=True):
    """Pick, for every target, the algorithm with the highest ``best_score``.

    Scores are negative RMSE values, so higher is better.

    Returns
    -------
    dict
        ``{target: algorithm_name}``; empty when ``tune`` has not produced
        any result yet.
    """
    selecteds = {}

    for name, per_target in self._results.items():
      for target, outcome in per_target.items():
        current = selecteds.get(target)
        # Keep the algorithm name (not the score) so later comparisons can
        # look the current best score up in ``_results``.
        if current is None or outcome['best_score'] > self._results[current][target]['best_score']:
          selecteds[target] = name

    if verbose:
      for target, name in selecteds.items():
        print(f"{target}: {name} ({self._results[name][target]['best_score']:.3f})")

    return selecteds

  # def predict(self):

  @staticmethod
  def _numeric_corr(df):
    """Correlation matrix over the numeric and boolean columns of ``df``."""
    return df.select_dtypes(include=['number', 'bool']).corr()

  @staticmethod
  def _null_percentage(df):
    """Percentage of nulls per column, columns without nulls removed, ascending."""
    percentage = df.isna().mean() * 100
    return percentage[percentage != 0].sort_values()

  def correlation(self, save=False, plot=True):
    """Show the correlation heat map of the training frame.

    ``save`` is reserved and currently has no effect.
    """
    fig = px.imshow(self._numeric_corr(self.train_df))

    if plot:
      fig.show()

  def plot(self, column):
    """Show the value counts of ``column`` and the score box plots split by it."""
    counts = self.train_df[column].value_counts()
    fig = px.bar(x=counts.index, y=counts.values)
    fig.show()

    melted = pd.melt(self.train_df, id_vars=[column], value_vars=self._targets, var_name='TP_NOTA', value_name='NU_NOTA')
    # Never request more rows than exist: DataFrame.sample raises otherwise.
    sample_size = min(len(melted), self._MAX_PLOT_ROWS)
    fig=px.box(melted.sample(sample_size), x='TP_NOTA', y='NU_NOTA', color=column)
    fig.show()

  def null_analysis(self, plot=True, save=False, verbose=True):
    """Show bar charts with the percentage of nulls per column (train and test).

    ``save`` and ``verbose`` are reserved and currently have no effect.
    """
    null_percentage_train = self._null_percentage(self.train_df)
    fig_train = px.bar(x=null_percentage_train.index, y=null_percentage_train.values, title="Porcentagem de valores nulos nos dados de treino")

    null_percentage_test = self._null_percentage(self.test_df)
    fig_test = px.bar(x=null_percentage_test.index, y=null_percentage_test.values, title="Porcentagem de valores nulos nos dados de teste")

    if plot:
      fig_train.show()
      fig_test.show()

  def _feature_selection(self, verbose):
    """Drop near-constant features, then features redundant with earlier ones.

    Target columns are never candidates for removal, and columns absent from
    the test frame are ignored when the same drop is applied to it.
    """
    targets = set(self._targets)

    # NOTE(review): the first column is excluded from this check - confirm
    # whether that is still wanted now that NU_INSCRICAO is the index.
    candidates = [col for col in self.train_df.columns[1:] if col not in targets]
    to_drop = [col for col in candidates if self.train_df[col].std() < self._STD_THRESHOLD]

    self.train_df.drop(columns=to_drop, inplace=True)
    self.test_df.drop(columns=to_drop, inplace=True, errors='ignore')
    if verbose:
      print("[VARIANCE TRESHOLD] Removendo colunas:", to_drop)

    #################################################################################

    features = self.train_df.drop(columns=self._targets, errors='ignore')
    correlation = self._numeric_corr(features).abs()

    # Strict upper triangle: each pair is examined once and the later column
    # of a highly correlated pair is the one discarded.
    upper_triangle = correlation.where(np.triu(np.ones(correlation.shape), k=1).astype(bool))

    to_drop = [column for column in upper_triangle.columns
               if (upper_triangle[column] > self._CORRELATION_LIMIT).any()]

    self.train_df.drop(columns=to_drop, inplace=True)
    self.test_df.drop(columns=to_drop, inplace=True, errors='ignore')

    if verbose:
      print('[HIGH CORRELATION] Eliminando colunas redundantes:', to_drop)

  def _clear_cols(self, verbose):
    """Drop sparse and irrelevant columns, then rows unusable for training.

    Steps: (1) columns above the null-percentage limit, (2) a fixed list of
    location/administrative columns, (3) rows with an essay status missing
    although the candidate was present, (4) rows with null scores or essay
    status, (5) rows whose essay status is not 'Sem problemas'.
    """
    # These columns are needed by the row filters below; their null rows are
    # removed there, so they must survive the column-level null filter.
    protected = set(self._targets) | {'TP_STATUS_REDACAO', 'TP_PRESENCA_CH'}

    def sparse_columns(df):
      percentage = self._null_percentage(df)
      sparse = percentage[percentage > self._NULL_PERCENTAGE_LIMIT].index
      return [col for col in sparse if col not in protected]

    to_drop_columns_train = sparse_columns(self.train_df)
    to_drop_columns_test = sparse_columns(self.test_df)

    if verbose:
      print("[NULLS] Colunas dropadas no treino:", sorted(to_drop_columns_train))
      print("[NULLS] Colunas dropadas no teste:", sorted(to_drop_columns_test))

    self.train_df.drop(columns=to_drop_columns_train, inplace=True)
    self.test_df.drop(columns=to_drop_columns_test, inplace=True)

    ###################################################################################################################

    to_drop = ['CO_MUNICIPIO_RESIDENCIA', 'NO_MUNICIPIO_RESIDENCIA', 'CO_UF_RESIDENCIA', 'CO_MUNICIPIO_NASCIMENTO', 'NO_MUNICIPIO_NASCIMENTO',
    'CO_UF_NASCIMENTO', 'SG_UF_NASCIMENTO', 'TP_ANO_CONCLUIU', 'IN_TREINEIRO', 'CO_MUNICIPIO_PROVA', 'NO_MUNICIPIO_PROVA', 'CO_UF_PROVA',
    'SG_UF_PROVA']

    # Some of these may already be gone because of the null filter above.
    self.train_df.drop(columns=to_drop, inplace=True, errors='ignore')
    self.test_df.drop(columns=to_drop, inplace=True, errors='ignore')

    if verbose:
      print(f'[DROP COLUMNS] Colunas retiradas por falta de relevânica:{to_drop}')

    ##################################################################################################################

    for frame in (self.train_df, self.test_df):
      inconsistent = frame['TP_STATUS_REDACAO'].isna() & (frame['TP_PRESENCA_CH']=='Presente')
      frame.drop(frame[inconsistent].index, inplace=True)

    if verbose:
      print(f'[INCONSISTENCY] Removendo inconsistências.')

    ##################################################################################################################

    required = ['NU_NOTA_MT', 'NU_NOTA_CH', 'NU_NOTA_CN', 'NU_NOTA_LC', 'NU_NOTA_REDACAO', 'TP_STATUS_REDACAO']
    self.train_df.dropna(subset=required, inplace=True)

    # The test frame may lack some score columns: filter on those it has
    # instead of silently skipping the whole step.
    required_in_test = [col for col in required if col in self.test_df.columns]
    if required_in_test:
      self.test_df.dropna(subset=required_in_test, inplace=True)

    if verbose:
      print('[NULL TARGETS] Removendo valores nulos nas colunas-alvo')

    #####################################################################################################################

    for frame in (self.train_df, self.test_df):
      frame.drop(frame[frame['TP_STATUS_REDACAO'] != 'Sem problemas'].index, inplace=True)

    if verbose:
      print('[::] Removendo redações que tiraram nota 0')

  def _create_dummies(self, verbose):
    """One-hot encode the categorical columns of both frames.

    The columns to encode are chosen from the training frame. After encoding,
    the dummy columns of the test frame are aligned with the training ones:
    categories unseen in test become all-zero columns and categories unseen
    in train are discarded, so both frames expose the same dummy features.
    """
    cols = [col for col in self.train_df.columns if ((self.train_df[col].dtype == 'object') or (self.train_df[col].dtype.name == 'category'))]
    cols_in_test = [col for col in cols if col in self.test_df.columns]

    train_before = set(self.train_df.columns)
    test_untouched = set(self.test_df.columns) - set(cols_in_test)

    train_encoded = pd.get_dummies(self.train_df, columns=cols)
    test_encoded = pd.get_dummies(self.test_df, columns=cols_in_test)

    train_dummies = [col for col in train_encoded.columns if col not in train_before]
    test_kept = [col for col in test_encoded.columns if col in test_untouched]

    self.train_df = train_encoded
    self.test_df = test_encoded.reindex(columns=test_kept + train_dummies, fill_value=0)

    if verbose:
      print(f"[GET DUMMIES] Colunas categóricas convertidas: {cols}")

  def _create_features(self, verbose):
    """Add engineered columns to both frames and impute ``NU_IDADE``.

    New columns: residence region, mean training score of that region for
    each target, racial-minority flag, special-condition flag and single
    marital-status flag. Statistics (region means, age median) are computed
    on the training frame only and reused for the test frame.
    """
    new_columns = []
    filled_columns = []
    ############################################################################################

    uf_regiao = {
      'RR':'Norte', 'AP':'Norte', 'AM':'Norte', 'PA':'Norte', 'AC':'Norte', 'RO':'Norte', 'TO':'Norte', 'MA':'Nordeste',
      'PI':'Nordeste', 'CE':'Nordeste', 'RN':'Nordeste', 'PB':'Nordeste', 'PE':'Nordeste', 'AL':'Nordeste', 'SE':'Nordeste',
      'BA':'Nordeste', 'MT':'Centro-oeste', 'DF':'Centro-oeste', 'GO':'Centro-oeste', 'MS':'Centro-oeste', 'MG':'Sudeste',
      'ES':'Sudeste', 'RJ':'Sudeste', 'SP':'Sudeste', 'PR':'Sul', 'SC':'Sul', 'RS':'Sul',
      }

    for frame in (self.train_df, self.test_df):
      frame['NO_REGIAO_RESIDENCIA'] = frame['SG_UF_RESIDENCIA'].map(uf_regiao)

    new_columns.append('NO_REGIAO_RESIDENCIA')

    ############################################################################################

    mean_score_per_reg = self.train_df.groupby("NO_REGIAO_RESIDENCIA")[self._targets].mean()
    for target in self._targets:
      feature = "REG_NOTA_"+target.split("_")[2]+"_MEDIA"
      # Series.map does the per-region lookup in one vectorised pass and
      # yields NaN (instead of raising) for a region without a training mean.
      for frame in (self.train_df, self.test_df):
        frame[feature] = frame['NO_REGIAO_RESIDENCIA'].map(mean_score_per_reg[target])

      new_columns.append(feature)
    ############################################################################################

    # 1 for every value other than 'Branca' / 'Amarela' (nulls included), else 0.
    for frame in (self.train_df, self.test_df):
      frame['TP_MINORIA_RACIAL'] = (~frame['TP_COR_RACA'].isin(['Branca', 'Amarela'])).astype(int)

    new_columns.append('TP_MINORIA_RACIAL')
    ############################################################################################

    cols = [col for col in self.train_df.columns if (("IN_" in col) and ('TREINEIRO' not in col))]

    for frame in (self.train_df, self.test_df):
      frame['TP_SITUACAO_ESPECIAL'] = frame[cols].any(axis=1)

    new_columns.append('TP_SITUACAO_ESPECIAL')

    #############################################################################################

    for frame in (self.train_df, self.test_df):
      frame['TP_SOLTEIRO'] = frame['TP_ESTADO_CIVIL'] == 'Solteiro(a)'

    new_columns.append('TP_SOLTEIRO')

    #############################################################################################

    # median() skips nulls; the training median is reused for the test frame.
    median_train = self.train_df['NU_IDADE'].median()

    for frame in (self.train_df, self.test_df):
      frame['NU_IDADE'] = frame['NU_IDADE'].fillna(median_train)

    filled_columns.append("NU_IDADE")
    #############################################################################################

    if verbose:
      print(f'[FEATURE ENGINEERING] Novas colunas: {new_columns}')
      print(f'[INPUTATION] Colunas com valores nulos preenchidos: {filled_columns}')

  def _apply_mapping(self, column, mapping, as_int=False):
    """Replace the values of ``column`` through ``mapping`` in both frames.

    With ``as_int=True`` the mapped column is cast to ``int``, which raises
    when a value is null or missing from ``mapping``.
    """
    for frame in (self.train_df, self.test_df):
      mapped = frame[column].map(mapping)
      frame[column] = mapped.astype(int) if as_int else mapped

  def _map_values(self, verbose):
    """Decode the coded columns of both frames.

    ``TP_*`` codes become descriptive labels; questionnaire answers
    (``Q001``-``Q025``, except the already numeric ``Q005``) become integers.
    """

    def ordinal(letters):
      # 'ABC' -> {'A': 1, 'B': 2, 'C': 3}
      return {letter: position for position, letter in enumerate(letters, start=1)}

    #################################################################
    label_maps = {
      'TP_ESTADO_CIVIL': {
        0:"0",#np.NaN,
        1:"Solteiro(a)",
        2:"Casado(a)/Mora com companheiro(a)",
        3:"Divorciado(a)/Desquitado(a)/Separado(a)",
        4:"Viúvo(a)"},

      'TP_COR_RACA': {
        0:"0",#np.NaN,
        1:"Branca",
        2:"Preta",
        3:"Parda",
        4:"Amarela",
        5:"Indígena"},

      'TP_NACIONALIDADE': {
        0:"0",#np.NaN,
        1:"Brasileiro(a)",
        2:"Brasileiro(a) Naturalizado(a)",
        3:"Estrangeiro(a)",
        4:"Brasileiro(a) Nato(a), nascido(a) no exterior"},

      'TP_ST_CONCLUSAO': {
        1:"Já concluí o Ensino Médio",
        2:"Estou cursando e concluirei o Ensino Médio no ano corrente",
        3:"Estou cursando e concluirei o Ensino Médio após o ano corrente",
        4:"Não concluí e não estou cursando o Ensino Médio"},

      # Codes 1..12 stand for the years 2018 down to 2007.
      'TP_ANO_CONCLUIU': {
        0:"0",#np.NaN,
        **{code: str(2019 - code) for code in range(1, 13)},
        13:"Antes de 2007"},

      'TP_ESCOLA': {
        1:"0",#np.NaN,
        2:"Pública",
        3:"Privada",
        4:"Exterior"},

      'TP_DEPENDENCIA_ADM_ESC': {
        1:"Federal",
        2:"Estadual",
        3:"Municipal",
        4:"Privada"},

      'TP_ENSINO': {
        1:"Ensino Regular",
        2:"Educação Especial - Modalidade Substitutiva",
        3:"Educação de Jovens e Adultos"},

      'TP_STATUS_REDACAO': {
        1:"Sem problemas",
        2:"Anulada",
        3:"Copiou texto motivador",
        4:"Em branco",
        6:"Fuga ao tema",
        7:"Não atende tipo textual",
        8:"Texto insuficiente",
        9:"Parte desconectada"},
    }

    for column, mapping in label_maps.items():
      self._apply_mapping(column, mapping)

    #################################################################
    presence = {0:"Ausente",
      1:"Presente",
      2:"Eliminado"}

    for column in [col for col in self.train_df.columns if "TP_PRESENCA" in col]:
      self._apply_mapping(column, presence)

    #################################################################
    # Questionnaire answer scales. In Q001-Q004 the last letter maps to 0.
    scale_a_to_g_h_zero = {**ordinal('ABCDEFG'), 'H':0}
    scale_a_to_e_f_zero = {**ordinal('ABCDE'), 'F':0}
    scale_a_to_q = ordinal('ABCDEFGHIJKLMNOPQ')
    scale_a_to_d = ordinal('ABCD')
    scale_a_to_e = ordinal('ABCDE')
    yes_no = {'A':0, 'B':1}

    #Q005 is already numeric
    question_maps = {
      'Q001': scale_a_to_g_h_zero,
      'Q002': scale_a_to_g_h_zero,
      'Q003': scale_a_to_e_f_zero,
      'Q004': scale_a_to_e_f_zero,
      'Q006': scale_a_to_q,
      'Q007': scale_a_to_d,
      'Q008': scale_a_to_e,
      'Q009': scale_a_to_e,
      'Q010': scale_a_to_e,
      'Q011': scale_a_to_e,
      'Q012': scale_a_to_e,
      'Q013': scale_a_to_e,
      'Q014': scale_a_to_e,
      'Q015': scale_a_to_e,
      'Q016': scale_a_to_e,
      'Q017': scale_a_to_e,
      'Q018': yes_no,
      'Q019': scale_a_to_e,
      'Q020': yes_no,
      'Q021': yes_no,
      'Q022': scale_a_to_e,
      'Q023': yes_no,
      'Q024': scale_a_to_e,
      'Q025': yes_no,
    }

    for column, mapping in question_maps.items():
      self._apply_mapping(column, mapping, as_int=True)

In [6]:
# Build the pipeline and load the sampled train/test frames found under `path`.
model = Model()
model.load(path)

Quantidade inicial de elementos no treino: 40000
Quantidade inicial de elementos no teste: 10000


In [7]:
# Run every preparation step: value mapping, feature engineering, column
# cleanup, one-hot encoding and feature selection.
model.prepare()

Mapeando valores...
Criando novas colunas...
[FEATURE ENGINEERING] Novas colunas: ['NO_REGIAO_RESIDENCIA', 'REG_NOTA_CN_MEDIA', 'REG_NOTA_CH_MEDIA', 'REG_NOTA_LC_MEDIA', 'REG_NOTA_MT_MEDIA', 'REG_NOTA_REDACAO_MEDIA', 'TP_MINORIA_RACIAL', 'TP_SITUACAO_ESPECIAL', 'TP_SOLTEIRO']
[INPUTATION] Colunas com valores nulos preenchidos: ['NU_IDADE']
Eliminando colunas...
[NULLS] Colunas dropadas no treino: ['CO_ESCOLA', 'CO_MUNICIPIO_ESC', 'CO_UF_ESC', 'NO_MUNICIPIO_ESC', 'SG_UF_ESC', 'TP_DEPENDENCIA_ADM_ESC', 'TP_ENSINO', 'TP_LOCALIZACAO_ESC', 'TP_SIT_FUNC_ESC']
[NULLS] Colunas dropadas no teste: ['CO_ESCOLA', 'CO_MUNICIPIO_ESC', 'CO_UF_ESC', 'NO_MUNICIPIO_ESC', 'SG_UF_ESC', 'TP_DEPENDENCIA_ADM_ESC', 'TP_ENSINO', 'TP_LOCALIZACAO_ESC', 'TP_SIT_FUNC_ESC']
[DROP COLUMNS] Colunas retiradas por falta de relevânica:[['CO_MUNICIPIO_RESIDENCIA', 'NO_MUNICIPIO_RESIDENCIA', 'CO_UF_RESIDENCIA', 'CO_MUNICIPIO_NASCIMENTO', 'NO_MUNICIPIO_NASCIMENTO', 'CO_UF_NASCIMENTO', 'SG_UF_NASCIMENTO', 'TP_ANO_CONCLUIU',

In [8]:
# Grid-search every configured algorithm for every target column;
# tune() returns the last fitted GridSearchCV object.
m = model.tune()

ElasticNet
Fitting 5 folds for each of 9 candidates, totalling 45 fits


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0;, score=(train=-64.845, test=-64.429) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0;, score=(train=-64.705, test=-65.003) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0;, score=(train=-64.783, test=-64.706) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0;, score=(train=-64.664, test=-65.142) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0;, score=(train=-64.630, test=-65.314) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-64.841, test=-64.429) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-64.701, test=-65.008) total time=   2.9s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-64.780, test=-64.706) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-64.661, test=-65.141) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-64.626, test=-65.313) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-64.837, test=-64.430) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-64.695, test=-65.020) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-64.775, test=-64.708) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-64.658, test=-65.138) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-64.622, test=-65.313) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.5, l1_ratio=0;, score=(train=-66.004, test=-65.586) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.5, l1_ratio=0;, score=(train=-65.897, test=-66.004) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.5, l1_ratio=0;, score=(train=-65.991, test=-65.712) total time=   2.6s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.5, l1_ratio=0;, score=(train=-65.833, test=-66.280) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.5, l1_ratio=0;, score=(train=-65.834, test=-66.217) total time=   2.5s
[CV 1/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-65.771, test=-65.372) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-65.669, test=-65.762) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-65.763, test=-65.448) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-65.599, test=-66.078) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-65.593, test=-66.019) total time=   0.3s
[CV 1/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-65.310, test=-64.933) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-65.207, test=-65.257) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-65.290, test=-64.963) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-65.135, test=-65.633) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-65.109, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=1.0, l1_ratio=0;, score=(train=-66.440, test=-66.001) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=1.0, l1_ratio=0;, score=(train=-66.337, test=-66.415) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=1.0, l1_ratio=0;, score=(train=-66.425, test=-66.185) total time=   2.6s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=1.0, l1_ratio=0;, score=(train=-66.272, test=-66.705) total time=   2.6s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=1.0, l1_ratio=0;, score=(train=-66.285, test=-66.614) total time=   2.6s
[CV 1/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-66.252, test=-65.824) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-66.157, test=-66.219) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-66.249, test=-65.964) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-66.083, test=-66.559) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-66.090, test=-66.449) total time=   0.3s
[CV 1/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-65.658, test=-65.295) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-65.589, test=-65.601) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-65.650, test=-65.269) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-65.508, test=-66.042) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-65.481, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


Fitting 5 folds for each of 9 candidates, totalling 45 fits


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0;, score=(train=-69.282, test=-69.466) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0;, score=(train=-69.205, test=-69.778) total time=   3.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0;, score=(train=-69.312, test=-69.378) total time=   3.6s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0;, score=(train=-69.303, test=-69.390) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0;, score=(train=-69.301, test=-69.446) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-69.278, test=-69.469) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-69.202, test=-69.779) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-69.308, test=-69.379) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-69.301, test=-69.392) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-69.299, test=-69.446) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-69.272, test=-69.480) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-69.198, test=-69.779) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-69.302, test=-69.390) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-69.298, test=-69.391) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-69.296, test=-69.442) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.5, l1_ratio=0;, score=(train=-70.351, test=-70.297) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.5, l1_ratio=0;, score=(train=-70.257, test=-70.752) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.5, l1_ratio=0;, score=(train=-70.377, test=-70.288) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.5, l1_ratio=0;, score=(train=-70.366, test=-70.275) total time=   3.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.5, l1_ratio=0;, score=(train=-70.355, test=-70.376) total time=   2.5s
[CV 1/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-70.145, test=-70.116) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-70.054, test=-70.539) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-70.177, test=-70.057) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-70.161, test=-70.082) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-70.152, test=-70.195) total time=   0.3s
[CV 1/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-69.761, test=-69.827) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-69.657, test=-70.127) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-69.781, test=-69.660) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-69.774, test=-69.686) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-69.754, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=1.0, l1_ratio=0;, score=(train=-70.750, test=-70.641) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=1.0, l1_ratio=0;, score=(train=-70.645, test=-71.140) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=1.0, l1_ratio=0;, score=(train=-70.767, test=-70.691) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=1.0, l1_ratio=0;, score=(train=-70.759, test=-70.637) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=1.0, l1_ratio=0;, score=(train=-70.745, test=-70.758) total time=   2.4s
[CV 1/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-70.574, test=-70.495) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-70.475, test=-70.953) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-70.603, test=-70.487) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-70.584, test=-70.474) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-70.566, test=-70.604) total time=   0.3s
[CV 1/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-70.076, test=-70.144) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-70.014, test=-70.427) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-70.131, test=-69.975) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-70.116, test=-70.034) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-70.091, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


Fitting 5 folds for each of 9 candidates, totalling 45 fits


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0;, score=(train=-52.684, test=-53.099) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0;, score=(train=-53.000, test=-51.831) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0;, score=(train=-52.260, test=-54.779) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0;, score=(train=-53.007, test=-51.822) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0;, score=(train=-52.730, test=-52.960) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-52.681, test=-53.103) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-52.998, test=-51.831) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-52.257, test=-54.785) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-53.006, test=-51.824) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-52.729, test=-52.959) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-52.677, test=-53.114) total time=   1.9s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-52.994, test=-51.834) total time=   1.9s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-52.253, test=-54.798) total time=   2.0s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-53.005, test=-51.824) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-52.727, test=-52.954) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.5, l1_ratio=0;, score=(train=-53.683, test=-53.895) total time=   2.6s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.5, l1_ratio=0;, score=(train=-53.962, test=-52.821) total time=   2.6s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.5, l1_ratio=0;, score=(train=-53.279, test=-55.534) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.5, l1_ratio=0;, score=(train=-54.001, test=-52.669) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.5, l1_ratio=0;, score=(train=-53.706, test=-53.890) total time=   2.4s
[CV 1/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-53.517, test=-53.744) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-53.805, test=-52.645) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-53.112, test=-55.379) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-53.844, test=-52.499) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-53.547, test=-53.741) total time=   0.3s
[CV 1/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-53.177, test=-53.475) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-53.474, test=-52.272) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-52.768, test=-55.090) total time=   0.4s
[CV 4/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-53.519, test=-52.163) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-53.227, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=1.0, l1_ratio=0;, score=(train=-54.067, test=-54.237) total time=   2.6s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=1.0, l1_ratio=0;, score=(train=-54.334, test=-53.195) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=1.0, l1_ratio=0;, score=(train=-53.661, test=-55.900) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=1.0, l1_ratio=0;, score=(train=-54.374, test=-53.043) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=1.0, l1_ratio=0;, score=(train=-54.077, test=-54.256) total time=   2.5s
[CV 1/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-53.947, test=-54.124) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-54.221, test=-53.062) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-53.543, test=-55.780) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-54.260, test=-52.927) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-53.961, test=-54.163) total time=   0.3s
[CV 1/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-53.490, test=-53.745) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-53.800, test=-52.579) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-53.089, test=-55.393) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-53.820, test=-52.460) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-53.540, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


Fitting 5 folds for each of 9 candidates, totalling 45 fits


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0;, score=(train=-90.196, test=-88.788) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0;, score=(train=-89.849, test=-90.205) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0;, score=(train=-89.689, test=-90.859) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0;, score=(train=-89.706, test=-90.764) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0;, score=(train=-89.861, test=-90.225) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-90.193, test=-88.792) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-89.845, test=-90.210) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-89.687, test=-90.858) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-89.702, test=-90.767) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-89.858, test=-90.222) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-90.190, test=-88.799) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-89.841, test=-90.223) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-89.684, test=-90.856) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-89.698, test=-90.778) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-89.856, test=-90.220) total time=   2.1s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.5, l1_ratio=0;, score=(train=-92.240, test=-90.599) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.5, l1_ratio=0;, score=(train=-91.913, test=-91.836) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.5, l1_ratio=0;, score=(train=-91.758, test=-92.546) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.5, l1_ratio=0;, score=(train=-91.707, test=-92.800) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.5, l1_ratio=0;, score=(train=-91.871, test=-92.150) total time=   2.6s
[CV 1/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-91.728, test=-90.058) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-91.391, test=-91.349) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-91.231, test=-92.058) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-91.199, test=-92.295) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-91.354, test=-91.660) total time=   0.3s
[CV 1/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-90.696, test=-89.091) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-90.374, test=-90.472) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-90.184, test=-91.222) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-90.237, test=-91.225) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-90.363, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=1.0, l1_ratio=0;, score=(train=-92.969, test=-91.317) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=1.0, l1_ratio=0;, score=(train=-92.658, test=-92.503) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=1.0, l1_ratio=0;, score=(train=-92.497, test=-93.269) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=1.0, l1_ratio=0;, score=(train=-92.434, test=-93.514) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=1.0, l1_ratio=0;, score=(train=-92.604, test=-92.889) total time=   2.4s
[CV 1/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-92.530, test=-90.845) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-92.207, test=-92.086) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-92.041, test=-92.822) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-91.987, test=-93.083) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-92.152, test=-92.456) total time=   0.4s
[CV 1/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-91.171, test=-89.408) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-90.852, test=-90.830) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-90.672, test=-91.617) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-90.719, test=-91.814) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-90.856, t

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


Fitting 5 folds for each of 9 candidates, totalling 45 fits


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0;, score=(train=-135.775, test=-136.457) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0;, score=(train=-136.060, test=-135.262) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0;, score=(train=-135.713, test=-136.727) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0;, score=(train=-135.785, test=-136.356) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0;, score=(train=-135.746, test=-136.593) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-135.766, test=-136.451) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-136.049, test=-135.270) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-135.705, test=-136.741) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-135.776, test=-136.361) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=0.5;, score=(train=-135.738, test=-136.588) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-135.755, test=-136.459) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-136.039, test=-135.297) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-135.696, test=-136.761) total time=   2.3s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-135.768, test=-136.375) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.001, l1_ratio=1.0;, score=(train=-135.731, test=-136.580) total time=   2.2s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=0.5, l1_ratio=0;, score=(train=-139.139, test=-139.381) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=0.5, l1_ratio=0;, score=(train=-139.338, test=-138.867) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=0.5, l1_ratio=0;, score=(train=-139.228, test=-139.141) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=0.5, l1_ratio=0;, score=(train=-139.246, test=-139.016) total time=   2.5s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=0.5, l1_ratio=0;, score=(train=-139.042, test=-140.141) total time=   2.5s
[CV 1/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-138.329, test=-138.706) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-138.586, test=-138.028) total time=   0.3s
[CV 3/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-138.420, test=-138.376) total time=   0.3s
[CV 4/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-138.441, test=-138.308) total time=   0.3s
[CV 5/5] END alpha=0.5, l1_ratio=0.5;, score=(train=-138.284, test=-139.321) total time=   0.3s
[CV 1/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-136.344, test=-137.030) total time=   0.3s
[CV 2/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-136.679, test=-135.742) total time=   0.4s
[CV 3/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-136.327, test=-136.801) total time=   0.4s
[CV 4/5] END alpha=0.5, l1_ratio=1.0;, score=(train=-136.421, test=-136.618) total time=   0.4s
[CV 5/5] END alpha=0.5, l1_ratio=1.0;, sco

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 1/5] END alpha=1.0, l1_ratio=0;, score=(train=-140.114, test=-140.208) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 2/5] END alpha=1.0, l1_ratio=0;, score=(train=-140.258, test=-139.829) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 3/5] END alpha=1.0, l1_ratio=0;, score=(train=-140.188, test=-140.068) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 4/5] END alpha=1.0, l1_ratio=0;, score=(train=-140.213, test=-139.893) total time=   2.4s


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


[CV 5/5] END alpha=1.0, l1_ratio=0;, score=(train=-139.956, test=-141.127) total time=   2.5s
[CV 1/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-139.470, test=-139.659) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-139.667, test=-139.195) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-139.569, test=-139.436) total time=   0.3s
[CV 4/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-139.580, test=-139.332) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=0.5;, score=(train=-139.366, test=-140.518) total time=   0.3s
[CV 1/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-136.955, test=-137.531) total time=   0.3s
[CV 2/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-137.317, test=-136.420) total time=   0.3s
[CV 3/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-137.024, test=-137.249) total time=   0.4s
[CV 4/5] END alpha=1.0, l1_ratio=1.0;, score=(train=-137.091, test=-137.184) total time=   0.3s
[CV 5/5] END alpha=1.0, l1_ratio=1.0;, sco

  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


In [9]:
# Show the tuning summary collected by `model`. The stored output below holds,
# for each target column, the best_params and best_score of the ElasticNet search.
model.to_json()

{'ElasticNet': {'NU_NOTA_CN': {'best_params': {'alpha': 0.001, 'l1_ratio': 0}, 'best_score': -64.91882274435655}, 'NU_NOTA_CH': {'best_params': {'alpha': 0.001, 'l1_ratio': 0}, 'best_score': -69.49150686115085}, 'NU_NOTA_LC': {'best_params': {'alpha': 0.001, 'l1_ratio': 0}, 'best_score': -52.89826607389468}, 'NU_NOTA_MT': {'best_params': {'alpha': 0.001, 'l1_ratio': 0}, 'best_score': -90.16801902070017}, 'NU_NOTA_REDACAO': {'best_params': {'alpha': 0.001, 'l1_ratio': 0}, 'best_score': -136.27887211821962}}}


In [None]:
# Inspect the results held in the private `_results` attribute of `model`.
# NOTE(review): this cell has no execution count, and its stored ElasticNet
# scores differ from the to_json() output of the preceding cell, so the output
# shown here presumably comes from a different run — re-run to confirm.
model._results

{'DecisionTree': {'NU_NOTA_CH': {'best_params': {'max_depth': 100,
    'min_samples_leaf': 100},
   'best_score': -71.08253881261814},
  'NU_NOTA_CN': {'best_params': {'max_depth': 100, 'min_samples_leaf': 100},
   'best_score': -65.99895521648882},
  'NU_NOTA_LC': {'best_params': {'max_depth': 100, 'min_samples_leaf': 100},
   'best_score': -53.9850017077484},
  'NU_NOTA_MT': {'best_params': {'max_depth': 90, 'min_samples_leaf': 100},
   'best_score': -91.47403800860884},
  'NU_NOTA_REDACAO': {'best_params': {'max_depth': 100,
    'min_samples_leaf': 100},
   'best_score': -139.43192844405368}},
 'ElasticNet': {'NU_NOTA_CH': {'best_params': {'alpha': 0.001, 'l1_ratio': 0},
   'best_score': -69.52599774588066},
  'NU_NOTA_CN': {'best_params': {'alpha': 0.001, 'l1_ratio': 0.5},
   'best_score': -64.54731145502434},
  'NU_NOTA_LC': {'best_params': {'alpha': 0.001, 'l1_ratio': 0.5},
   'best_score': -52.51027994884307},
  'NU_NOTA_MT': {'best_params': {'alpha': 0.001, 'l1_ratio': 0},
   '