<a href="https://colab.research.google.com/github/FGalvao77/others-knowledge-in-python-for-data-science/blob/main/Lazy_Predict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Lazy Predict**

Esta biblioteca permite treinar e avaliar vários modelos de aprendizado de máquina de uma só vez, usando o scikit-learn, o que economiza muito tempo e código.

- Documentação:
  - https://pypi.org/project/lazypredict/

In [1]:
# instalando a biblioteca "Lazy Predict"
!pip install lazypredict



In [22]:
# instalando a versão do "pandas" compatível com "lazypredict"
!pip install --user pandas==1.0.5



In [None]:
# Upgrade the packages if needed (left disabled; run manually only when required).
# NOTE(review): upgrading pandas would override the pandas==1.0.5 pin used by this
# notebook — confirm compatibility with lazypredict before enabling.
# !pip install --upgrade lazypredict
# !pip install --upgrade pandas
# !pip install --upgrade scikit-learn

## **Modelos de Classificação**

In [2]:
# importando as bibliotecas
import lazypredict
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn import datasets



In [3]:
# Report the library versions in use: a header line followed by one line per
# library, emitted through a single print call.
print(
    'Versões utilizadas',
    f'LazyPredict: {lazypredict.__version__}',
    f'Pandas: {pd.__version__}',
    sep='\n',
)

Versões utilizadas
LazyPredict: 0.2.9
Pandas: 1.0.5


In [4]:
# Load the breast-cancer dataset and bind it to the variable "df".
# Despite the name, "df" is not a pandas DataFrame: the notebook's own
# type(df) check reports sklearn.utils.Bunch.
df = datasets.load_breast_cancer()
df  # display the loaded object

 'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'feature_names': array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
        'mean smoothness', 'mean compactness', 'mean concavity',
        'mean concave points', 'mean symmetry', 'mean fractal dimension',
        'radius error', 'texture error', 'perimeter error', 'area error',
        'smoothness error', 'compactness error', 'concavity error',
        'concave points error', 'symmetry error',
        'fractal di

In [5]:
# Check the type of the loaded object
type(df)

sklearn.utils.Bunch

In [6]:
# Wrap the feature matrix in a DataFrame (one column per feature name) so it
# is easier to inspect, then preview the first 10 rows.
df_transformado = pd.DataFrame(
    data=df.data,
    columns=df.feature_names,
)
df_transformado.head(n=10)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.12,0.28,0.3,0.15,0.24,0.08,1.09,0.91,8.59,153.4,0.01,0.05,0.05,0.02,0.03,0.01,25.38,17.33,184.6,2019.0,0.16,0.67,0.71,0.27,0.46,0.12
1,20.57,17.77,132.9,1326.0,0.08,0.08,0.09,0.07,0.18,0.06,0.54,0.73,3.4,74.08,0.01,0.01,0.02,0.01,0.01,0.0,24.99,23.41,158.8,1956.0,0.12,0.19,0.24,0.19,0.28,0.09
2,19.69,21.25,130.0,1203.0,0.11,0.16,0.2,0.13,0.21,0.06,0.75,0.79,4.58,94.03,0.01,0.04,0.04,0.02,0.02,0.0,23.57,25.53,152.5,1709.0,0.14,0.42,0.45,0.24,0.36,0.09
3,11.42,20.38,77.58,386.1,0.14,0.28,0.24,0.11,0.26,0.1,0.5,1.16,3.44,27.23,0.01,0.07,0.06,0.02,0.06,0.01,14.91,26.5,98.87,567.7,0.21,0.87,0.69,0.26,0.66,0.17
4,20.29,14.34,135.1,1297.0,0.1,0.13,0.2,0.1,0.18,0.06,0.76,0.78,5.44,94.44,0.01,0.02,0.06,0.02,0.02,0.01,22.54,16.67,152.2,1575.0,0.14,0.2,0.4,0.16,0.24,0.08
5,12.45,15.7,82.57,477.1,0.13,0.17,0.16,0.08,0.21,0.08,0.33,0.89,2.22,27.19,0.01,0.03,0.04,0.01,0.02,0.01,15.47,23.75,103.4,741.6,0.18,0.52,0.54,0.17,0.4,0.12
6,18.25,19.98,119.6,1040.0,0.09,0.11,0.11,0.07,0.18,0.06,0.45,0.77,3.18,53.91,0.0,0.01,0.02,0.01,0.01,0.0,22.88,27.66,153.2,1606.0,0.14,0.26,0.38,0.19,0.31,0.08
7,13.71,20.83,90.2,577.9,0.12,0.16,0.09,0.06,0.22,0.07,0.58,1.38,3.86,50.96,0.01,0.03,0.02,0.01,0.01,0.01,17.06,28.14,110.6,897.0,0.17,0.37,0.27,0.16,0.32,0.12
8,13.0,21.82,87.5,519.8,0.13,0.19,0.19,0.09,0.23,0.07,0.31,1.0,2.41,24.32,0.01,0.04,0.04,0.01,0.02,0.0,15.49,30.73,106.2,739.3,0.17,0.54,0.54,0.21,0.44,0.11
9,12.46,24.04,83.97,475.9,0.12,0.24,0.23,0.09,0.2,0.08,0.3,1.6,2.04,23.94,0.01,0.07,0.08,0.01,0.02,0.01,15.09,40.68,97.65,711.4,0.19,1.06,1.1,0.22,0.44,0.21


In [7]:
# List the column names of the DataFrame
df_transformado.columns

Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'],
      dtype='object')

In [8]:
# Add the "target" column to "df_transformado" (this mutates the frame in place)
df_transformado['target'] = df.target
df_transformado.head(10)  # show the first 10 rows

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.12,0.28,0.3,0.15,0.24,0.08,1.09,0.91,8.59,153.4,0.01,0.05,0.05,0.02,0.03,0.01,25.38,17.33,184.6,2019.0,0.16,0.67,0.71,0.27,0.46,0.12,0
1,20.57,17.77,132.9,1326.0,0.08,0.08,0.09,0.07,0.18,0.06,0.54,0.73,3.4,74.08,0.01,0.01,0.02,0.01,0.01,0.0,24.99,23.41,158.8,1956.0,0.12,0.19,0.24,0.19,0.28,0.09,0
2,19.69,21.25,130.0,1203.0,0.11,0.16,0.2,0.13,0.21,0.06,0.75,0.79,4.58,94.03,0.01,0.04,0.04,0.02,0.02,0.0,23.57,25.53,152.5,1709.0,0.14,0.42,0.45,0.24,0.36,0.09,0
3,11.42,20.38,77.58,386.1,0.14,0.28,0.24,0.11,0.26,0.1,0.5,1.16,3.44,27.23,0.01,0.07,0.06,0.02,0.06,0.01,14.91,26.5,98.87,567.7,0.21,0.87,0.69,0.26,0.66,0.17,0
4,20.29,14.34,135.1,1297.0,0.1,0.13,0.2,0.1,0.18,0.06,0.76,0.78,5.44,94.44,0.01,0.02,0.06,0.02,0.02,0.01,22.54,16.67,152.2,1575.0,0.14,0.2,0.4,0.16,0.24,0.08,0
5,12.45,15.7,82.57,477.1,0.13,0.17,0.16,0.08,0.21,0.08,0.33,0.89,2.22,27.19,0.01,0.03,0.04,0.01,0.02,0.01,15.47,23.75,103.4,741.6,0.18,0.52,0.54,0.17,0.4,0.12,0
6,18.25,19.98,119.6,1040.0,0.09,0.11,0.11,0.07,0.18,0.06,0.45,0.77,3.18,53.91,0.0,0.01,0.02,0.01,0.01,0.0,22.88,27.66,153.2,1606.0,0.14,0.26,0.38,0.19,0.31,0.08,0
7,13.71,20.83,90.2,577.9,0.12,0.16,0.09,0.06,0.22,0.07,0.58,1.38,3.86,50.96,0.01,0.03,0.02,0.01,0.01,0.01,17.06,28.14,110.6,897.0,0.17,0.37,0.27,0.16,0.32,0.12,0
8,13.0,21.82,87.5,519.8,0.13,0.19,0.19,0.09,0.23,0.07,0.31,1.0,2.41,24.32,0.01,0.04,0.04,0.01,0.02,0.0,15.49,30.73,106.2,739.3,0.17,0.54,0.54,0.21,0.44,0.11,0
9,12.46,24.04,83.97,475.9,0.12,0.24,0.23,0.09,0.2,0.08,0.3,1.6,2.04,23.94,0.01,0.07,0.08,0.01,0.02,0.01,15.09,40.68,97.65,711.4,0.19,1.06,1.1,0.22,0.44,0.21,0


In [9]:
# Show the last 10 rows
df_transformado.tail(10)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
559,11.51,23.93,74.52,403.5,0.09,0.1,0.11,0.04,0.14,0.07,0.24,2.9,1.94,16.97,0.01,0.03,0.06,0.01,0.01,0.0,12.48,37.16,82.28,474.2,0.13,0.25,0.36,0.1,0.21,0.09,1
560,14.05,27.15,91.38,600.4,0.1,0.11,0.04,0.04,0.15,0.06,0.36,1.49,2.89,29.84,0.01,0.03,0.02,0.02,0.02,0.01,15.3,33.17,100.2,706.7,0.12,0.23,0.13,0.1,0.23,0.08,1
561,11.2,29.37,70.67,386.0,0.07,0.04,0.0,0.0,0.11,0.06,0.31,3.9,2.04,22.81,0.01,0.01,0.0,0.0,0.02,0.0,11.92,38.3,75.19,439.6,0.09,0.05,0.0,0.0,0.16,0.06,1
562,15.22,30.62,103.4,716.9,0.1,0.21,0.26,0.09,0.21,0.07,0.26,1.21,2.36,22.65,0.0,0.05,0.07,0.02,0.02,0.01,17.52,42.79,128.7,915.0,0.14,0.79,1.17,0.24,0.41,0.14,0
563,20.92,25.09,143.0,1347.0,0.11,0.22,0.32,0.15,0.21,0.07,0.96,1.03,8.76,118.8,0.01,0.04,0.08,0.03,0.02,0.01,24.29,29.41,179.1,1819.0,0.14,0.42,0.66,0.25,0.29,0.1,0
564,21.56,22.39,142.0,1479.0,0.11,0.12,0.24,0.14,0.17,0.06,1.18,1.26,7.67,158.7,0.01,0.03,0.05,0.02,0.01,0.0,25.45,26.4,166.1,2027.0,0.14,0.21,0.41,0.22,0.21,0.07,0
565,20.13,28.25,131.2,1261.0,0.1,0.1,0.14,0.1,0.18,0.06,0.77,2.46,5.2,99.04,0.01,0.02,0.04,0.02,0.02,0.0,23.69,38.25,155.0,1731.0,0.12,0.19,0.32,0.16,0.26,0.07,0
566,16.6,28.08,108.3,858.1,0.08,0.1,0.09,0.05,0.16,0.06,0.46,1.07,3.42,48.55,0.01,0.04,0.05,0.02,0.01,0.0,18.98,34.12,126.7,1124.0,0.11,0.31,0.34,0.14,0.22,0.08,0
567,20.6,29.33,140.1,1265.0,0.12,0.28,0.35,0.15,0.24,0.07,0.73,1.59,5.77,86.22,0.01,0.06,0.07,0.02,0.02,0.01,25.74,39.42,184.6,1821.0,0.17,0.87,0.94,0.27,0.41,0.12,0
568,7.76,24.54,47.92,181.0,0.05,0.04,0.0,0.0,0.16,0.06,0.39,1.43,2.55,19.15,0.01,0.0,0.0,0.0,0.03,0.0,9.46,30.37,59.16,268.6,0.09,0.06,0.0,0.0,0.29,0.07,1


In [10]:
# Split the data into X (independent variables) and y (dependent variable).
# Both are taken straight from the loaded dataset object, not from "df_transformado".
X, y = df.data, df.target

In [11]:
# Display the feature matrix "X"
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [12]:
# Display the target vector "y"
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [13]:
# Split the data into training and test sets with "train_test_split"
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=12,  # fixed seed so the split is the same on every run
    test_size=0.2,  # 20% of the rows are held out for testing
)

**É aqui que a mágica acontece!**


In [14]:
# Instantiate the classifier benchmark.
# predictions=True makes fit() return the per-model predictions as its second
# value, so the "predictions" name unpacked from clf.fit(...) really holds
# predictions; this matches how the LazyRegressor in this notebook is configured.
# The remaining arguments keep the run quiet, suppress warnings and use only
# the built-in metrics.
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None, predictions=True)

In [15]:
# Display the classifier benchmark object
clf

<lazypredict.Supervised.LazyClassifier at 0x7fd41a156390>

In [16]:
# Train and evaluate the models; "models" is the score table displayed below.
# NOTE(review): the second returned value presumably holds per-model predictions
# only when the LazyClassifier was created with predictions=True — confirm against
# the lazypredict version in use.
models,predictions = clf.fit(X_train, X_test, y_train, y_test)
models

100%|██████████| 29/29 [00:01<00:00, 16.72it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LinearSVC,0.97,0.97,0.97,0.97,0.04
LogisticRegression,0.97,0.97,0.97,0.97,0.04
Perceptron,0.97,0.97,0.97,0.97,0.03
SGDClassifier,0.96,0.96,0.96,0.96,0.02
PassiveAggressiveClassifier,0.96,0.96,0.96,0.96,0.02
RidgeClassifierCV,0.96,0.95,0.95,0.96,0.04
RidgeClassifier,0.96,0.95,0.95,0.96,0.02
ExtraTreesClassifier,0.96,0.95,0.95,0.96,0.15
KNeighborsClassifier,0.96,0.95,0.95,0.96,0.02
XGBClassifier,0.95,0.94,0.94,0.95,0.23


## **Modelos de Regressão**

In [17]:
# importando a biblioteca necessária
from lazypredict.Supervised import LazyRegressor

In [18]:
# Load the Boston housing dataset.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on older scikit-learn releases — confirm the installed
# version or switch to another regression dataset.
boston = datasets.load_boston()

In [19]:
# Split the data into "X" (features) and "y" (target).
# This rebinds the X and y names used by the classification section.
X, y = boston.data, boston.target

In [20]:
# Split the data into training and test sets with "train_test_split"
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=12,  # fixed seed so the split is the same on every run
    test_size=0.2,  # 20% of the rows are held out for testing
)

In [21]:
# Instantiate the regressor benchmark.
# predictions=True presumably makes fit() also return the per-model predictions —
# confirm against the lazypredict version in use.
reg = LazyRegressor(predictions=True)

In [22]:
# Train and evaluate the regression models; "models" is the score table displayed
# below. This rebinds the "models" and "predictions" names from the classification section.
models,predictions = reg.fit(X_train, X_test, y_train, y_test)
models

100%|██████████| 42/42 [00:03<00:00, 11.70it/s]


Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ExtraTreesRegressor,0.88,0.9,2.87,0.22
RandomForestRegressor,0.86,0.88,3.19,0.34
XGBRegressor,0.86,0.87,3.2,0.09
LGBMRegressor,0.85,0.87,3.22,0.08
GradientBoostingRegressor,0.84,0.86,3.34,0.19
BaggingRegressor,0.84,0.86,3.38,0.05
HistGradientBoostingRegressor,0.83,0.85,3.46,0.3
DecisionTreeRegressor,0.8,0.83,3.74,0.02
AdaBoostRegressor,0.79,0.82,3.83,0.12
PoissonRegressor,0.78,0.81,3.92,0.02
