In [1]:
import pandas as pd
import plotly.express as px
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

## Carregar os dados

In [2]:
df_leads = pd.read_csv('../bagging/aula/leads_cleaned.csv')

In [3]:
df_leads.head()

Unnamed: 0,Lead Origin,Lead Source,Do Not Email,Do Not Call,Converted,TotalVisits,Total Time Spent on Website,Page Views Per Visit,Last Activity,Search,Newspaper Article,X Education Forums,Newspaper,Digital Advertisement,Through Recommendations,A free copy of Mastering The Interview,Last Notable Activity
0,API,Olark Chat,0,0,0,0.0,0,0.0,Page Visited on Website,0,0,0,0,0,0,0,Modified
1,API,Organic Search,0,0,0,5.0,674,2.5,Email Opened,0,0,0,0,0,0,0,Email Opened
2,Landing Page Submission,Direct Traffic,0,0,1,2.0,1532,2.0,Email Opened,0,0,0,0,0,0,1,Email Opened
3,Landing Page Submission,Direct Traffic,0,0,0,1.0,305,1.0,Unreachable,0,0,0,0,0,0,0,Modified
4,Landing Page Submission,Google,0,0,1,2.0,1428,1.0,Converted to Lead,0,0,0,0,0,0,0,Modified


In [4]:
df_leads.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9074 entries, 0 to 9073
Data columns (total 17 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Lead Origin                             9074 non-null   object 
 1   Lead Source                             9074 non-null   object 
 2   Do Not Email                            9074 non-null   int64  
 3   Do Not Call                             9074 non-null   int64  
 4   Converted                               9074 non-null   int64  
 5   TotalVisits                             9074 non-null   float64
 6   Total Time Spent on Website             9074 non-null   int64  
 7   Page Views Per Visit                    9074 non-null   float64
 8   Last Activity                           9074 non-null   object 
 9   Search                                  9074 non-null   int64  
 10  Newspaper Article                       9074 non-null   int6

## Preparação dos dados

In [5]:
X = df_leads.drop(columns=['Converted'], axis=1)
y = df_leads['Converted']

In [6]:
import joblib

preprocessor = joblib.load('../bagging/aula/preprocessor_dataset_leads.pkl')

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=51)

# Aplicar preprocessor
X_train = preprocessor.fit_transform(X_train).toarray()
X_test = preprocessor.transform(X_test).toarray()

In [8]:
print(f'Conjunto de Treinamento: {X_train.shape}')
print(f'Conjunto de Teste: {X_test.shape}')

Conjunto de Treinamento: (7259, 68)
Conjunto de Teste: (1815, 68)


In [9]:
preprocessor.get_feature_names_out()

array(['num__Do Not Email', 'num__Do Not Call', 'num__TotalVisits',
       'num__Total Time Spent on Website', 'num__Page Views Per Visit',
       'num__Search', 'num__Newspaper Article', 'num__X Education Forums',
       'num__Newspaper', 'num__Digital Advertisement',
       'num__Through Recommendations',
       'num__A free copy of Mastering The Interview',
       'cat__Lead Origin_API', 'cat__Lead Origin_Landing Page Submission',
       'cat__Lead Origin_Lead Add Form', 'cat__Lead Origin_Lead Import',
       'cat__Lead Source_Click2call', 'cat__Lead Source_Direct Traffic',
       'cat__Lead Source_Facebook', 'cat__Lead Source_Google',
       'cat__Lead Source_Live Chat', 'cat__Lead Source_NC_EDM',
       'cat__Lead Source_Olark Chat', 'cat__Lead Source_Organic Search',
       'cat__Lead Source_Pay per Click Ads', 'cat__Lead Source_Reference',
       'cat__Lead Source_Referral Sites', 'cat__Lead Source_Social Media',
       'cat__Lead Source_WeLearn', 'cat__Lead Source_Welingak Webs

## Treinamento do Modelo de Voting Classifier

In [10]:
# Criar o modelo de VotingClassifier
lr_model = LogisticRegression(random_state=51)

# Para executar o modelo do Soft voting, é necessário colocar o hiperparâmetro probability = True no modelo SVC
svc_model = SVC(probability=True,kernel='linear')
tree_model = DecisionTreeClassifier(random_state=51)

voting_model = VotingClassifier(
    estimators=[
        ('logistic regression', lr_model),
        ('svc', svc_model),
        ('decision tree',tree_model)
    ],

    # Hard faz a votação pela maioria das predições dos estimadores
    # Soft faz a votação pela média ponderada das probabilidades de cada classe vindas de cada estimador
    voting='soft'
)


In [11]:
voting_model.fit(X_train,y_train)

0,1,2
,"estimators  estimators: list of (str, estimator) tuples Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones of those original estimators that will be stored in the class attribute ``self.estimators_``. An estimator can be set to ``'drop'`` using :meth:`set_params`. .. versionchanged:: 0.21  ``'drop'`` is accepted. Using None was deprecated in 0.22 and  support was removed in 0.24.","[('logistic regression', ...), ('svc', ...), ...]"
,"voting  voting: {'hard', 'soft'}, default='hard' If 'hard', uses predicted class labels for majority rule voting. Else if 'soft', predicts the class label based on the argmax of the sums of the predicted probabilities, which is recommended for an ensemble of well-calibrated classifiers.",'soft'
,"weights  weights: array-like of shape (n_classifiers,), default=None Sequence of weights (`float` or `int`) to weight the occurrences of predicted class labels (`hard` voting) or class probabilities before averaging (`soft` voting). Uses uniform weights if `None`.",
,"n_jobs  n_jobs: int, default=None The number of jobs to run in parallel for ``fit``. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See :term:`Glossary ` for more details. .. versionadded:: 0.18",
,"flatten_transform  flatten_transform: bool, default=True Affects shape of transform output only when voting='soft' If voting='soft' and flatten_transform=True, transform method returns matrix with shape (n_samples, n_classifiers * n_classes). If flatten_transform=False, it returns (n_classifiers, n_samples, n_classes).",True
,"verbose  verbose: bool, default=False If True, the time elapsed while fitting will be printed as it is completed. .. versionadded:: 0.23",False

0,1,2
,"penalty  penalty: {'l1', 'l2', 'elasticnet', None}, default='l2' Specify the norm of the penalty: - `None`: no penalty is added; - `'l2'`: add a L2 penalty term and it is the default choice; - `'l1'`: add a L1 penalty term; - `'elasticnet'`: both L1 and L2 penalty terms are added. .. warning::  Some penalties may not work with some solvers. See the parameter  `solver` below, to know the compatibility between the penalty and  solver. .. versionadded:: 0.19  l1 penalty with SAGA solver (allowing 'multinomial' + L1) .. deprecated:: 1.8  `penalty` was deprecated in version 1.8 and will be removed in 1.10.  Use `l1_ratio` instead. `l1_ratio=0` for `penalty='l2'`, `l1_ratio=1` for  `penalty='l1'` and `l1_ratio` set to any float between 0 and 1 for  `'penalty='elasticnet'`.",'deprecated'
,"C  C: float, default=1.0 Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization. `C=np.inf` results in unpenalized logistic regression. For a visual example on the effect of tuning the `C` parameter with an L1 penalty, see: :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_path.py`.",1.0
,"l1_ratio  l1_ratio: float, default=0.0 The Elastic-Net mixing parameter, with `0 <= l1_ratio <= 1`. Setting `l1_ratio=1` gives a pure L1-penalty, setting `l1_ratio=0` a pure L2-penalty. Any value between 0 and 1 gives an Elastic-Net penalty of the form `l1_ratio * L1 + (1 - l1_ratio) * L2`. .. warning::  Certain values of `l1_ratio`, i.e. some penalties, may not work with some  solvers. See the parameter `solver` below, to know the compatibility between  the penalty and solver. .. versionchanged:: 1.8  Default value changed from None to 0.0. .. deprecated:: 1.8  `None` is deprecated and will be removed in version 1.10. Always use  `l1_ratio` to specify the penalty type.",0.0
,"dual  dual: bool, default=False Dual (constrained) or primal (regularized, see also :ref:`this equation `) formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer `dual=False` when n_samples > n_features.",False
,"tol  tol: float, default=1e-4 Tolerance for stopping criteria.",0.0001
,"fit_intercept  fit_intercept: bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.",True
,"intercept_scaling  intercept_scaling: float, default=1 Useful only when the solver `liblinear` is used and `self.fit_intercept` is set to `True`. In this case, `x` becomes `[x, self.intercept_scaling]`, i.e. a ""synthetic"" feature with constant value equal to `intercept_scaling` is appended to the instance vector. The intercept becomes ``intercept_scaling * synthetic_feature_weight``. .. note::  The synthetic feature weight is subject to L1 or L2  regularization as all other features.  To lessen the effect of regularization on synthetic feature weight  (and therefore on the intercept) `intercept_scaling` has to be increased.",1
,"class_weight  class_weight: dict or 'balanced', default=None Weights associated with classes in the form ``{class_label: weight}``. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified. .. versionadded:: 0.17  *class_weight='balanced'*",
,"random_state  random_state: int, RandomState instance, default=None Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the data. See :term:`Glossary ` for details.",51
,"solver  solver: {'lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'}, default='lbfgs' Algorithm to use in the optimization problem. Default is 'lbfgs'. To choose a solver, you might want to consider the following aspects: - 'lbfgs' is a good default solver because it works reasonably well for a wide  class of problems. - For :term:`multiclass` problems (`n_classes >= 3`), all solvers except  'liblinear' minimize the full multinomial loss, 'liblinear' will raise an  error. - 'newton-cholesky' is a good choice for  `n_samples` >> `n_features * n_classes`, especially with one-hot encoded  categorical features with rare categories. Be aware that the memory usage  of this solver has a quadratic dependency on `n_features * n_classes`  because it explicitly computes the full Hessian matrix. - For small datasets, 'liblinear' is a good choice, whereas 'sag'  and 'saga' are faster for large ones; - 'liblinear' can only handle binary classification by default. To apply a  one-versus-rest scheme for the multiclass setting one can wrap it with the  :class:`~sklearn.multiclass.OneVsRestClassifier`. .. warning::  The choice of the algorithm depends on the penalty chosen (`l1_ratio=0`  for L2-penalty, `l1_ratio=1` for L1-penalty and `0 < l1_ratio < 1` for  Elastic-Net) and on (multinomial) multiclass support:  ================= ======================== ======================  solver l1_ratio multinomial multiclass  ================= ======================== ======================  'lbfgs' l1_ratio=0 yes  'liblinear' l1_ratio=1 or l1_ratio=0 no  'newton-cg' l1_ratio=0 yes  'newton-cholesky' l1_ratio=0 yes  'sag' l1_ratio=0 yes  'saga' 0<=l1_ratio<=1 yes  ================= ======================== ====================== .. note::  'sag' and 'saga' fast convergence is only guaranteed on features  with approximately the same scale. You can preprocess the data with  a scaler from :mod:`sklearn.preprocessing`. .. seealso::  Refer to the :ref:`User Guide ` for more  information regarding :class:`LogisticRegression` and more specifically the  :ref:`Table `  summarizing solver/penalty supports. .. versionadded:: 0.17  Stochastic Average Gradient (SAG) descent solver. Multinomial support in  version 0.18. .. versionadded:: 0.19  SAGA solver. .. versionchanged:: 0.22  The default solver changed from 'liblinear' to 'lbfgs' in 0.22. .. versionadded:: 1.2  newton-cholesky solver. Multinomial support in version 1.6.",'lbfgs'

0,1,2
,"C  C: float, default=1.0 Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. For an intuitive visualization of the effects of scaling the regularization parameter C, see :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.",1.0
,"kernel  kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf' Specifies the kernel type to be used in the algorithm. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``. For an intuitive visualization of different kernel types see :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.",'linear'
,"degree  degree: int, default=3 Degree of the polynomial kernel function ('poly'). Must be non-negative. Ignored by all other kernels.",3
,"gamma  gamma: {'scale', 'auto'} or float, default='scale' Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses  1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features - if float, must be non-negative. .. versionchanged:: 0.22  The default value of ``gamma`` changed from 'auto' to 'scale'.",'scale'
,"coef0  coef0: float, default=0.0 Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'.",0.0
,"shrinking  shrinking: bool, default=True Whether to use the shrinking heuristic. See the :ref:`User Guide `.",True
,"probability  probability: bool, default=False Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `.",True
,"tol  tol: float, default=1e-3 Tolerance for stopping criterion.",0.001
,"cache_size  cache_size: float, default=200 Specify the size of the kernel cache (in MB).",200
,"class_weight  class_weight: dict or 'balanced', default=None Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.",

0,1,2
,"criterion  criterion: {""gini"", ""entropy"", ""log_loss""}, default=""gini"" The function to measure the quality of a split. Supported criteria are ""gini"" for the Gini impurity and ""log_loss"" and ""entropy"" both for the Shannon information gain, see :ref:`tree_mathematical_formulation`.",'gini'
,"splitter  splitter: {""best"", ""random""}, default=""best"" The strategy used to choose the split at each node. Supported strategies are ""best"" to choose the best split and ""random"" to choose the best random split.",'best'
,"max_depth  max_depth: int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",
,"min_samples_split  min_samples_split: int or float, default=2 The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and  `ceil(min_samples_split * n_samples)` are the minimum  number of samples for each split. .. versionchanged:: 0.18  Added float values for fractions.",2
,"min_samples_leaf  min_samples_leaf: int or float, default=1 The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and  `ceil(min_samples_leaf * n_samples)` are the minimum  number of samples for each node. .. versionchanged:: 0.18  Added float values for fractions.",1
,"min_weight_fraction_leaf  min_weight_fraction_leaf: float, default=0.0 The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.",0.0
,"max_features  max_features: int, float or {""sqrt"", ""log2""}, default=None The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and  `max(1, int(max_features * n_features_in_))` features are considered at  each split. - If ""sqrt"", then `max_features=sqrt(n_features)`. - If ""log2"", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. .. note::  The search for a split does not stop until at least one  valid partition of the node samples is found, even if it requires to  effectively inspect more than ``max_features`` features.",
,"random_state  random_state: int, RandomState instance or None, default=None Controls the randomness of the estimator. The features are always randomly permuted at each split, even if ``splitter`` is set to ``""best""``. When ``max_features < n_features``, the algorithm will select ``max_features`` at random at each split before finding the best split among them. But the best found split may vary across different runs, even if ``max_features=n_features``. That is the case, if the improvement of the criterion is identical for several splits and one split has to be selected at random. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed to an integer. See :term:`Glossary ` for details.",51
,"max_leaf_nodes  max_leaf_nodes: int, default=None Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.",
,"min_impurity_decrease  min_impurity_decrease: float, default=0.0 A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following::  N_t / N * (impurity - N_t_R / N_t * right_impurity  - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19",0.0


## Análise dos Resultados

In [12]:
y_pred = voting_model.predict(X_test)

In [13]:
y_pred

array([1, 0, 0, ..., 0, 0, 1], shape=(1815,))

In [14]:
# Calcular métricas
accuracy = accuracy_score(y_test,y_pred)
precision = precision_score(y_test,y_pred)
recall = recall_score(y_test,y_pred)
f1 = f1_score(y_test,y_pred)

In [15]:
print(f'Acurácia: {accuracy}')
print(f'Precisão: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

Acurácia: 0.7862258953168044
Precisão: 0.7203125
Recall: 0.6880597014925374
F1-Score: 0.7038167938931298


In [16]:
# mostrar a matriz de confusao

conf_matrix = confusion_matrix(y_test,y_pred)

fig = px.imshow(conf_matrix,
                labels=dict(x='Predição', y='Real'),
                x=['Not Converted', 'Converted'],
                y=['Not Converted', 'Converted'],
                color_continuous_scale='Viridis' 
)

fig.update_traces(text=conf_matrix, texttemplate='%{z}')
fig.update_layout(coloraxis_showscale=False)

fig.show()

In [17]:
# Carregar as importâncias dos modelos
importances = []

for estimator in voting_model.estimators_:

    # Para modelos lineares, retorna coef
    if hasattr(estimator, 'coef_'):
        importances.append(np.abs(estimator.coef_[0]))
    # Para modelos baseados em árvores
    elif hasattr(estimator,'feature_importances_'):
        importances.append(estimator.feature_importances_)
    else:
        print(f'Não foi possível carregar as importâncias do modelo {type(estimator).__name__}')

        

In [18]:
importancia_media = np.mean(importances,axis=0)

importancia_media

array([0.22523852, 0.03587513, 0.11334446, 0.85206433, 0.08244887,
       0.0072784 , 0.03554339, 0.05729069, 0.04262539, 0.00572446,
       0.03675622, 0.01354635, 0.59017145, 0.62217065, 1.33160526,
       0.1270707 , 0.15406345, 0.332447  , 0.43163126, 0.12453701,
       0.01089549, 0.31246918, 0.32214603, 0.19972264, 0.04277403,
       0.27364251, 0.25843587, 0.35100332, 0.19591516, 0.88615977,
       0.1092695 , 0.33488185, 0.05459321, 0.14172053, 0.08172463,
       0.68320062, 0.60979761, 0.55129466, 0.02167269, 0.1209071 ,
       0.10067165, 0.29920279, 0.25233692, 0.6822895 , 0.48054617,
       0.31871189, 0.3549633 , 0.16075804, 0.22973273, 0.10262642,
       0.05419765, 0.02177073, 0.03318338, 0.18525745, 0.48984408,
       0.1209071 , 0.45366622, 0.1378337 , 0.05822045, 0.60502148,
       0.52612309, 0.57091876, 0.24872689, 0.35476814, 0.35041113,
       0.90905856, 0.02855717, 0.25441671])

In [19]:
feature_names = preprocessor.get_feature_names_out()
feature_names

array(['num__Do Not Email', 'num__Do Not Call', 'num__TotalVisits',
       'num__Total Time Spent on Website', 'num__Page Views Per Visit',
       'num__Search', 'num__Newspaper Article', 'num__X Education Forums',
       'num__Newspaper', 'num__Digital Advertisement',
       'num__Through Recommendations',
       'num__A free copy of Mastering The Interview',
       'cat__Lead Origin_API', 'cat__Lead Origin_Landing Page Submission',
       'cat__Lead Origin_Lead Add Form', 'cat__Lead Origin_Lead Import',
       'cat__Lead Source_Click2call', 'cat__Lead Source_Direct Traffic',
       'cat__Lead Source_Facebook', 'cat__Lead Source_Google',
       'cat__Lead Source_Live Chat', 'cat__Lead Source_NC_EDM',
       'cat__Lead Source_Olark Chat', 'cat__Lead Source_Organic Search',
       'cat__Lead Source_Pay per Click Ads', 'cat__Lead Source_Reference',
       'cat__Lead Source_Referral Sites', 'cat__Lead Source_Social Media',
       'cat__Lead Source_WeLearn', 'cat__Lead Source_Welingak Webs

In [20]:
df_feature_importances = pd.DataFrame({'Feature': feature_names, 'Importance': importancia_media})

df_feature_importances

Unnamed: 0,Feature,Importance
0,num__Do Not Email,0.225239
1,num__Do Not Call,0.035875
2,num__TotalVisits,0.113344
3,num__Total Time Spent on Website,0.852064
4,num__Page Views Per Visit,0.082449
...,...,...
63,cat__Last Notable Activity_Resubscribed to emails,0.354768
64,cat__Last Notable Activity_SMS Sent,0.350411
65,cat__Last Notable Activity_Unreachable,0.909059
66,cat__Last Notable Activity_Unsubscribed,0.028557


In [21]:
df_feature_importances = df_feature_importances.sort_values(by='Importance', ascending=True)
df_feature_importances

Unnamed: 0,Feature,Importance
9,num__Digital Advertisement,0.005724
5,num__Search,0.007278
20,cat__Lead Source_Live Chat,0.010895
11,num__A free copy of Mastering The Interview,0.013546
38,cat__Last Activity_Email Link Clicked,0.021673
...,...,...
35,cat__Last Activity_Approached upfront,0.683201
3,num__Total Time Spent on Website,0.852064
29,cat__Lead Source_Welingak Website,0.886160
65,cat__Last Notable Activity_Unreachable,0.909059


In [22]:
fig = px.bar(df_feature_importances,
             x='Importance',
             y='Feature',
             orientation='h',
             title='Importância das Features (Voting Classifier)'
)

fig.update_layout(height=1280, width=1000)
fig.show()

## Propriedades do Modelo

In [25]:
X_test[7]

array([-0.28575686, -0.01660108,  0.49682342, -0.80365386,  0.29349295,
       -0.03895716, -0.01660108, -0.01173793, -0.01173793, -0.02033349,
       -0.02876185, -0.67670029,  0.        ,  1.        ,  0.        ,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
        0.        ,  0.        ,  0.        ])

In [None]:
# Mostrar evidências
# Selecionar um registro da base para fazer a predição

X_sample = X_test[7].reshape(1,-1)

# Predições individuais dos estimadores
log_pred = voting_model.named_estimators_['logistic regression'].predict(X_sample)
svc_pred = voting_model.named_estimators_['svc'].predict(X_sample)
tree_pred = voting_model.named_estimators_['decision tree'].predict(X_sample)

voting_pred = voting_model.predict(X_sample)


In [26]:
X_sample

array([[-0.28575686, -0.01660108,  0.49682342, -0.80365386,  0.29349295,
        -0.03895716, -0.01660108, -0.01173793, -0.01173793, -0.02033349,
        -0.02876185, -0.67670029,  0.        ,  1.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  1.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  0.        ,  0.        ]])

In [24]:
print(f'Predição da Regressão Logística: {log_pred[0]}')
print(f'Predição do SVC: {svc_pred[0]}')
print(f'Predição da Árvore de Decisão: {tree_pred[0]}')
print(f'Predição do Voting model: {voting_pred[0]}')


Predição da Regressão Logística: 0
Predição do SVC: 0
Predição da Árvore de Decisão: 1
Predição do Voting model: 1


In [31]:
# Mostrar evidências
# Selecionar um registro da base para fazer a predição

X_sample = X_test[7].reshape(1,-1)

# Predições individuais dos estimadores
log_proba = voting_model.named_estimators_['logistic regression'].predict_proba(X_sample)
svc_proba = voting_model.named_estimators_['svc'].predict_proba(X_sample)
tree_proba = voting_model.named_estimators_['decision tree'].predict_proba(X_sample)

voting_proba = voting_model.predict_proba(X_sample)


In [32]:
print(f'Predição da Regressão Logística: {log_proba[0]}')
print(f'Predição do SVC: {svc_proba[0]}')
print(f'Predição da Árvore de Decisão: {tree_proba[0]}')
print(f'Predição do Voting model: {voting_proba[0]}')


Predição da Regressão Logística: [0.67050795 0.32949205]
Predição do SVC: [0.69534976 0.30465024]
Predição da Árvore de Decisão: [0. 1.]
Predição do Voting model: [0.4552859 0.5447141]
