In [1]:
# Imports
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.ensemble import VotingClassifier, BaggingClassifier, AdaBoostClassifier

In [2]:
# Leitura e exibição do dataset
df = pd.read_csv("data_cancer.csv")
df

Unnamed: 0,Sample code number,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2
...,...,...,...,...,...,...,...,...,...,...,...
678,776715,3,1,1,1,3,2,1,1,1,2
679,841769,2,1,1,1,2,1,1,1,1,2
680,888820,5,10,10,3,7,3,8,10,2,4
681,897471,4,8,6,4,3,4,10,6,1,4


In [3]:
# Separação do dataset em X e y
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [4]:
# Normalização de X
sc = StandardScaler()
X_norm = sc.fit_transform(X)

In [5]:
# Separação entre treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.2, random_state=0)

In [6]:
# Criação, treino e exibição dos resultados utilizando AdaBoost
boost_class = AdaBoostClassifier()
boost_class.fit(X_train, y_train)
y_boost = boost_class.predict(X_test)
print(classification_report(y_test, y_boost))

              precision    recall  f1-score   support

           2       0.98      0.97      0.97        87
           4       0.94      0.96      0.95        50

    accuracy                           0.96       137
   macro avg       0.96      0.96      0.96       137
weighted avg       0.96      0.96      0.96       137



In [7]:
# Criação, treino e exibição dos resultados utilizando o método de bagging
bag_class = BaggingClassifier()
bag_class.fit(X_train, y_train)
y_bag = bag_class.predict(X_test)
print(classification_report(y_test, y_bag))

              precision    recall  f1-score   support

           2       0.97      0.95      0.96        87
           4       0.92      0.94      0.93        50

    accuracy                           0.95       137
   macro avg       0.94      0.95      0.95       137
weighted avg       0.95      0.95      0.95       137



In [8]:
# Criação do modelo de voting classifier
voting_classifier = VotingClassifier([
    ('KNN', KNeighborsClassifier()),
    ('Tree', DecisionTreeClassifier()),
    ('SVM', SVC())
])

In [9]:
# Criação do param grid para o grid search
param_grid = {
    "KNN__n_neighbors": [3, 5, 7],
    "Tree__max_depth": [10, 50, 90],
    "Tree__criterion": ["gini", "entropy"],
    "SVM__kernel": ["linear", "rbf"],
    "SVM__C": [0.1, 100, 500],
    "SVM__gamma": [0.1, 100, 500]
}

In [10]:
# Criação, treino e exibição dos resultados para o método de grid search utilizando o voting classifier
grid_class = GridSearchCV(voting_classifier, param_grid, verbose=1)
grid_class.fit(X_train, y_train)
y_grid = grid_class.predict(X_test)
print(classification_report(y_test, y_grid))

Fitting 5 folds for each of 324 candidates, totalling 1620 fits


              precision    recall  f1-score   support

           2       0.99      0.95      0.97        87
           4       0.92      0.98      0.95        50

    accuracy                           0.96       137
   macro avg       0.96      0.97      0.96       137
weighted avg       0.96      0.96      0.96       137

