In [90]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as kNN
from sklearn.svm import SVC as SVM
from sklearn.metrics import accuracy_score, confusion_matrix

In [77]:
pd.set_option('display.max_columns', None)
data_breast_cancer = load_breast_cancer()
data = pd.DataFrame(data = data_breast_cancer.data, columns = data_breast_cancer.feature_names)
data['diagnosis'] = data_breast_cancer.target
data.info() # hurray all data is numerical
X = data.drop(columns = 'diagnosis').values
y = data['diagnosis'].values

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

In [81]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
outliers = np.abs((y_train - y_train.mean()) / y_train.std()) > 3
X_train_no_outliers = X_train[~outliers, :]
y_train_no_outliers = y_train[~outliers]

In [83]:
scaler = StandardScaler()
scaler.fit(X_train_no_outliers)
X_train = scaler.transform(X_train_no_outliers)
X_test = scaler.transform(X_test)

In [92]:
models = [kNN(), SVM()]
for model in models:
    model.fit(X_train, y_train_no_outliers)
    y_pred = model.predict(X_test)
    result = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    print(result)

[[47  5]
 [ 1 61]]
0.9473684210526315
[[50  2]
 [ 0 62]]
0.9824561403508771
