# Predicting Breast Cancer Class Using Support Vector Classification

## Importing the libraries

In [249]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [250]:
dataset = pd.read_csv('breast-cancer.data')
X = dataset.iloc[:, 1:].values
y = dataset.iloc[:, 0].values

## Filling in Missing Values

In [251]:
from sklearn.impute import SimpleImputer
imputer1 = SimpleImputer(missing_values="?", strategy = "most_frequent")
X_new1 = X[:, -5].reshape(-1, 1)
imputer1.fit(X_new1)
X_new1 = imputer1.transform(X_new1)

imputer2 = SimpleImputer(missing_values="?", strategy = "most_frequent")
X_new2 = X[:, -2].reshape(-1, 1)
imputer1.fit(X_new2)
X_new2 = imputer1.transform(X_new2)


In [252]:
X[:, -2]

array(['right_up', 'left_low', 'left_up', 'right_low', 'left_low',
       'left_low', 'left_low', 'left_low', 'left_up', 'central',
       'left_low', 'right_up', 'right_up', 'left_up', 'left_low',
       'left_low', 'left_low', 'left_low', 'right_up', 'left_low',
       'left_low', 'left_low', 'left_up', 'left_up', 'left_up',
       'left_low', 'left_up', 'left_low', 'left_low', 'left_up',
       'left_low', 'left_low', 'left_up', 'left_up', 'left_up', 'central',
       'central', 'left_low', 'left_low', 'left_low', 'left_low',
       'central', 'left_up', 'right_up', 'left_up', 'left_low', 'left_up',
       'right_up', 'left_low', 'left_low', 'right_low', 'left_low',
       'left_up', 'left_up', 'right_up', 'central', 'right_up', 'left_up',
       'left_up', 'central', 'right_low', 'left_low', 'right_up',
       'right_low', 'left_up', 'right_low', 'left_up', 'left_low',
       'left_low', 'left_low', 'left_up', 'left_up', 'right_low',
       'right_low', 'left_low', 'left_low', 'cen

In [253]:
X[:, -5]

array(['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no',
       'no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'yes',
       'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'yes', 'yes', 'no',
       'no', 'yes', '?', 'no', 'yes', 'no', 'no',

## Categorical Encoding for Matrix of Features (Label Encoding)

In [254]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 0] = le.fit_transform(X[:, 0])

le2 = LabelEncoder()
X[:, 1] = le2.fit_transform(X[:, 1])

le3 = LabelEncoder()
X[:, 2] = le3.fit_transform(X[:, 2])

le4 = LabelEncoder()
X[:, 3] = le4.fit_transform(X[:, 3])

le5 = LabelEncoder()
X[:, -5] = le5.fit_transform(X[:, -5])

le6 = LabelEncoder()
X[:, -3] = le4.fit_transform(X[:, -3])

le7 = LabelEncoder()
X[:, -1] = le7.fit_transform(X[:, -1])



## Categorical Encoding for Matrix of Features (OneHot Encoding)

In [255]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder1', OneHotEncoder(), [-2])], 
                                     remainder = "passthrough")
X = np.array(ct.fit_transform(X))

## Splitting the dataset into the Training set and Test set

In [256]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [257]:
print(X_train)

[[0.0 0.0 1.0 ... 2 1 1]
 [0.0 0.0 0.0 ... 2 0 1]
 [0.0 0.0 0.0 ... 2 0 0]
 ...
 [0.0 0.0 1.0 ... 1 1 0]
 [0.0 0.0 0.0 ... 1 1 0]
 [0.0 0.0 0.0 ... 2 1 0]]


In [258]:
print(X_test)

[[0.0 1.0 0.0 ... 1 1 0]
 [0.0 0.0 1.0 ... 1 1 1]
 [0.0 0.0 0.0 ... 2 1 0]
 ...
 [0.0 0.0 0.0 ... 3 1 0]
 [0.0 0.0 0.0 ... 2 1 1]
 [0.0 0.0 0.0 ... 3 0 0]]


In [259]:
print(y_train)

['recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'recurrence-events' 'recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'recurrence-events' 'recurrence-events' 'recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-eve

In [260]:
print(y_test)

['no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'recurrence-events' 'no-recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'recurrence-events' 'no-recurrence-events' 'recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'no-recurrence-events' 'no-recurrence-events'
 'no-recurrence-events' 'recurrence-events' 'recurrence-eve

## Feature Scaling

In [261]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [262]:
print(X_train)

[[ 0.         -0.27524094  1.32899728 ... -0.02523873  1.07308674
   1.78226558]
 [ 0.         -0.27524094 -0.75244699 ... -0.02523873 -0.93189112
   1.78226558]
 [ 0.         -0.27524094 -0.75244699 ... -0.02523873 -0.93189112
  -0.56108361]
 ...
 [ 0.         -0.27524094  1.32899728 ... -1.36920099  1.07308674
  -0.56108361]
 [ 0.         -0.27524094 -0.75244699 ... -1.36920099  1.07308674
  -0.56108361]
 [ 0.         -0.27524094 -0.75244699 ... -0.02523873  1.07308674
  -0.56108361]]


In [263]:
print(X_test)

[[ 0.          3.63318042 -0.75244699 ... -1.36920099  1.07308674
  -0.56108361]
 [ 0.         -0.27524094  1.32899728 ... -1.36920099  1.07308674
   1.78226558]
 [ 0.         -0.27524094 -0.75244699 ... -0.02523873  1.07308674
  -0.56108361]
 ...
 [ 0.         -0.27524094 -0.75244699 ...  1.31872354  1.07308674
  -0.56108361]
 [ 0.         -0.27524094 -0.75244699 ... -0.02523873  1.07308674
   1.78226558]
 [ 0.         -0.27524094 -0.75244699 ...  1.31872354 -0.93189112
  -0.56108361]]


## Training the SVM model on the Training set

In [264]:
from sklearn.svm import SVC
classifier = SVC(C=1.0, kernel='rbf', random_state=0)
classifier.fit(X_train, y_train)

SVC(random_state=0)

## Making the Confusion Matrix

In [265]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[44  2]
 [18  8]]


0.7222222222222222