In [None]:
import pandas as pd
import sklearn
from sklearn import model_selection, svm, metrics
import numpy as np
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
import pickle

In [None]:
# Load the Wisconsin breast-cancer table.
# NOTE(review): the column names used below ("id_number", "bare_nuclei", "class")
# are looked up by name, so the file is assumed to start with a header row - confirm.
breastCancer_data = pd.read_csv("./BreastCancer-Wisconsin/breast-cancer-wisconsin.data")

# Human-readable labels for the numeric class codes. The reporting cells look a
# label up with classes[int(code / 2) - 1], i.e. code 2 -> index 0 and
# code 4 -> index 1. The UCI dataset description defines 2 = benign and
# 4 = malignant, so 'benign' must come first; the previous order printed every
# label inverted.
classes = ['benign', 'malignant']
features = ["clump_thickness", "cell_size_uniformity", "cell_shape_uniformity", "marginal_adhesion",
            "cell_size_epithelial", "bland_chromatin", "normal_nucleoli", "mitoses"]

# Feature matrix: every column except the record id, "bare_nuclei" and the target.
X = breastCancer_data.drop(["id_number", "bare_nuclei", "class"], axis=1)
# Target vector: the numeric class code of each sample.
Y = breastCancer_data["class"]

In [None]:
# Repeatedly draw a random 80/20 split and fit an RBF-kernel SVM, keeping the
# run that scores best on its own held-out 20%. The search stops early as soon
# as a split reaches 100% test accuracy.
# NOTE(review): the split is selected by its test score, so the accuracy printed
# below is optimistic and is not an unbiased estimate of generalisation.
MAX_ATTEMPTS = 1000  # hard cap so the search terminates even if 100% is never reached

best_run = None  # (accuracy, model, x_train, x_test, y_train, y_test, predictions)
for _ in range(MAX_ATTEMPTS):
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X.values, Y.values, test_size=0.2)
    model = svm.SVC(kernel='rbf')
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)
    accuracy = metrics.accuracy_score(y_test, predictions)
    if best_run is None or accuracy > best_run[0]:
        best_run = (accuracy, model, x_train, x_test, y_train, y_test, predictions)
    if accuracy >= 1:
        break

# Make the notebook-level names refer to the best run, not merely the last one.
accuracy, model, x_train, x_test, y_train, y_test, predictions = best_run

# Persist the model together with its exact split. The objects are written as
# consecutive pickles in this order: model, x_train, x_test, y_train, y_test.
# `with` guarantees the handle is closed even if a dump fails.
with open('best-model.pkl', 'wb') as file:
    for saved_object in (model, x_train, x_test, y_train, y_test):
        pickle.dump(saved_object, file)

print("Accuracy: ", accuracy*100, "%")

# One row per test sample: feature vector, predicted label, true label.
# Class codes 2 and 4 map to indices 0 and 1 of `classes`.
print("Features \t\t\t\t\tPredicted \t\t Target")
for (x, p, y) in zip(x_test, predictions, y_test):
    print(x, "\t\t\t", classes[int(p / 2) - 1], "\t\t", classes[int(y / 2) - 1])

In [None]:
# Restore the model and the exact train/test split from best-model.pkl.
# The file holds five consecutive pickles, read back in the order they were
# written: model, x_train, x_test, y_train, y_test.
# NOTE: unpickling can execute arbitrary code; only load a best-model.pkl that
# this notebook produced.
# `with` guarantees the handle is closed even if a load fails.
with open('best-model.pkl', 'rb') as file:
    model = pickle.load(file)
    x_train = pickle.load(file)
    x_test = pickle.load(file)
    y_train = pickle.load(file)
    y_test = pickle.load(file)

# Re-score the restored model on the restored test split.
predictions = model.predict(x_test)
accuracy = metrics.accuracy_score(y_test, predictions)
print("Accuracy: ", accuracy*100, "%")

# One row per test sample: feature vector, predicted label, true label.
# Class codes 2 and 4 map to indices 0 and 1 of `classes`.
print("Features \t\t\t\t\tPredicted \t\t Target")
for (x, p, y) in zip(x_test, predictions, y_test):
    print(x, "\t\t\t", classes[int(p / 2) - 1], "\t\t", classes[int(y / 2) - 1])

### Plotting 2 features at a time

In [None]:
def getGraph(kernel='linear', features=("clump_thickness", "cell_size_uniformity"),
             min_accuracy=0.97, max_attempts=1000):
    """Fit an SVM on two features and plot its decision regions.

    Random 80/20 splits of ``breastCancer_data`` are drawn until a model scores
    above ``min_accuracy`` on its held-out part, or until ``max_attempts``
    splits have been tried; the best model seen is then plotted over all
    samples.

    Parameters
    ----------
    kernel : str
        Kernel name passed to ``svm.SVC``.
    features : sequence of two str
        Column names of ``breastCancer_data`` used for the x and y axes.
    min_accuracy : float
        The search stops at the first split whose test accuracy exceeds this.
    max_attempts : int
        Upper bound on the number of splits tried, so the call always returns.

    Raises
    ------
    ValueError
        If ``features`` does not name exactly two columns or ``max_attempts``
        is smaller than 1.
    """
    if len(features) != 2:
        raise ValueError("getGraph needs exactly two feature columns")
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    X1 = breastCancer_data[list(features)]
    Y1 = breastCancer_data["class"]

    best_accuracy, best_model = -1.0, None
    for _ in range(max_attempts):
        x_train1, x_test1, y_train1, y_test1 = model_selection.train_test_split(
            X1.values, Y1.values, test_size=0.2)
        model1 = svm.SVC(kernel=kernel)
        model1.fit(x_train1, y_train1)
        accuracy = metrics.accuracy_score(y_test1, model1.predict(x_test1))
        # Remember the best model so there is something to plot even when the
        # threshold is never exceeded.
        if accuracy > best_accuracy:
            best_accuracy, best_model = accuracy, model1
        if accuracy > min_accuracy:
            break

    plot_decision_regions(X=X1.values,
                          y=Y1.values,
                          clf=best_model,
                          legend=2)

    # Update plot object with X/Y axis labels and Figure Title
    plt.xlabel(X1.columns[0], size=14)
    plt.ylabel(X1.columns[1], size=14)
    plt.title('SVM - '+kernel+' Kernel', size=16)
    plt.show()

##### Linear Kernel

In [None]:
# Decision regions of an SVM with the 'linear' kernel.
getGraph('linear')

##### RBF Kernel

In [None]:
# Decision regions of an SVM with the 'rbf' kernel.
getGraph('rbf')

##### Polynomial Kernel

In [None]:
# Decision regions of an SVM with the 'poly' kernel.
getGraph('poly')