In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC


# Cancer dataset example

## Data showcase

In [None]:
# Load scikit-learn's bundled breast-cancer dataset. Later cells read its
# .data, .target, .feature_names and .target_names attributes.
data = load_breast_cancer()

In [None]:
# Tabular view of the dataset: one column per feature, plus the numeric class
# label ('target') and its human-readable name ('name').
df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    target=data.target,
    name=data.target_names[data.target],
)
df.head()

In [None]:
# Scatter of the two features used for modelling, one colour per class.
fig, ax = plt.subplots(figsize=(10, 8))
colors = {0: 'red', 1: 'blue'}

# sort=False keeps the classes in order of first appearance in the frame.
for target, subset in df.groupby('target', sort=False):
    ax.scatter(
        subset['mean radius'],
        subset['mean texture'],
        label=data.target_names[target],
        color=colors[target],
        alpha=0.7,
    )

ax.set_xlabel('Mean Radius')
ax.set_ylabel('Mean Texture')
ax.set_title('Cancer Data')
ax.legend(loc='best')
plt.show()

In [None]:
# Keep only the first two feature columns (plotted elsewhere in this notebook
# as "Mean Radius" and "Mean Texture") so decision regions can be drawn in 2-D.
y = data.target
X = data.data[:, :2]

# Hold out 30% of the samples for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

## SVM

In [None]:
# Linear-kernel support vector classifier, fitted on the training split.
# probability=True asks scikit-learn to also fit probability estimates;
# the seed keeps that step repeatable.
svm_clf = SVC(
    kernel='linear',
    probability=True,
    random_state=42,
)
svm_clf.fit(X_train, y_train)

In [None]:
# Score the SVM on the held-out split.
y_pred = svm_clf.predict(X_test)

# Accuracy is the share of test samples whose predicted label equals the true
# label: (number of matching predictions) / (number of test samples).
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

# Per-class metrics, labelled with the dataset's class names.
svm_report = classification_report(y_test, y_pred, target_names=data.target_names)
print("Classification Report:\n", svm_report)

svm_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", svm_confusion)

In [None]:
# Plot limits: pad the observed feature range on every side so the outermost
# samples are not drawn on the axes frame (where their markers get clipped).
# 0.5 is the same padding the Naive Bayes and KNN decision-boundary plots use,
# which keeps the three figures directly comparable.
margin = 0.5
x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin

# Dense grid over the plot area (step 0.02 along both features). Every grid
# node is classified so the background can be coloured by predicted class.
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
grid_points = np.c_[xx.ravel(), yy.ravel()]

Z = svm_clf.predict(grid_points)
Z = Z.reshape(xx.shape)  # back to the 2-D grid layout that contourf expects

plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)

# Overlay every sample (train and test together), one scatter call per class
# so each class gets its own colour and legend entry.
plt.scatter(X[y == 0, 0], X[y == 0, 1], color='red', label=data.target_names[0], edgecolor='k', s=50)
plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue', label=data.target_names[1], edgecolor='k', s=50)

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xlabel("Mean Radius")
plt.ylabel("Mean Texture")
plt.title("SVM Decision Boundary with Colored Zones on Cancer Data")
plt.legend()
plt.show()


## Naive Bayes

In [None]:
# The two input features are continuous measurements, not event counts.
# GaussianNB models each feature with a per-class normal distribution, which
# suits real-valued inputs. MultinomialNB assumes count-like features (e.g.
# word frequencies) and is the wrong likelihood model for this data.
nb_clf = GaussianNB()

nb_clf.fit(X_train, y_train)

In [None]:
# Score the Naive Bayes classifier on the held-out split.
y_pred = nb_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Compute the summaries first, then print them in a fixed order.
nb_report = classification_report(y_test, y_pred, target_names=data.target_names)
nb_confusion = confusion_matrix(y_test, y_pred)

print("Accuracy: {:.2f}%".format(accuracy*100))
print("Classification Report:\n", nb_report)
print("Confusion Matrix:\n", nb_confusion)

In [None]:
# Axis limits: the observed range of each feature widened by `margin`.
margin = 0.5
feat_x, feat_y = X[:, 0], X[:, 1]
x_min, x_max = feat_x.min() - margin, feat_x.max() + margin
y_min, y_max = feat_y.min() - margin, feat_y.max() + margin

# Classify every node of a dense grid so the background can be shaded by the
# class the Naive Bayes model predicts there.
step = 0.02
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = nb_clf.predict(grid_points).reshape(xx.shape)

fig, ax = plt.subplots(figsize=(10, 8))
ax.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)

# Overlay the samples, one scatter call per class (class 0 first, then 1).
for label, colour in ((0, 'red'), (1, 'blue')):
    members = y == label
    ax.scatter(feat_x[members], feat_y[members], color=colour,
               label=data.target_names[label], edgecolor='k', s=50)

ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.set_xlabel("Mean Radius")
ax.set_ylabel("Mean Texture")
ax.set_title("NB Decision Boundary with Colored Zones on Cancer Data")
ax.legend()
plt.show()

## KNN

In [None]:
# k-nearest-neighbours classifier with k = 3, fitted on the training split.
knn = KNeighborsClassifier(
    n_neighbors=3,
)
knn.fit(X_train, y_train)

In [None]:
# Score the KNN classifier on the held-out split.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy*100))

# Remaining summaries share one print pattern: heading, then the result.
for heading, result in (
    ("Classification Report:\n",
     classification_report(y_test, y_pred, target_names=data.target_names)),
    ("Confusion Matrix:\n", confusion_matrix(y_test, y_pred)),
):
    print(heading, result)

In [None]:
# Axis limits: feature range padded by `margin` on each side.
margin = 0.5
x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin

# Grid coordinates along each axis, then the full mesh of grid nodes.
x_steps = np.arange(x_min, x_max, 0.02)
y_steps = np.arange(y_min, y_max, 0.02)
xx, yy = np.meshgrid(x_steps, y_steps)
grid_points = np.stack([xx.ravel(), yy.ravel()], axis=1)

# Predicted class at every grid node, reshaped to the mesh for contourf.
Z = knn.predict(grid_points).reshape(xx.shape)

plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)

# Overlay the samples class by class (0 first, then 1).
class_colors = {0: 'red', 1: 'blue'}
for cls, shade in class_colors.items():
    in_cls = X[y == cls]
    plt.scatter(in_cls[:, 0], in_cls[:, 1], color=shade,
                label=data.target_names[cls], edgecolor='k', s=50)

plt.gca().set(
    xlim=(x_min, x_max),
    ylim=(y_min, y_max),
    xlabel="Mean Radius",
    ylabel="Mean Texture",
    title="KNN(k = 3) Decision Boundary with Colored Zones on Cancer Data",
)
plt.legend()
plt.show()