In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch the Breast Cancer Wisconsin (Diagnostic) dataset (UCI repository id 17).
breast_cancer_wisconsin_diagnostic = fetch_ucirepo(id=17)

# Split the fetched bundle into its feature and target tables (pandas DataFrames).
input_data, output_data = (
    breast_cancer_wisconsin_diagnostic.data.features,
    breast_cancer_wisconsin_diagnostic.data.targets,
)

In [None]:
# Show the feature table as this cell's output.
input_data

In [None]:
# Show the target (diagnosis) table as this cell's output.
output_data

In [None]:
# Columns used as model features, in the order the model sees them:
# the three radius columns followed by the three texture columns.
FEATURE_COLUMNS = ["radius1", "radius2", "radius3", "texture1", "texture2", "texture3"]

# Select the feature columns in one step and convert them to a list of rows
# (one [r1, r2, r3, t1, t2, t3] list per sample) instead of zipping six Series by hand.
input_data_list = input_data[FEATURE_COLUMNS].values.tolist()

# Preview only the first few rows; printing every sample floods the notebook output.
print(*input_data_list[:5], sep="\n")

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Standardize every feature column (StandardScaler removes the mean and scales
# to unit variance). The fitted `scaler` is kept so that new samples can be
# transformed the same way before prediction.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split performed later in the notebook, so test-set statistics leak into the
# preprocessing. Consider fitting on the training rows only.
scaler = StandardScaler()
normalized_input_data = scaler.fit_transform(np.array(input_data_list))

# Preview only the first few rows instead of dumping the whole array.
print(*normalized_input_data[:5], sep="\n")

In [None]:
# Encode the diagnosis labels as integers: "B" becomes 0, every other value becomes 1.
output_data_array = (output_data.Diagnosis != "B").astype(int).tolist()
print(output_data_array)

In [None]:
from random import seed, shuffle
from sklearn.linear_model import SGDClassifier

# Fixed seed so the train/test split and the fitted model are reproducible
# on every Restart & Run All.
RANDOM_SEED = 42
# Share of the samples used for training; the remainder is the test set.
TRAIN_FRACTION = 0.75

seed(RANDOM_SEED)
model = SGDClassifier(random_state=RANDOM_SEED)

# Shuffle the row indexes, then cut them into the train and test partitions.
sample_count = len(input_data)
indexes = list(range(sample_count))
shuffle(indexes)
split_at = int(TRAIN_FRACTION * sample_count)
train_indexes = indexes[:split_at]
test_indexes = indexes[split_at:]

# Index the rows directly instead of scanning every row with `i in <list>`,
# which was quadratic. Sorting keeps each partition in original dataset order,
# exactly as the previous filtering did.
train_rows = sorted(train_indexes)
test_rows = sorted(test_indexes)
input_train_data = [normalized_input_data[i] for i in train_rows]
output_train_data = [output_data_array[i] for i in train_rows]
input_test_data = [normalized_input_data[i] for i in test_rows]
output_test_data = [output_data_array[i] for i in test_rows]

model.fit(input_train_data, output_train_data)

# Hard 0/1 class predictions for the held-out test rows.
predicted = model.predict(input_test_data)
print(predicted)

In [None]:
from sklearn.metrics import confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns

# The confusion matrix is computed from the hard class predictions.
cm = confusion_matrix(output_test_data, predicted)

# The ROC curve needs continuous scores, not thresholded 0/1 labels: with hard
# predictions roc_curve only sees a single operating point, so the curve and
# its AUC are degenerate. Use the classifier's signed distance to the decision
# boundary instead.
decision_scores = model.decision_function(input_test_data)
fpr, tpr, thresholds = roc_curve(output_test_data, decision_scores)
roc_auc = auc(fpr, tpr)

In [None]:
# Draw the confusion matrix as an annotated heatmap on an explicit Axes.
cm_fig, cm_ax = plt.subplots(figsize=(5, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt=".0f",
    linewidths=.5,
    square=True,
    cmap='Blues',
    xticklabels=["B", "M"],
    yticklabels=["B", "M"],
    ax=cm_ax,
)
cm_ax.set_ylabel('Actual label')
cm_ax.set_xlabel('Predicted label')
cm_ax.set_title('Confusion Matrix', size=15)
plt.show()

In [None]:
# Plot the ROC curve against the chance diagonal on an explicit Axes.
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
# Dashed diagonal: the reference line where true and false positive rates are equal.
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic')
roc_ax.legend(loc="lower right")
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error

# With 0/1 class labels every squared difference is 0 or 1, so this mean squared
# error is the share of misclassified test samples.
# Arguments follow scikit-learn's documented (y_true, y_pred) order, matching the
# other metric calls in this notebook; the value is unchanged because the
# squared error is symmetric.
error = mean_squared_error(output_test_data, predicted)
print(f"The error is {error}")

In [None]:
# Raw measurements for one sample, in the training feature order:
# radius1, radius2, radius3, texture1, texture2, texture3.
raw_sample = [[18, 18, 18, 10, 10, 10]]

# The model was fitted on standardized features, so a new sample must go through
# the same fitted scaler before prediction; feeding raw values would place it
# far outside the range the model was trained on.
result = model.predict(scaler.transform(raw_sample))
if result[0] == 1:
    print("Malign")
else:
    print("Benign")