# **Classification Techniques on Breast Cancer Wisconsin (Diagnostic) dataset**

Import the necessary libraries

In [28]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


Load the data and split train and test sample.

In [29]:
# Load the Breast Cancer Wisconsin (Diagnostic) dataset
df = load_breast_cancer()
# Separate the input features and target labels
X = df.data
y = df.target
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the data using the training set mean and variance
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)


**Using KNN Classifier with k=3**

In [30]:
# KNN classifier
kNN_Classifier = KNeighborsClassifier(n_neighbors=3)
kNN_Classifier.fit(X_train_std, y_train)
kNN_prediction = kNN_Classifier.predict(X_test_std )


In [31]:
# Calculate accuracy, precision, recall, and F1 score for each classifier
print("KNN Classifier:")
print("Accuracy: ", accuracy_score(y_test, kNN_prediction))
print("Precision: ", precision_score(y_test, kNN_prediction, average='macro'))
print("Recall: ", recall_score(y_test, kNN_prediction, average='macro'))
print("F1 Score: ", f1_score(y_test, kNN_prediction, average='macro'))

KNN Classifier:
Accuracy:  0.9473684210526315
Precision:  0.9439895185063871
Recall:  0.9439895185063871
F1 Score:  0.9439895185063871


**Using K Means clustering with k=3**

In [32]:
# KMeans clustering
KMeans = KMeans(n_clusters=3, random_state=42)
KMeans.fit(X_train_std)
KMeans_prediction = KMeans.predict(X_test_std)



In [33]:
print("KMeans Clustering:")
print("Accuracy: ", accuracy_score(y_test, KMeans_prediction))
print("Precision: ", precision_score(y_test, KMeans_prediction, average='macro'))
print("Recall: ", recall_score(y_test, KMeans_prediction, average='macro'))
print("F1 Score: ", f1_score(y_test, KMeans_prediction, average='macro'))

KMeans Clustering:
Accuracy:  0.08771929824561403
Precision:  0.12263535551206783
Recall:  0.06529097062998145
F1 Score:  0.0658553076402975


  _warn_prf(average, modifier, msg_start, len(result))


**Using Logistic Regression to make probability estimates for each classification**

In [34]:
# Logistic regression classifier
logisticReg_model = LogisticRegression(random_state=42)
logisticReg_model.fit(X_train_std, y_train)
logisticReg_prediction = logisticReg_model.predict(X_test_std)

In [35]:
print("\nLogistic Regression Classifier:")
print("Accuracy: ", accuracy_score(y_test, logisticReg_prediction))
print("Precision: ", precision_score(y_test, logisticReg_prediction, average='macro'))
print("Recall: ", recall_score(y_test, logisticReg_prediction, average='macro'))
print("F1 Score: ", f1_score(y_test, logisticReg_prediction, average='macro'))


Logistic Regression Classifier:
Accuracy:  0.9736842105263158
Precision:  0.9742063492063492
Recall:  0.9697019325253848
F1 Score:  0.97186343068696


**Support Vector Machine Classifier**

In [36]:
# SVM classifier
SVM_model = SVC(kernel='linear', random_state=42)
SVM_model.fit(X_train_std, y_train)
SVM_prediction = SVM_model.predict(X_test_std)

In [37]:
print("\nSVM Classifier:")
print("Accuracy: ", accuracy_score(y_test, SVM_prediction))
print("Precision: ", precision_score(y_test, SVM_prediction, average='macro'))
print("Recall: ", recall_score(y_test, SVM_prediction, average='macro'))
print("F1 Score: ", f1_score(y_test, SVM_prediction, average='macro'))


SVM Classifier:
Accuracy:  0.956140350877193
Precision:  0.9516233766233766
Recall:  0.9556174254831313
F1 Score:  0.9535338713621913
