In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [3]:
# Load the breast cancer dataset and assemble one frame: one column per
# feature name, plus the label appended as a trailing 'target' column.
data = load_breast_cancer()
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(target=data.target)

In [5]:
# Split features (every column except 'target') and labels, holding out
# 20% of the rows for testing. The fixed random_state keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='target'),
    df['target'],
    test_size=0.2,
    random_state=42,
)


In [7]:
# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so no test-set statistics leak into the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Quick sanity check: show the first five rows of the assembled frame.
print(df.head(n=5))

   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  worst area  \
0             

Classification algorithm implementation

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build the logistic regression model (up to 1000 solver iterations) and
# train it on the scaled training features.
lr_model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Predict labels for the scaled test features.
lr_pred = lr_model.predict(X_test_scaled)

# Report test-set accuracy, the per-class report, and the confusion matrix.
# Each header and its body are printed on separate lines via sep="\n".
print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_pred))
print("Classification Report:", classification_report(y_test, lr_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, lr_pred), sep="\n")

Logistic Regression Accuracy: 0.9736842105263158
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Confusion Matrix:
[[41  2]
 [ 1 70]]


In [15]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build the decision tree (seeded for repeatable results) and train it
# on the scaled training features.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)

# Predict labels for the scaled test features.
dt_pred = dt_model.predict(X_test_scaled)

# Report test-set accuracy, the per-class report, and the confusion matrix.
# Each header and its body are printed on separate lines via sep="\n".
print("Decision Tree Classifier Accuracy:", accuracy_score(y_test, dt_pred))
print(
    "Decision Tree Classifier Classification Report:",
    classification_report(y_test, dt_pred),
    sep="\n",
)
print(
    "Decision Tree Classifier Confusion Matrix:",
    confusion_matrix(y_test, dt_pred),
    sep="\n",
)

Decision Tree Classifier Accuracy: 0.9473684210526315
Decision Tree Classifier Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

Decision Tree Classifier Confusion Matrix:
[[40  3]
 [ 3 68]]


In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a 100-tree random forest (seeded for repeatable results) and
# train it on the scaled training features.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)

# Predict labels for the scaled test features.
rf_pred = rf_model.predict(X_test_scaled)

# Report test-set accuracy, the per-class report, and the confusion matrix.
# Each header and its body are printed on separate lines via sep="\n".
print("Random Forest Classifier Accuracy:", accuracy_score(y_test, rf_pred))
print(
    "Random Forest Classifier Classification Report:",
    classification_report(y_test, rf_pred),
    sep="\n",
)
print(
    "Random Forest Classifier Confusion Matrix:",
    confusion_matrix(y_test, rf_pred),
    sep="\n",
)


Random Forest Classifier Accuracy: 0.9649122807017544
Random Forest Classifier Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

Random Forest Classifier Confusion Matrix:
[[40  3]
 [ 1 70]]


In [19]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a linear-kernel SVM with probability estimates enabled (seeded
# for repeatable results) and train it on the scaled training features.
svm_model = SVC(kernel='linear', probability=True, random_state=42).fit(
    X_train_scaled, y_train
)

# Predict labels for the scaled test features.
svm_pred = svm_model.predict(X_test_scaled)

# Report test-set accuracy, the per-class report, and the confusion matrix.
# Each header and its body are printed on separate lines via sep="\n".
print("Support Vector Machine Accuracy:", accuracy_score(y_test, svm_pred))
print(
    "Support Vector Machine Classification Report:",
    classification_report(y_test, svm_pred),
    sep="\n",
)
print(
    "Support Vector Machine Confusion Matrix:",
    confusion_matrix(y_test, svm_pred),
    sep="\n",
)

Support Vector Machine Accuracy: 0.956140350877193
Support Vector Machine Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.95      0.94        43
           1       0.97      0.96      0.96        71

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.95       114
weighted avg       0.96      0.96      0.96       114

Support Vector Machine Confusion Matrix:
[[41  2]
 [ 3 68]]


In [21]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a 5-neighbour k-NN classifier and train it on the scaled
# training features.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict labels for the scaled test features.
knn_pred = knn_model.predict(X_test_scaled)

# Report test-set accuracy, the per-class report, and the confusion matrix.
# Each header and its body are printed on separate lines via sep="\n".
print("k-Nearest Neighbors Accuracy:", accuracy_score(y_test, knn_pred))
print(
    "k-Nearest Neighbors Classification Report:",
    classification_report(y_test, knn_pred),
    sep="\n",
)
print(
    "k-Nearest Neighbors Confusion Matrix:",
    confusion_matrix(y_test, knn_pred),
    sep="\n",
)

k-Nearest Neighbors Accuracy: 0.9473684210526315
k-Nearest Neighbors Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

k-Nearest Neighbors Confusion Matrix:
[[40  3]
 [ 3 68]]


In [23]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Score each model's test-set predictions against the same true labels.
lr_accuracy = accuracy_score(y_test, lr_pred)
dt_accuracy = accuracy_score(y_test, dt_pred)
rf_accuracy = accuracy_score(y_test, rf_pred)
svm_accuracy = accuracy_score(y_test, svm_pred)
knn_accuracy = accuracy_score(y_test, knn_pred)

# Print the scores one per line, in the order the models were trained.
for model_label, model_accuracy in (
    ("Logistic Regression Accuracy:", lr_accuracy),
    ("Decision Tree Accuracy:", dt_accuracy),
    ("Random Forest Accuracy:", rf_accuracy),
    ("SVM Accuracy:", svm_accuracy),
    ("k-NN Accuracy:", knn_accuracy),
):
    print(model_label, model_accuracy)

Logistic Regression Accuracy: 0.9736842105263158
Decision Tree Accuracy: 0.9473684210526315
Random Forest Accuracy: 0.9649122807017544
SVM Accuracy: 0.956140350877193
k-NN Accuracy: 0.9473684210526315
