In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# Read the WDBC breast-cancer file straight from the UCI repository.
# It is parsed with header=None, so the column names are supplied here:
# an id, the diagnosis label, then the same ten measurements repeated with
# the suffixes _mean, _se and _worst.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"

mean_columns = [
    'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean',
    'compactness_mean', 'concavity_mean', 'concave_points_mean', 'symmetry_mean',
    'fractal_dimension_mean',
]
se_columns = [
    'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
    'compactness_se', 'concavity_se', 'concave_points_se', 'symmetry_se',
    'fractal_dimension_se',
]
worst_columns = [
    'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst',
    'compactness_worst', 'concavity_worst', 'concave_points_worst', 'symmetry_worst',
    'fractal_dimension_worst',
]
column_names = ['id', 'diagnosis'] + mean_columns + se_columns + worst_columns

data = pd.read_csv(url, header=None, names=column_names)

In [3]:
# Drop unnecessary columns
# Rebind instead of mutating in place, and tolerate an already-missing 'id'
# column, so this cell can be re-run without raising KeyError.
data = data.drop(columns=['id'], errors='ignore')

In [4]:
# Convert the diagnosis column to binary labels (Malignant = 1, Benign = 0)
# Series.map replaces every value missing from the mapping with NaN, so running
# this cell a second time on already-encoded labels would silently wipe the
# target column. Only encode while raw 'M'/'B' letters are still present.
diagnosis_codes = {'M': 1, 'B': 0}
if data['diagnosis'].isin(list(diagnosis_codes)).any():
    data['diagnosis'] = data['diagnosis'].map(diagnosis_codes)

In [5]:
# Separate the target column (y) from the remaining feature columns (X)
y = data['diagnosis']
X = data.drop(columns='diagnosis')

In [6]:
# Hold out part of the data for testing (test_size=0.2); the fixed
# random_state keeps the split identical between runs
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [7]:
# Standardise the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Support vector classifier with default settings: fit on the training
# split, then predict labels for the test split
svm_model = SVC().fit(X_train, y_train)
svm_y_pred = svm_model.predict(X_test)

In [9]:
# Random Forest model
# The forest is built with randomness, so without a seed the metrics below
# change on every run. Fixing random_state (same seed as the train/test
# split) makes the results repeatable under Restart & Run All.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)

In [10]:
# Score both sets of test predictions against the true labels
svm_accuracy, rf_accuracy = (
    accuracy_score(y_test, predictions)
    for predictions in (svm_y_pred, rf_y_pred)
)

In [11]:
# Report each model's test accuracy on its own line
for label, score in (
    ("SVM Accuracy:", svm_accuracy),
    ("Random Forest Accuracy:", rf_accuracy),
):
    print(label, score)

SVM Accuracy: 0.9824561403508771
Random Forest Accuracy: 0.956140350877193


In [12]:
# Build one confusion matrix per model from the test-set predictions,
# then print each matrix under its heading
svm_confusion_mtx = confusion_matrix(y_test, svm_y_pred)
rf_confusion_mtx = confusion_matrix(y_test, rf_y_pred)

for heading, matrix in (
    ("SVM Confusion Matrix:", svm_confusion_mtx),
    ("Random Forest Confusion Matrix:", rf_confusion_mtx),
):
    print(heading, matrix, sep="\n")


SVM Confusion Matrix:
[[71  0]
 [ 2 41]]
Random Forest Confusion Matrix:
[[69  2]
 [ 3 40]]
