# Application and Comparison Between Random Forest and SVM Classifiers on Breast Cancer Dataset

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [4]:
# Load the dataset

data = pd.read_csv('Breast_Cancer.csv', header=None)
data.columns = ["id", "diagnosis"] + ["feature_" + str(i) for i in range(1, 31)]

# Seperate features and labels
x = data.iloc[:, 2:]
y = data["diagnosis"]


In [6]:
# Split the dataset into training and testing sets

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.3, random_state=42)

# Training and Predictions

In [7]:
# Random Forest Classifier

rf = RandomForestClassifier(random_state=42)
rf.fit(x_train, y_train)
rf_preds = rf.predict(x_test)


print("Random Forest Classifier Results:")
print("Accuracy:", accuracy_score(y_test, rf_preds))
print("Classification report:\n", classification_report(y_test, rf_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, rf_preds))
print("-"*50)


Random Forest Classifier Results:
Accuracy: 0.9707602339181286
Classification report:
               precision    recall  f1-score   support

           B       0.96      0.99      0.98       108
           M       0.98      0.94      0.96        63

    accuracy                           0.97       171
   macro avg       0.97      0.96      0.97       171
weighted avg       0.97      0.97      0.97       171

Confusion Matrix:
 [[107   1]
 [  4  59]]
--------------------------------------------------


In [10]:
# Support Vector Machines Classifier
svc=SVC(kernel= 'linear', random_state=42)
svc.fit(x_train, y_train)
svc_preds = svc.predict(x_test)
print("Support Vector Machines Classifier Results:")
print("Accuracy:", accuracy_score(y_test, svc_preds))
print("Classification Report:\n", classification_report(y_test, svc_preds))
print("Confusion Matrix:\n", confusion_matrix(y_test, svc_preds))

Support Vector Machines Classifier Results:
Accuracy: 0.9649122807017544
Classification Report:
               precision    recall  f1-score   support

           B       0.96      0.98      0.97       108
           M       0.97      0.94      0.95        63

    accuracy                           0.96       171
   macro avg       0.97      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171

Confusion Matrix:
 [[106   2]
 [  4  59]]
