# Loading Libraries and Dataset

In [1]:
# Loading libraries
import pandas as pd
import numpy as np

# Support Vector Machine SVC
from sklearn.svm import SVC

# Train test split
from sklearn.model_selection import train_test_split

# Evaluation metrics
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Read the preprocessed dataset from the path relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer="Dataset/PreprocessedDataset.csv")

# Train Test Split

In [3]:
# Feature subset used for the "good features" experiment.
# Per the original note these come from a feature-selection step (not shown in this notebook).
good_features = [
    "biopsies",
    "histologicalclass",
    "consumed_alcohol",
    "menopause",
    "is_sad",
]

In [4]:
# Target: the "cancer" column
y = df["cancer"]

# "Normal" predictors: every column of df except the target
X_normal = df.drop("cancer", axis=1)

# "Good" predictors: only the columns listed in good_features
X_good = df[good_features]

In [5]:
# 70/30 train/test split for the all-features ("normal") experiment.
# The fixed random_state keeps the split reproducible between runs.
(
    X_normal_train,
    X_normal_test,
    y_normal_train,
    y_normal_test,
) = train_test_split(
    X_normal,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# 70/30 train/test split for the good-features experiment.
# Same test_size and random_state as the all-features split.
(
    X_good_train,
    X_good_test,
    y_good_train,
    y_good_test,
) = train_test_split(
    X_good,
    y,
    test_size=0.3,
    random_state=42,
)

# Model

## All Features

In [7]:
# Build an RBF-kernel support vector classifier and train it on the
# all-features training split.
# NOTE(review): scikit-learn documents random_state as only affecting the
# probability estimates (probability=True), so it looks like a no-op here - confirm.
model1 = SVC(
    kernel="rbf",
    C=1.0,
    gamma="scale",
    random_state=42,
)
model1.fit(X_normal_train, y_normal_train)

In [8]:
# Predict labels for the held-out rows of the all-features split with model1
y_pred = model1.predict(X=X_normal_test)

In [9]:
# Score the current y_pred (expected to hold model1's predictions from the
# previous cell) against the held-out labels of the all-features split.
accuracy = accuracy_score(y_normal_test, y_pred)
conf_matrix = confusion_matrix(y_normal_test, y_pred)
class_report = classification_report(y_normal_test, y_pred)

# Report: accuracy to two decimals, then each heading followed by its payload
# on the next line.
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:", conf_matrix, sep="\n")
print("Classification Report:", class_report, sep="\n")

Accuracy: 0.99
Confusion Matrix:
[[162   3]
 [  0 341]]
Classification Report:
              precision    recall  f1-score   support

       False       1.00      0.98      0.99       165
        True       0.99      1.00      1.00       341

    accuracy                           0.99       506
   macro avg       1.00      0.99      0.99       506
weighted avg       0.99      0.99      0.99       506



## Good Features

In [10]:
# Build an RBF-kernel support vector classifier with the same hyperparameters
# as model1 and train it on the good-features training split.
# NOTE(review): scikit-learn documents random_state as only affecting the
# probability estimates (probability=True), so it looks like a no-op here - confirm.
model2 = SVC(
    kernel="rbf",
    C=1.0,
    gamma="scale",
    random_state=42,
)
model2.fit(X_good_train, y_good_train)

In [11]:
# Predict labels for the held-out rows of the good-features split with model2
# (this overwrites the y_pred produced for model1)
y_pred = model2.predict(X=X_good_test)

In [12]:
# Score the current y_pred (expected to hold model2's predictions from the
# previous cell) against the held-out labels of the good-features split.
accuracy = accuracy_score(y_good_test, y_pred)
conf_matrix = confusion_matrix(y_good_test, y_pred)
class_report = classification_report(y_good_test, y_pred)

# Report: accuracy to two decimals, then each heading followed by its payload
# on the next line.
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:", conf_matrix, sep="\n")
print("Classification Report:", class_report, sep="\n")

Accuracy: 0.83
Confusion Matrix:
[[ 97  68]
 [ 16 325]]
Classification Report:
              precision    recall  f1-score   support

       False       0.86      0.59      0.70       165
        True       0.83      0.95      0.89       341

    accuracy                           0.83       506
   macro avg       0.84      0.77      0.79       506
weighted avg       0.84      0.83      0.82       506

