In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
df = pd.read_csv("/content/PreprocessedDataset.csv")

In [6]:
good_features = ['biopsies', 'histologicalclass', 'exercise', 'menopause', 'consumed_alcohol']

In [7]:
X = df.drop(columns = ['cancer'])
X_good = df[good_features]
y = df['cancer']

In [8]:
X_good_train, X_good_test, y_good_train, y_good_test = train_test_split(X_good, y, test_size = 0.3, random_state = 42)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

**Random Forest Classifier** Normal

In [10]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [11]:
y_pred = model.predict(X_test)

In [12]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 1.00
Confusion Matrix:
[[165   0]
 [  0 341]]
Classification Report:
              precision    recall  f1-score   support

          No       1.00      1.00      1.00       165
         Yes       1.00      1.00      1.00       341

    accuracy                           1.00       506
   macro avg       1.00      1.00      1.00       506
weighted avg       1.00      1.00      1.00       506




**Random Forest Classifier** Good


In [13]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_good_train, y_good_train)

In [14]:
y_pred = model.predict(X_good_test)

In [15]:
accuracy = accuracy_score(y_good_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_good_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_good_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.99
Confusion Matrix:
[[164   1]
 [  5 336]]
Classification Report:
              precision    recall  f1-score   support

          No       0.97      0.99      0.98       165
         Yes       1.00      0.99      0.99       341

    accuracy                           0.99       506
   macro avg       0.98      0.99      0.99       506
weighted avg       0.99      0.99      0.99       506



**Logistic Regression** Normal

In [16]:
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

In [17]:
y_pred = model.predict(X_test)

In [18]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 1.00
Confusion Matrix:
[[165   0]
 [  0 341]]
Classification Report:
              precision    recall  f1-score   support

          No       1.00      1.00      1.00       165
         Yes       1.00      1.00      1.00       341

    accuracy                           1.00       506
   macro avg       1.00      1.00      1.00       506
weighted avg       1.00      1.00      1.00       506



**Logistic Regression** Good

In [19]:
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_good_train, y_good_train)

In [20]:
y_pred = model.predict(X_good_test)

In [21]:
accuracy = accuracy_score(y_good_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_good_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_good_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.99
Confusion Matrix:
[[162   3]
 [  1 340]]
Classification Report:
              precision    recall  f1-score   support

          No       0.99      0.98      0.99       165
         Yes       0.99      1.00      0.99       341

    accuracy                           0.99       506
   macro avg       0.99      0.99      0.99       506
weighted avg       0.99      0.99      0.99       506



**KNN** Normal

In [23]:
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, y_train)

In [24]:
y_pred = model.predict(X_test)

In [25]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 1.00
Confusion Matrix:
[[165   0]
 [  0 341]]
Classification Report:
              precision    recall  f1-score   support

          No       1.00      1.00      1.00       165
         Yes       1.00      1.00      1.00       341

    accuracy                           1.00       506
   macro avg       1.00      1.00      1.00       506
weighted avg       1.00      1.00      1.00       506



**KNN** Good

In [32]:
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_good_train, y_good_train)

In [33]:
y_pred = model.predict(X_good_test)

In [34]:
accuracy = accuracy_score(y_good_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_good_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_good_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.98
Confusion Matrix:
[[159   6]
 [  6 335]]
Classification Report:
              precision    recall  f1-score   support

          No       0.96      0.96      0.96       165
         Yes       0.98      0.98      0.98       341

    accuracy                           0.98       506
   macro avg       0.97      0.97      0.97       506
weighted avg       0.98      0.98      0.98       506



**SVM** Normal

In [36]:
model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
model.fit(X_train, y_train)

In [37]:
y_pred = model.predict(X_test)

In [38]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.99
Confusion Matrix:
[[162   3]
 [  0 341]]
Classification Report:
              precision    recall  f1-score   support

          No       1.00      0.98      0.99       165
         Yes       0.99      1.00      1.00       341

    accuracy                           0.99       506
   macro avg       1.00      0.99      0.99       506
weighted avg       0.99      0.99      0.99       506



**SVM** Good

In [39]:
model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
model.fit(X_good_train, y_good_train)

In [40]:
y_pred = model.predict(X_good_test)

In [41]:
accuracy = accuracy_score(y_good_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

conf_matrix = confusion_matrix(y_good_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_good_test, y_pred)
print("Classification Report:")
print(class_report)

Accuracy: 0.83
Confusion Matrix:
[[ 97  68]
 [ 19 322]]
Classification Report:
              precision    recall  f1-score   support

          No       0.84      0.59      0.69       165
         Yes       0.83      0.94      0.88       341

    accuracy                           0.83       506
   macro avg       0.83      0.77      0.79       506
weighted avg       0.83      0.83      0.82       506

