In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from google.colab import drive

In [2]:
drive.mount('/content/drive', force_remount=True)
os.chdir('/content/drive/Shareddrives/171 Group/')

Mounted at /content/drive


In [3]:
# Load preprocessed breast cancer dataset
data = pd.read_csv("breast-cancer.csv")
data = data.drop("id", axis = 1)

In [4]:
# deleting the rows with any instance of 0 in it, assume they are not real data
data = data[(data['concavity_mean'] != 0 )]

In [5]:
# Assuming your target variable is in a column named 'target'
X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

In [6]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Initialize logistic regression model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train.values.ravel())

# Make predictions on the test set
y_pred = model.predict(X_test)

In [9]:
from sklearn.metrics import classification_report

In [10]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", round(accuracy, 4))

# Classification report
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)


Accuracy:  0.9911
Classification Report:
               precision    recall  f1-score   support

           B       0.98      1.00      0.99        60
           M       1.00      0.98      0.99        52

    accuracy                           0.99       112
   macro avg       0.99      0.99      0.99       112
weighted avg       0.99      0.99      0.99       112



In [20]:
# Repeat the process for multiple epochs
num_runs = 6
average_accuracy = 0

for _ in range(num_runs):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=None)

    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = LogisticRegression()

    model.fit(X_train, y_train.values.ravel())

    y_pred = model.predict(X_test)

    average_accuracy += accuracy_score(y_test, y_pred)

average_accuracy /= num_runs

print("\nAverage accuracy (over", num_runs, "epochs): ", round(average_accuracy, 4))

# Classification report
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)


Average accuracy (over 6 epochs):  0.9836
Classification Report:
               precision    recall  f1-score   support

           B       0.99      0.99      0.99        70
           M       0.98      0.98      0.98        42

    accuracy                           0.98       112
   macro avg       0.98      0.98      0.98       112
weighted avg       0.98      0.98      0.98       112

