In [3]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from google.colab import drive

In [4]:
drive.mount('/content/drive', force_remount=True)
os.chdir('/content/drive/Shareddrives/171 Group/')

Mounted at /content/drive


In [5]:
# Load preprocessed breast cancer dataset
data = pd.read_csv("breast-cancer.csv")
data = data.drop("id", axis = 1)

In [6]:
# deleting the rows with any instance of 0 in it, assume they are not real data
data = data[(data['concavity_mean'] != 0 )]

In [7]:
# Assuming your target variable is in a column named 'target'
X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

In [8]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Initialize MinMaxScaler
scaler = MinMaxScaler()

In [10]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# Initialize logistic regression model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train.values.ravel())

# Make predictions on the test set
y_pred = model.predict(X_test)

In [13]:
from sklearn.metrics import accuracy_score, classification_report

In [14]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: ", round(accuracy, 4))

# Classification report
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)


Accuracy:  0.9821
Classification Report:
               precision    recall  f1-score   support

           B       0.97      1.00      0.98        60
           M       1.00      0.96      0.98        52

    accuracy                           0.98       112
   macro avg       0.98      0.98      0.98       112
weighted avg       0.98      0.98      0.98       112



In [15]:
# Repeat the process for multiple epochs
num_runs = 6
average_accuracy = 0

for _ in range(num_runs):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=None)

    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = LogisticRegression()

    model.fit(X_train, y_train.values.ravel())

    y_pred = model.predict(X_test)

    average_accuracy += accuracy_score(y_test, y_pred)

average_accuracy /= num_runs

print("\nAverage accuracy (over", num_runs, "epochs): ", round(average_accuracy, 4))

# Classification report
class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", class_report)


Average accuracy (over 6 epochs):  0.9613
Classification Report:
               precision    recall  f1-score   support

           B       0.92      1.00      0.96        67
           M       1.00      0.87      0.93        45

    accuracy                           0.95       112
   macro avg       0.96      0.93      0.94       112
weighted avg       0.95      0.95      0.95       112



In [18]:
# Input values for prediction
input_data = np.array([13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259]).reshape(1, -1)

# Standardize the input data using the previously fit scaler
input_data_standardized = scaler.transform(input_data)

# Make predictions
prediction = model.predict(input_data_standardized)

# Output the prediction
print("Predicted Diagnosis:", prediction[0])


Predicted Diagnosis: B


