## Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from torchmetrics.functional.classification import binary_accuracy, binary_auroc, binary_f1_score, binary_precision, binary_recall
import tensorflow as tf
import time
from keras.models import Sequential
from keras.layers import Dense, InputLayer, Dropout, InputSpec
from keras.utils import *
from keras.layers import *
from keras.models import *
from keras.callbacks import *

## Read Data

In [2]:
data = pd.read_csv('Breastcancer_data.csv')
data.head()

Unnamed: 0,radius1,texture1,perimeter1,area1,smoothness1,compactness1,concavity1,concave_points1,symmetry1,fractal_dimension1,...,texture3,perimeter3,area3,smoothness3,compactness3,concavity3,concave_points3,symmetry3,fractal_dimension3,Diagnosis
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,M
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,M
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,M
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,M
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,M


In [3]:
data = data.drop_duplicates(ignore_index=True)
data = data.dropna(axis=1)

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   radius1             569 non-null    float64
 1   texture1            569 non-null    float64
 2   perimeter1          569 non-null    float64
 3   area1               569 non-null    float64
 4   smoothness1         569 non-null    float64
 5   compactness1        569 non-null    float64
 6   concavity1          569 non-null    float64
 7   concave_points1     569 non-null    float64
 8   symmetry1           569 non-null    float64
 9   fractal_dimension1  569 non-null    float64
 10  radius2             569 non-null    float64
 11  texture2            569 non-null    float64
 12  perimeter2          569 non-null    float64
 13  area2               569 non-null    float64
 14  smoothness2         569 non-null    float64
 15  compactness2        569 non-null    float64
 16  concavit

In [5]:
data['Diagnosis'] = data['Diagnosis'].map({'M': 1, 'B': 0})

## Data Preprocessing

In [6]:
scaler = MinMaxScaler()
scaler.fit(data)

# Transform the DataFrame to obtain the normalized data
data_normalized = scaler.transform(data)

In [7]:
X = data_normalized[:, :-1]
y = data_normalized[:, -1]

## Modelling

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [9]:
class MultiChannelWeightedDropout(tf.keras.Model):
    def __init__(self, out, p=0.5):
        super(MultiChannelWeightedDropout, self).__init__()

        self.in_layer = tf.keras.layers.Dense(3, activation='softmax')

        self.dropout1 = tf.keras.layers.Dropout(p)
        self.dropout2 = tf.keras.layers.Dropout(p)
        self.dropout3 = tf.keras.layers.Dropout(p)

        self.out_layer = tf.keras.layers.Dense(out, activation='sigmoid')

    def call(self, x, training=False):
        x = self.in_layer(x)
        weights = x

        # Apply weighted dropout
        channel1 = self.dropout1(x[:, 0], training=training) * weights[:, 0]
        channel2 = self.dropout2(x[:, 1], training=training) * weights[:, 1]
        channel3 = self.dropout3(x[:, 2], training=training) * weights[:, 2]

        x = tf.stack([channel1, channel2, channel3], axis=1)
        x = self.out_layer(x)
        return x

In [10]:
def DNNModel(in_dim, out_dim):
    model = Sequential([
        tf.keras.layers.Input(shape=in_dim),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        MultiChannelWeightedDropout(out_dim, p=0.5),
    ])
    model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), metrics=['accuracy'])
    return model

## Cross Validation

In [11]:
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

In [12]:
fold_test = []
fold_pred = []

In [13]:
for fold, (train_idx, test_idx) in enumerate(kfold.split(X, y)):
    x_train, x_test, y_train, y_test = (X[train_idx]), (X[test_idx]), (y[train_idx]), (y[test_idx])
    classifier = DNNModel(30, 1)
    
    train_init = time.time()
    classifier.fit(x_train, y_train, epochs=100, batch_size=32, verbose=0)
    train_time = time.time() - train_init

    test_init = time.time()
    prediction = classifier.predict(x_test)
    test_time = time.time() - test_init

    fold_test.append(y_test)
    fold_pred.append(prediction)

    preds = torch.tensor(prediction.flatten())
    trues = torch.tensor(y_test)

    print(f"Fold: {fold+1} | Accuracy: {binary_accuracy(preds=preds, target=trues).item():.5f} | Train Time: {train_time} | Test Time: {test_time}")  

Fold: 1 | Accuracy: 0.98246 | Train Time: 6.241074800491333 | Test Time: 0.1910247802734375
Fold: 2 | Accuracy: 0.98246 | Train Time: 4.919419288635254 | Test Time: 0.08650565147399902
Fold: 3 | Accuracy: 0.98246 | Train Time: 4.8503899574279785 | Test Time: 0.09991669654846191
Fold: 4 | Accuracy: 1.00000 | Train Time: 4.994814872741699 | Test Time: 0.08595108985900879
Fold: 5 | Accuracy: 0.94737 | Train Time: 4.848230600357056 | Test Time: 0.09337449073791504
Fold: 6 | Accuracy: 0.94737 | Train Time: 4.822282075881958 | Test Time: 0.10423946380615234
Fold: 7 | Accuracy: 0.98246 | Train Time: 4.926878929138184 | Test Time: 0.10317397117614746
Fold: 8 | Accuracy: 0.91228 | Train Time: 4.82804799079895 | Test Time: 0.09551858901977539
Fold: 9 | Accuracy: 0.92982 | Train Time: 4.8587751388549805 | Test Time: 0.08229517936706543
Fold: 10 | Accuracy: 0.92857 | Train Time: 6.453928470611572 | Test Time: 0.09842419624328613


In [14]:
accuracy = []
precision = []
recall = []
f1_score = []
auroc = []

for i in range(10):
    trues = torch.tensor(fold_test[i])
    preds = torch.tensor(fold_pred[i].flatten())
    accuracy.append(binary_accuracy(preds=preds, target=trues).item())
    precision.append(binary_precision(preds=preds, target=trues).item())
    recall.append(binary_recall(preds=preds, target=trues).item())
    f1_score.append(binary_f1_score(preds=preds, target=trues).item())
    auroc.append(binary_auroc(preds.float(), trues.long()).item())

print(f"Accuracy: {np.mean(accuracy):.5f} | Precision: {np.mean(precision):.5f} | Recall: {np.mean(recall):.5f} | F1 Score: {np.mean(f1_score):.5f} | AUC ROC: {np.mean(auroc):.5f}")

Accuracy: 0.95952 | Precision: 0.97090 | Recall: 0.92655 | F1 Score: 0.94598 | AUC ROC: 0.99343


In [15]:
import pickle

with open(f'../Results/breastcancer_pred.pkl', 'wb') as file:
    pickle.dump(fold_pred, file)
with open(f'../Results/breastcancer_true.pkl', 'wb') as file:
    pickle.dump(fold_test, file)