<a href="https://colab.research.google.com/github/mohammadpzy/duke-breast-mri/blob/main/duke_breast_mri_mlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# -*- coding: utf-8 -*-
"""duke breast mri mlp.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1LV4AhS10_n0yHpVUqwNnTmGbu9ZI-yIR
"""



# Import necessary libraries


In [3]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split


# Set pandas display options to show all rows and columns


In [4]:

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


# Mount Google Drive to access datasets stored there


In [5]:

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [6]:
# Load image features dataset from Excel file
image_features = pd.read_excel('/content/drive/MyDrive/duke breast mri/Imaging_Features (3).xlsx', index_col=0)

# Load clinical features dataset from Excel file
clinical_features = pd.read_excel('/content/drive/MyDrive/duke breast mri/Clinical_and_Other_Features (2).xlsx', header=1, index_col=0)


#preperation of data


In [7]:

# Extract 'Histologic type' column from clinical features
histologic_type = clinical_features['Histologic type']

# Exclude the first entry (possible header row)
histologic_type = histologic_type[1:]

# Check for missing values in image features
image_features.isnull().sum()

# Fill missing values with the mean of each column
image_features.fillna(image_features.mean(), inplace=True)

# Get the shape of the image features dataframe
image_features.shape

# Verify that there are no more missing values
image_features.isnull().sum()

# Encode the histologic types as integers
label_encoder = LabelEncoder()
histologic_type = label_encoder.fit_transform(histologic_type)

# Display the encoded labels
histologic_type

# Check the data type of histologic_type
type(histologic_type)

# Ensure that histologic_type is of integer type
histologic_type = histologic_type.astype(int)

# Perform one-hot encoding of the histologic types

# Find the unique categories
categories = np.unique(histologic_type)

# Create a mapping from category to index
category_to_index = {category: index for index, category in enumerate(categories)}

# Convert the original array to indices based on the mapping
indices = np.array([category_to_index[category] for category in histologic_type])

# Initialize the one-hot encoded array
one_hot_encoded = np.zeros((histologic_type.size, len(categories)), dtype=int)

# Set the appropriate elements to 1
one_hot_encoded[np.arange(histologic_type.size), indices] = 1

# Now, one_hot_encoded is your one-hot encoded array
print(one_hot_encoded)

# Remove the first column to avoid dummy variable trap (if necessary)
one_hot_encoded = one_hot_encoded[:, 1:]


[[0 1 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 1 ... 0 0 0]
 [0 1 0 ... 0 0 0]
 [0 1 0 ... 0 0 0]]


# Get the feature matrix and target variable


In [8]:
x = image_features.values  # Features
y = one_hot_encoded        # Labels (one-hot encoded)




# Standardize the features by removing the mean and scaling to unit variance


In [9]:
scaler = StandardScaler()
x = scaler.fit_transform(x)

# Split the dataset into training and testing sets


In [10]:

X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=None)


# Create a custom dataset class for PyTorch


In [11]:

class TumorDataset(Dataset):
    def __init__(self, X, y):
        # Convert data to PyTorch tensors
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        # Return the size of the dataset
        return len(self.y)

    def __getitem__(self, idx):
        # Retrieve an item by index
        return self.X[idx], self.y[idx]

# Create dataset instances


In [12]:
train_dataset = TumorDataset(X_train, y_train)
test_dataset = TumorDataset(X_test, y_test)


In [13]:
# Define batch size
batch_size = 10

# Create data loaders for batching the data
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Set device to GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the neural network architecture (Multi-Layer Perceptron)


In [14]:
class MLP(nn.Module):
    def __init__(self, input_size, num_classes):
        super(MLP, self).__init__()
        # First fully connected layer
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        # Second fully connected layer
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        # Third fully connected layer
        self.fc3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        # Output layer
        self.output = nn.Linear(64, num_classes)
        # Softmax activation for output layer
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        # Forward pass through the network
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        out = self.relu3(out)
        out = self.output(out)
        out = self.softmax(out)
        return out

In [15]:
# Initialize the neural network model


In [16]:

input_size = x.shape[1]  # Number of features
num_classes = 6          # Number of output classes
model = MLP(input_size, num_classes).to(device)  # Move model to device

# Define the loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [17]:

# Print the device being used
print('Using device:', device)

Using device: cpu


# Training loop


In [18]:
num_epochs = 50
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for features, labels in train_loader:
        # Move data to device
        features = features.to(device)
        labels = labels.float()  # Convert labels to float for BCELoss
        labels = labels.to(device)

        optimizer.zero_grad()    # Zero the gradients
        outputs = model(features)  # Forward pass
        loss = criterion(outputs, labels)  # Compute loss
        loss.backward()           # Backpropagation
        optimizer.step()          # Update weights
        running_loss += loss.item()
    # Compute average loss for the epoch
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


Epoch [1/50], Loss: 0.2624
Epoch [2/50], Loss: 0.1825
Epoch [3/50], Loss: 0.1476
Epoch [4/50], Loss: 0.1269
Epoch [5/50], Loss: 0.1134
Epoch [6/50], Loss: 0.0916
Epoch [7/50], Loss: 0.0828
Epoch [8/50], Loss: 0.0645
Epoch [9/50], Loss: 0.0602
Epoch [10/50], Loss: 0.0395
Epoch [11/50], Loss: 0.0305
Epoch [12/50], Loss: 0.0148
Epoch [13/50], Loss: 0.0186
Epoch [14/50], Loss: 0.0244
Epoch [15/50], Loss: 0.0223
Epoch [16/50], Loss: 0.0272
Epoch [17/50], Loss: 0.0354
Epoch [18/50], Loss: 0.0307
Epoch [19/50], Loss: 0.0349
Epoch [20/50], Loss: 0.0227
Epoch [21/50], Loss: 0.0066
Epoch [22/50], Loss: 0.0117
Epoch [23/50], Loss: 0.0075
Epoch [24/50], Loss: 0.0014
Epoch [25/50], Loss: 0.0008
Epoch [26/50], Loss: 0.0006
Epoch [27/50], Loss: 0.0005
Epoch [28/50], Loss: 0.0005
Epoch [29/50], Loss: 0.0005
Epoch [30/50], Loss: 0.0005
Epoch [31/50], Loss: 0.0005
Epoch [32/50], Loss: 0.0004
Epoch [33/50], Loss: 0.0004
Epoch [34/50], Loss: 0.0004
Epoch [35/50], Loss: 0.0004
Epoch [36/50], Loss: 0.0004
E

In [19]:
# Set model to evaluation mode
model.eval()



MLP(
  (fc1): Linear(in_features=529, out_features=256, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=128, out_features=64, bias=True)
  (relu3): ReLU()
  (output): Linear(in_features=64, out_features=6, bias=True)
  (softmax): Softmax(dim=1)
)

# Evaluation on test data


In [20]:
with torch.no_grad():
    total = 0
    correct_predictions = 0
    for features, labels in test_loader:
        features = features.to(device)
        labels = labels.to(device)
        outputs = model(features)
        # Get predicted classes
        predict = torch.argmax(outputs, dim=1).tolist()
        # Get true classes
        label = torch.argmax(labels, dim=1).tolist()
        total += len(label)
        # Compare predictions with true labels
        for i in range(len(label)):
            if_correct = int(label[i] == predict[i])
            correct_predictions += if_correct
    # Calculate accuracy
    accuracy = correct_predictions / total
    print(accuracy)

0.6972972972972973
