<a href="https://colab.research.google.com/github/Alton01/ML-breast-cancer-prediction-with-pytorch/blob/main/breast_cancer_prediction_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Pick the compute device: a CUDA GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [3]:
# Load scikit-learn's built-in breast cancer dataset and separate the
# feature matrix (x) from the class labels (y).
data = load_breast_cancer()
x = data.data
y = data.target

In [4]:
# Hold out 20% of the samples as a test set. The fixed random_state makes the
# split reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Report the shapes of the full, training, and test arrays (features first,
# then labels) to confirm the sizes produced by the split.
for features in (x, x_train, x_test):
    print(features.shape)
for labels in (y, y_train, y_test):
    print(labels.shape)

(569, 30)
(455, 30)
(114, 30)
(569,)
(455,)
(114,)


In [7]:
# Standardize every feature to zero mean and unit standard deviation using
# statistics learned from the training split. Standardization only shifts and
# rescales each feature; it does not change the shape of its distribution.
# Putting all features on a comparable scale keeps large-valued features from
# dominating the learning process and typically speeds up and stabilizes
# neural-network training.
scaler = StandardScaler().fit(x_train)  # learn per-feature mean and std from the training data only
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)  # reuse the training statistics; never fit on test data (prevents data leakage)

In [8]:
# Sanity check: at this point x_train is still a NumPy array, not a PyTorch tensor.
type(x_train)

numpy.ndarray

In [9]:
# Convert the NumPy arrays to float32 PyTorch tensors on the selected device;
# the model and the loss function operate on tensors, not on NumPy arrays.
#
# torch.as_tensor is used instead of torch.tensor so this cell can be re-run
# safely: if a variable is already a float32 tensor on `device` it is returned
# unchanged, whereas torch.tensor(<tensor>) emits a UserWarning about
# copy-constructing from a tensor. When the source array is not already
# float32 (as is expected here on the first run), the dtype conversion makes a
# copy, so the resulting tensors do not share memory with the NumPy arrays.
x_train = torch.as_tensor(x_train, dtype=torch.float32, device=device)
x_test = torch.as_tensor(x_test, dtype=torch.float32, device=device)
y_train = torch.as_tensor(y_train, dtype=torch.float32, device=device)
y_test = torch.as_tensor(y_test, dtype=torch.float32, device=device)

In [10]:
# Neural network architecture
# ---------------------------
# A small fully connected network for binary classification.
#   * input_size  - number of neurons in the input layer; one per input feature.
#   * hidden_size - number of neurons in the hidden layer.
#   * output_size - number of neurons in the output layer. For binary
#     classification with a sigmoid output, a single neuron is enough: it
#     emits the probability of the positive class (this notebook uses 1).
# ReLU (Rectified Linear Unit) follows the first fully connected layer and
# introduces the non-linearity that lets the model learn complex patterns.
# Sigmoid follows the last layer and squashes each output into (0, 1).

class NeuralNet(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        input_size: Number of input features.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in forward order; the attribute names are also
        # the parameter-name prefixes in state_dict().
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run forward propagation and return sigmoid-activated outputs."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))

In [12]:
# Hyperparameters
# ---------------
# Optimisation settings. The learning rate controls how far the weights move
# on each update: smaller values train more stably but more slowly, larger
# values train faster but can overshoot the optimum.
learning_rate = 0.001
num_epochs = 100

# Layer sizes.
input_size = x_train.shape[1]  # one input neuron per feature (30 for this dataset)
hidden_size = 64  # number of neurons in the hidden layer
output_size = 1  # a single sigmoid neuron: the predicted probability of class 1

In [13]:
# Build the neural network and move it to the selected device (the GPU when
# one is available, otherwise the CPU).
#
# nn.Linear layers start from randomly drawn weights, so PyTorch's global
# random number generator is seeded first. Re-running this cell (or the whole
# notebook) then starts training from the same initial parameters, which makes
# the reported losses and accuracies reproducible on a given setup.
torch.manual_seed(42)

model = NeuralNet(input_size, hidden_size, output_size).to(device)


In [15]:
# Loss function and optimizer.
# Binary cross-entropy measures how well the probabilities predicted by the
# model match the true binary labels (0 or 1). The Adam optimizer uses the
# gradients of that loss to update the model's weights during training.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss

In [17]:
# Train the neural network.
# Each epoch is one full-batch step: a forward pass over the whole training
# set, backpropagation of the loss, and a single optimizer update.
targets = y_train.view(-1, 1)  # reshape labels to a column so they match the shape of the model output
for epoch in range(num_epochs):
    model.train()

    # Forward pass: compare the predictions with the actual training labels.
    outputs = model(x_train)
    loss = criterion(outputs, targets)

    # Gradients accumulate across backward() calls, so reset them before
    # backpropagating; otherwise they would be summed with those of previous
    # iterations.
    optimizer.zero_grad()
    loss.backward()  # compute gradients of the loss w.r.t. all model parameters
    optimizer.step()  # update the parameters from those gradients (Adam)

    # Training accuracy for this epoch, based on the predictions made above.
    with torch.no_grad():
        predicted = torch.round(outputs)  # probabilities -> class labels (0 or 1)
        correct = predicted.eq(targets).float().sum()  # number of correct predictions
        accuracy = correct / y_train.shape[0]

    # Report progress whenever the 1-based epoch number is a multiple of 10.
    if epoch % 10 == 9:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy.item() * 100:.2f}%')


Epoch [10/100], Loss: 0.1044, Accuracy: 97.36%
Epoch [20/100], Loss: 0.0971, Accuracy: 97.80%
Epoch [30/100], Loss: 0.0909, Accuracy: 98.02%
Epoch [40/100], Loss: 0.0856, Accuracy: 98.02%
Epoch [50/100], Loss: 0.0809, Accuracy: 98.24%
Epoch [60/100], Loss: 0.0767, Accuracy: 98.24%
Epoch [70/100], Loss: 0.0731, Accuracy: 98.24%
Epoch [80/100], Loss: 0.0699, Accuracy: 98.24%
Epoch [90/100], Loss: 0.0671, Accuracy: 98.24%
Epoch [100/100], Loss: 0.0645, Accuracy: 98.46%


In [19]:
# Evaluate the model on the training data.
# model.eval() switches the model to evaluation mode. Layers such as dropout
# or batch normalization behave differently in that mode; this network has
# neither, but switching modes before evaluating is the standard safeguard.
model.eval()
# Gradients are not needed to measure performance, so disable gradient
# tracking for the forward pass to save memory and computation time.
with torch.no_grad():
    outputs = model(x_train)
predicted = torch.round(outputs)  # probabilities -> class labels (0 or 1)
correct = predicted.eq(y_train.view(-1, 1)).float().sum()  # total number of correct predictions
accuracy = correct / y_train.shape[0]
print(f'Training Accuracy: {accuracy.item() * 100:.2f}%')

Training Accuracy: 98.46%


In [20]:
# Evaluate the model on the held-out test data.
model.eval()  # evaluation mode
with torch.no_grad():  # no gradients are needed for inference
    outputs = model(x_test)
predicted = torch.round(outputs)  # probabilities -> class labels (0 or 1)
correct = predicted.eq(y_test.view(-1, 1)).float().sum()  # total number of correct predictions
accuracy = correct / y_test.shape[0]
print(f'Test Accuracy: {accuracy.item() * 100:.2f}%')

Test Accuracy: 99.12%
