<a href="https://colab.research.google.com/github/Alton01/ML-breast-cancer-prediction-with-pytorch/blob/main/breast_cancer_prediction_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [3]:
# Load the scikit-learn breast cancer dataset and pull out features and labels.
data = load_breast_cancer()
x = data.data    # feature matrix
y = data.target  # class labels

In [4]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Show the shapes of the full feature matrix, then the training and test partitions (one per line).
print(x.shape, x_train.shape, x_test.shape, sep="\n")

(569, 30)
(455, 30)
(114, 30)


In [6]:
# Standardize the features: each column is rescaled to zero mean and unit standard deviation.
# Note that this only shifts and scales the values; it does not change the shape of their distribution.
# The scaler is fitted on the training set only and the same statistics are then applied to the
# test set, so no information from the test data leaks into preprocessing.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [7]:
# Check the container type of the scaled training features before converting them to tensors.
type(x_train)

numpy.ndarray

In [8]:
# PyTorch modules operate on tensors, not NumPy arrays: convert every split to a
# float32 tensor created directly on the selected device.
x_train, x_test, y_train, y_test = (
    torch.tensor(array, dtype=torch.float32, device=device)
    for array in (x_train, x_test, y_train, y_test)
)

In [9]:
# Neural Network Architecture
# input_size: number of neurons in the input layer; it equals the number of features.
# hidden_size: number of neurons in the hidden layer.
# output_size: number of neurons in the output layer; it depends on how the classes are encoded.
# For this binary classification a single sigmoid-activated output neuron is used (output_size = 1).
# ReLU (Rectified Linear Unit) is the non-linear activation applied after the first fully connected layer;
# it introduces non-linearity into the model, allowing it to learn more complex patterns.
# The sigmoid activation is commonly used in the output layer of binary classification models to squash the output values between 0 and 1.

class NeuralNet(nn.Module):
    """Feed-forward network with one hidden layer and a sigmoid output.

    The data flows through: Linear -> ReLU -> Linear -> Sigmoid.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are registered in the order they are applied in forward().
        self.fc1 = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.relu = nn.ReLU()                           # hidden non-linearity
        self.fc2 = nn.Linear(hidden_size, output_size)  # hidden -> output
        self.sigmoid = nn.Sigmoid()                     # squashes outputs into (0, 1)

    def forward(self, x):
        """Return the sigmoid-activated output of the network for input ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.sigmoid(self.fc2(hidden))

In [10]:
# Hyperparameters for the network and its training.
# The input width is taken from the data: one input neuron per feature column (30 here).
input_size = x_train.shape[1]
# 64 hidden units; a single output neuron whose sigmoid value lies between 0 and 1.
hidden_size, output_size = 64, 1
# Optimizer step size and number of training epochs.
learning_rate, num_epochs = 0.001, 100