In [1]:
import torch
import torch.nn as nn
import pandas as pd
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv('breast_cancer_data.csv')
data.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [3]:
X = data.iloc[:, 1:]
Y = data.diagnosis

X.shape, Y.shape

((569, 30), (569,))

In [4]:
encoder = LabelEncoder()
Y = encoder.fit_transform(Y)
Y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,

In [5]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
X

array([[ 1.09706398, -2.07333501,  1.26993369, ...,  2.29607613,
         2.75062224,  1.93701461],
       [ 1.82982061, -0.35363241,  1.68595471, ...,  1.0870843 ,
        -0.24388967,  0.28118999],
       [ 1.57988811,  0.45618695,  1.56650313, ...,  1.95500035,
         1.152255  ,  0.20139121],
       ...,
       [ 0.70228425,  2.0455738 ,  0.67267578, ...,  0.41406869,
        -1.10454895, -0.31840916],
       [ 1.83834103,  2.33645719,  1.98252415, ...,  2.28998549,
         1.91908301,  2.21963528],
       [-1.80840125,  1.22179204, -1.81438851, ..., -1.74506282,
        -0.04813821, -0.75120669]])

In [6]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((455, 30), (114, 30), (455,), (114,))

In [7]:
# convert from nupy to tensor
x_train_tensor = torch.from_numpy(x_train).float()
x_test_tensor = torch.from_numpy(x_test).float()
y_train_tensor = torch.from_numpy(y_train).float()
y_test_tensor = torch.from_numpy(y_test).float()

In [8]:
x_train_tensor.shape, x_test_tensor.shape, y_train_tensor.shape, y_test_tensor.shape

(torch.Size([455, 30]),
 torch.Size([114, 30]),
 torch.Size([455]),
 torch.Size([114]))

- Building the neural network using nn module
    - NN: core library that provide a wide range of classes and designed to help devs to build neural networks
        - Layers
        - Activation Functions
        - Loss Functions
        - Container Modules: `nn.Sequential` container to stack layers in order
        - Regularization and Dropout
- Using built-in activation functions
- Using built-in loss function
- Using built-in optimizers

In [9]:
class MyClassifier(nn.Module):

    def __init__(self, num_features):
        super().__init__()
        self.linear = nn.Linear(num_features, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, features):
        out = self.linear(features)
        out = self.sigmoid(out)
        return out


In [10]:
model = MyClassifier(num_features=x_train_tensor.shape[1])

# model.forward(x_train_tensor)
# In nn module __call__ is overwritten therefore we don't need to call mode.forward()

# standard way
y_pred = model(x_train_tensor)

In [11]:
print(y_pred)

tensor([[0.1634],
        [0.7257],
        [0.2603],
        [0.2129],
        [0.1619],
        [0.2197],
        [0.5243],
        [0.3549],
        [0.2957],
        [0.7696],
        [0.3080],
        [0.8943],
        [0.4194],
        [0.2006],
        [0.3790],
        [0.6285],
        [0.8304],
        [0.3066],
        [0.4650],
        [0.2648],
        [0.5858],
        [0.3768],
        [0.4338],
        [0.2950],
        [0.4354],
        [0.2999],
        [0.3121],
        [0.8681],
        [0.2743],
        [0.4747],
        [0.3613],
        [0.3847],
        [0.6520],
        [0.8487],
        [0.3961],
        [0.5379],
        [0.4909],
        [0.4658],
        [0.1978],
        [0.4542],
        [0.2573],
        [0.9338],
        [0.7423],
        [0.3019],
        [0.4962],
        [0.3733],
        [0.7171],
        [0.2342],
        [0.2508],
        [0.5155],
        [0.4497],
        [0.2633],
        [0.4029],
        [0.4550],
        [0.2455],
        [0

In [12]:
# weights
print(model.linear.weight)

Parameter containing:
tensor([[ 0.0102, -0.0325,  0.1008,  0.1693, -0.0714,  0.1468,  0.1789,  0.0957,
         -0.1768,  0.0938,  0.1159,  0.1575,  0.0618,  0.1307, -0.0839,  0.0252,
          0.0029, -0.1108, -0.0085, -0.1221,  0.1148, -0.0905,  0.1093,  0.0911,
         -0.0538, -0.0964,  0.0126,  0.0705, -0.1778, -0.1521]],
       requires_grad=True)


In [13]:
# linear
print(model.linear.bias)

Parameter containing:
tensor([-0.1061], requires_grad=True)


Print model summary:
- Just like in Keras we had model.summary() in PyTorch we have a library `torchinfo` to output  the model summary.

In [14]:
!pip install torchinfo --quiet
from torchinfo import summary

In [15]:
summary(model, input_size = x_train_tensor.shape)

Layer (type:depth-idx)                   Output Shape              Param #
MyClassifier                             [455, 1]                  --
├─Linear: 1-1                            [455, 1]                  31
├─Sigmoid: 1-2                           [455, 1]                  --
Total params: 31
Trainable params: 31
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.01
Input size (MB): 0.05
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.06

Trainable Parameters: 30 features/columns + bias


In [16]:
class ModelWithHiddenLayer(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.linear1 = nn.Linear(num_features, 3)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(3, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, features):
        out = self.linear1(features)
        out = self.relu(out)

        out = self.linear2(out)
        out = self.sigmoid(out)

        return out

In [17]:
model2 = ModelWithHiddenLayer(x_train_tensor.shape[1])
y_pred_2 = model2(x_train_tensor)

print(y_pred_2)

tensor([[0.3215],
        [0.2509],
        [0.3916],
        [0.3316],
        [0.2804],
        [0.3298],
        [0.2310],
        [0.3581],
        [0.3678],
        [0.3497],
        [0.3955],
        [0.2887],
        [0.3267],
        [0.3826],
        [0.3520],
        [0.3649],
        [0.2463],
        [0.2997],
        [0.3643],
        [0.3319],
        [0.3965],
        [0.2985],
        [0.3695],
        [0.3508],
        [0.3561],
        [0.3903],
        [0.3548],
        [0.2804],
        [0.3759],
        [0.3723],
        [0.3674],
        [0.3754],
        [0.3319],
        [0.2643],
        [0.3844],
        [0.3616],
        [0.3519],
        [0.3431],
        [0.3755],
        [0.3461],
        [0.3088],
        [0.1241],
        [0.2854],
        [0.3651],
        [0.3542],
        [0.3486],
        [0.3369],
        [0.4156],
        [0.3951],
        [0.4156],
        [0.2680],
        [0.4138],
        [0.3356],
        [0.3256],
        [0.3079],
        [0

In [18]:
model2.linear1.weight, model2.linear2.weight

(Parameter containing:
 tensor([[ 0.1320,  0.0504,  0.1808,  0.1188,  0.1814, -0.1766,  0.1618, -0.1053,
          -0.0437,  0.1072, -0.1629, -0.0269,  0.1214,  0.1280,  0.1640,  0.0295,
          -0.0859, -0.0154,  0.1741,  0.1770,  0.1799,  0.1225, -0.0919,  0.1252,
           0.0946, -0.1450,  0.0978,  0.1695, -0.0740, -0.1784],
         [ 0.1477, -0.1301, -0.0893,  0.0362,  0.1825, -0.0351, -0.1222,  0.1563,
           0.1712, -0.1217,  0.0979,  0.0784,  0.0537, -0.1643, -0.1643,  0.0889,
          -0.0365,  0.0436,  0.1266,  0.1572, -0.0355,  0.1459, -0.1453,  0.0098,
           0.0766,  0.0919,  0.1268,  0.1000,  0.0420, -0.0821],
         [ 0.0867,  0.1465,  0.0423,  0.0712,  0.0971, -0.1395, -0.1593, -0.0485,
           0.1069, -0.1450, -0.0936, -0.0412, -0.1218,  0.0304, -0.1735, -0.0282,
          -0.1111,  0.1157,  0.1305, -0.0995,  0.1179,  0.0463, -0.0843, -0.1808,
           0.0844, -0.0633, -0.1330, -0.1126, -0.1060, -0.0188]],
        requires_grad=True),
 Parameter con

- layer 1 : 30x3 weights
- layer 2 : 3 weights

In [19]:
summary(model2, input_size = x_train_tensor.shape)

Layer (type:depth-idx)                   Output Shape              Param #
ModelWithHiddenLayer                     [455, 1]                  --
├─Linear: 1-1                            [455, 3]                  93
├─ReLU: 1-2                              [455, 3]                  --
├─Linear: 1-3                            [455, 1]                  4
├─Sigmoid: 1-4                           [455, 1]                  --
Total params: 97
Trainable params: 97
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.04
Input size (MB): 0.05
Forward/backward pass size (MB): 0.01
Params size (MB): 0.00
Estimated Total Size (MB): 0.07

Initialization and passing the layer is cumbersome job. We can use PyTorch `nn.Sequential` module.

In [20]:
class ModelWithSequentialModule(nn.Module):
    def __init__(self, num_features):
        super().__init__()

        self.network = nn.Sequential(
            nn.Linear(num_features, 3),
            nn.ReLU(),
            nn.Linear(3, 1),
            nn.Sigmoid()
        )

    def forward(self, features):
        out = self.network(features)
        return out


In [21]:
model3 = ModelWithSequentialModule(x_train_tensor.shape[1])
y_pred_3 = model3(x_train_tensor)
print(y_pred_3)

tensor([[0.6207],
        [0.6327],
        [0.7045],
        [0.6807],
        [0.6547],
        [0.6207],
        [0.7351],
        [0.6285],
        [0.6457],
        [0.6937],
        [0.6695],
        [0.7257],
        [0.6497],
        [0.6638],
        [0.6378],
        [0.6316],
        [0.6801],
        [0.6253],
        [0.6445],
        [0.6612],
        [0.6882],
        [0.6681],
        [0.6387],
        [0.6950],
        [0.6731],
        [0.6207],
        [0.6182],
        [0.7103],
        [0.6187],
        [0.6833],
        [0.6207],
        [0.6720],
        [0.6354],
        [0.7641],
        [0.6304],
        [0.6486],
        [0.6566],
        [0.6996],
        [0.6737],
        [0.6312],
        [0.6970],
        [0.6795],
        [0.6873],
        [0.7318],
        [0.6602],
        [0.6469],
        [0.7386],
        [0.6599],
        [0.6477],
        [0.7107],
        [0.7123],
        [0.6342],
        [0.6713],
        [0.6731],
        [0.6478],
        [0

In [22]:
class NNClassifier(nn.Module):
    def __init__(self, num_features):
        super().__init__()

        self.network = nn.Sequential(
            nn.Linear(num_features, 3),
            nn.ReLU(),
            nn.Linear(3, 1),
            nn.Sigmoid()
        )

    def forward(self, features):
        return self.network(features)


    def backward(self, features, target, learning_rate, epochs):

        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
        loss_fn = nn.BCELoss()
        for epoch in range(epochs):
            optimizer.zero_grad()

            y_pred = self.forward(features)

            loss = loss_fn(y_pred, target)

            optimizer.zero_grad()

            loss.backward()

            optimizer.step()

            print(f'epoch: {epoch + 1}, Loss: {loss.item()}')

    def predict(self, features):
        with torch.no_grad():
            y_prediction = self.forward(features)
            return (y_prediction > 0.5).float()


    def accuracy(self, features, target):
        y_pred_class = self.predict(features)
        return  (y_pred_class == target).float().mean().item()



In [26]:
classifier = NNClassifier(x_train_tensor.shape[1])

classifier.backward(x_train_tensor, y_train_tensor.view(-1, 1), learning_rate=0.1, epochs=25)

epoch: 1, Loss: 0.6235623359680176
epoch: 2, Loss: 0.6235623359680176
epoch: 3, Loss: 0.6235623359680176
epoch: 4, Loss: 0.6235623359680176
epoch: 5, Loss: 0.6235623359680176
epoch: 6, Loss: 0.6235623359680176
epoch: 7, Loss: 0.6235623359680176
epoch: 8, Loss: 0.6235623359680176
epoch: 9, Loss: 0.6235623359680176
epoch: 10, Loss: 0.6235623359680176
epoch: 11, Loss: 0.6235623359680176
epoch: 12, Loss: 0.6235623359680176
epoch: 13, Loss: 0.6235623359680176
epoch: 14, Loss: 0.6235623359680176
epoch: 15, Loss: 0.6235623359680176
epoch: 16, Loss: 0.6235623359680176
epoch: 17, Loss: 0.6235623359680176
epoch: 18, Loss: 0.6235623359680176
epoch: 19, Loss: 0.6235623359680176
epoch: 20, Loss: 0.6235623359680176
epoch: 21, Loss: 0.6235623359680176
epoch: 22, Loss: 0.6235623359680176
epoch: 23, Loss: 0.6235623359680176
epoch: 24, Loss: 0.6235623359680176
epoch: 25, Loss: 0.6235623359680176


- When you use an optimizer (e.g., SGD, Adam), you don't need to manually zero out gradients for each layer in your model. The optimizer will automatically handle the gradient zeroing for all parameters it is responsible for.
- optimizer.zero_grad() will automatically zero out the gradients of all parameters in your model before each backward pass. This is part of the standard practice in PyTorch when using optimizers.
- The reason we can rely on optimizer.zero_grad() instead of manually calling .zero_() on the individual layers is that optimizer.zero_grad() works with the parameters registered with the optimizer, which typically includes all model parameters (e.g., weights and biases).

In [27]:
classifier.eval()

NNClassifier(
  (network): Sequential(
    (0): Linear(in_features=30, out_features=3, bias=True)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=1, bias=True)
    (3): Sigmoid()
  )
)

In [28]:
accuracy = classifier.accuracy(x_test_tensor, y_test_tensor)
print(accuracy)

0.6206524968147278
