In [11]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, log_loss
from torchsummary import summary
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import matplotlib.pyplot as plt

In [12]:
b_cancer = pd.read_csv("BreastCancer.csv", index_col=0)
lbl = LabelEncoder()
y = lbl.fit_transform( b_cancer['Class'] )
X = b_cancer.drop('Class', axis=1)

In [13]:
scaler = MinMaxScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, 
                                                    random_state=24,stratify=y)
X_scl_trn = scaler.fit_transform(X_train) 
X_scl_tst = scaler.transform(X_test) 

In [14]:
X_torch = torch.from_numpy(X_scl_trn)
y_torch = torch.from_numpy(y_train)

In [15]:
print(X_torch.size())
print(y_torch.size())

torch.Size([489, 9])
torch.Size([489])


In [16]:
torch.manual_seed(24)
model = nn.Sequential(nn.Linear(in_features=X_scl_trn.shape[1], out_features=4),
                      nn.Tanh(),
                      nn.Linear(in_features=4,out_features=1))

In [17]:
summary(model, (1,X_scl_trn.shape[1]))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1, 4]              40
              Tanh-2                 [-1, 1, 4]               0
            Linear-3                 [-1, 1, 1]               5
Total params: 45
Trainable params: 45
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [18]:
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.4)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    fused: None
    lr: 0.4
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [19]:
for p in model.parameters():
    print(p)

y_pred = model(X_torch.float())
y_pred[:3]

Parameter containing:
tensor([[ 0.1763, -0.0833, -0.2833,  0.0205,  0.3107, -0.1487, -0.1085,  0.2607,
         -0.0464],
        [-0.1273, -0.0671,  0.0122, -0.2716, -0.0953,  0.3232, -0.0715,  0.3036,
         -0.1301],
        [-0.1341, -0.0994, -0.2980, -0.2008,  0.2015, -0.2501,  0.1139,  0.3136,
         -0.0424],
        [ 0.1458,  0.1557, -0.2379, -0.1285, -0.1044,  0.3217, -0.2975,  0.2018,
          0.3131]], requires_grad=True)
Parameter containing:
tensor([ 0.2636,  0.1421,  0.1484, -0.0178], requires_grad=True)
Parameter containing:
tensor([[ 0.2516,  0.4558, -0.1608,  0.4831]], requires_grad=True)
Parameter containing:
tensor([0.0795], requires_grad=True)


tensor([[0.1895],
        [0.2073],
        [0.1303]], grad_fn=<SliceBackward0>)

In [20]:
y_torch = y_torch.unsqueeze(1)
print(y_torch.shape)
print(y_pred.shape)

torch.Size([489, 1])
torch.Size([489, 1])


## Initial Loss

In [22]:
for epoch in np.arange(0,1000):
       # Forward pass: Compute predicted y by passing x to the model
       y_pred_prob = model(X_torch.float())

       # Compute and print loss
       loss = criterion(y_pred_prob, y_torch.float())
       if epoch%100 == 0:
          print('epoch: ', epoch+1,' loss: ', loss.item())

       # Zero gradients, perform a backward pass, and update the weights.
       optimizer.zero_grad()

       # perform a backward pass (backpropagation)
       loss.backward()

       # Update the parameters
       optimizer.step()
#print('epoch: ', epoch+1,' loss: ', loss.item())

epoch:  1  loss:  0.7016115784645081
epoch:  101  loss:  0.09638867527246475
epoch:  201  loss:  0.08764047920703888
epoch:  301  loss:  0.08430737257003784
epoch:  401  loss:  0.08201033622026443
epoch:  501  loss:  0.08003214001655579
epoch:  601  loss:  0.07824277132749557
epoch:  701  loss:  0.07666906714439392
epoch:  801  loss:  0.07532775402069092
epoch:  901  loss:  0.07419484853744507


In [23]:
X_torch_test = torch.from_numpy(X_scl_tst)

### Inferencing on test set
lin_output = model(X_torch_test.float()) # Equivalent predict_proba / predict

np_out = lin_output.detach().numpy()
y_pred_prob = 1 / (1 + np.exp(-np_out))

In [24]:
y_pred_prob

array([[5.4153678e-04],
       [6.1622630e-03],
       [9.7464120e-01],
       [9.7933894e-01],
       [1.8712914e-03],
       [2.9068576e-02],
       [3.2519829e-03],
       [1.8712914e-03],
       [2.5007320e-03],
       [5.4153678e-04],
       [9.7983229e-01],
       [9.6979505e-01],
       [7.7200838e-04],
       [6.0617952e-03],
       [9.8843145e-01],
       [4.4579604e-03],
       [8.8065158e-04],
       [9.7965372e-01],
       [9.3341446e-01],
       [9.8837847e-01],
       [8.3483737e-03],
       [9.8966706e-01],
       [6.0526174e-03],
       [8.2790560e-01],
       [9.8297793e-01],
       [5.9698939e-01],
       [9.6646434e-01],
       [9.8195028e-01],
       [7.5569545e-04],
       [7.7200838e-04],
       [7.6795008e-04],
       [1.3924410e-03],
       [3.2613240e-02],
       [3.3326230e-03],
       [2.6863385e-02],
       [2.3869760e-02],
       [1.9077801e-03],
       [7.5988017e-04],
       [9.9585187e-01],
       [5.4153678e-04],
       [2.0143601e-01],
       [1.027179

In [25]:
y_pred_prob = y_pred_prob.reshape(y_test.shape[0],) 

y_pred = np.where(y_pred_prob >= 0.5,1,0)

## Test Set Accuracy Score
print(accuracy_score(y_test,y_pred))

log_loss(y_test, y_pred_prob)

0.9714285714285714


0.08131366585486448