In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim


In [2]:
data_path = 'data/data.csv'

In [3]:
df = pd.read_csv(data_path)

In [4]:
print(f'training set shape : {df.shape}')
print(df.columns)

training set shape : (569, 33)
Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')


In [5]:
df = df.drop('Unnamed: 32', axis=1)
x = df.drop(['id', 'diagnosis'], axis=1)
y = df.iloc[:,1]



In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(x, y, shuffle=True, test_size=0.2)


In [7]:
x_train = torch.tensor(X_train.values.astype(np.float32), dtype=torch.float32, requires_grad=True)
Y_train = Y_train.replace('M', 0)
Y_train = Y_train.replace('B', 1)
y_train = torch.tensor(Y_train.values.astype(np.float32), dtype=torch.float, requires_grad=True)

x_test = torch.tensor(X_test.values.astype(np.float32), dtype=torch.float32, requires_grad=True)
Y_test = Y_test.replace('M', 0)
Y_test = Y_test.replace('B', 1)
y_test = torch.tensor(Y_test.values.astype(np.float32), dtype=torch.float32, requires_grad=True)

In [8]:
print(y_train)
print(x_train)

tensor([1., 0., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 1., 0., 0., 0., 1., 1.,
        1., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1.,
        1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0.,
        0., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1., 1.,
        1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1.,
        1., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 1., 1., 1., 1.,
        1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1.,
        0., 1., 0., 1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 0., 1., 0., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0.,
        1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1.,
        0., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 0.,
        0., 1., 1., 1., 1., 0., 1., 0., 

In [9]:
def generates_batches(x, y, batch_size):
    batches = []
    num_batches = int(x.shape[0]/batch_size)

    for i in range(num_batches):
        batches.append(
                     (x[i*batch_size: (i+1)*batch_size], 
                      y[i*batch_size: (i+1)*batch_size])
            )

    if(num_batches * batch_size < x.shape[0]):
        batches.append(
            (x[num_batches*batch_size : x.shape[0]],
                y[num_batches*batch_size : x.shape[0]])
            
        )
    return batches
    
    

In [10]:
train_set = generates_batches(x_train, y_train, 4)
print(train_set.__len__())


test_set = generates_batches(x_test, y_test, 4)


114


In [11]:

class feedforward(nn.Module):
    def __init__(self):
        super().__init__()

        self.hidden_layers = nn.Sequential(
            nn.Linear(30, 120),
            nn.ReLU(),
            nn.Linear(120, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        x = self.hidden_layers(x)
        x = x.view(-1)
        return x
    
    @property
    def model_name(self):
        return "feedforward"
        

In [12]:
from Trainer import trainer

model = feedforward()
loss_fn = nn.BCELoss()
optimizer = optimizer = optim.SGD(model.parameters(), lr=1e-4)
max_epochs = 100
model_trainer = trainer(model, optimizer, loss_fn, train_set, test_set, max_epochs, model_name = model.model_name , when_to_stop=4)



In [13]:
model_trainer.fit()

epoch : 1, train_loss : 0.3635 ,  val_loss : 0.3422
epoch : 2, train_loss : 0.3081 ,  val_loss : 0.2636
epoch : 3, train_loss : 0.2833 ,  val_loss : 0.2349
epoch : 4, train_loss : 0.2651 ,  val_loss : 0.2162
epoch : 5, train_loss : 0.2546 ,  val_loss : 0.2061
epoch : 6, train_loss : 0.2473 ,  val_loss : 0.2000
epoch : 7, train_loss : 0.2422 ,  val_loss : 0.1968
epoch : 8, train_loss : 0.2373 ,  val_loss : 0.1942
epoch : 9, train_loss : 0.2341 ,  val_loss : 0.1938
epoch : 10, train_loss : 0.2324 ,  val_loss : 0.1945
epoch : 11, train_loss : 0.2309 ,  val_loss : 0.1955
epoch : 12, train_loss : 0.2295 ,  val_loss : 0.1964
epoch : 13, train_loss : 0.2282 ,  val_loss : 0.1975
Overfitting!
*************************************************************
[Checkpoint: epoch: 9, val_loss: 0.194 
saved on checkpoints/feedforward/checkpoint_13.pth]
*************************************************************
Early Stopping! Overfitting
Best Validation Loss : 0.19381077587604523, Current_Loss : 0.19

In [14]:
def check_accuracy(x, y):
    y_pred = model(x)
    num_correct = 0
    for i in range(y_pred.shape[0]):
        if(abs(y_pred[i].item() - y[i]) < 0.2):
            num_correct += 1
    acc = num_correct/x.shape[0] * 100
    print(f'{acc:.2f}%' )

In [15]:

for batch in test_set:
    x, y = batch
    check_accuracy(x, y)

100.00%
50.00%
50.00%
75.00%
100.00%
100.00%
75.00%
25.00%
100.00%
100.00%
100.00%
50.00%
75.00%
50.00%
100.00%
75.00%
75.00%
100.00%
100.00%
50.00%
75.00%
50.00%
50.00%
75.00%
75.00%
100.00%
100.00%
50.00%
100.00%
