# Neptune


In [1]:
!pip install neptune

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting neptune
  Downloading neptune-1.1.1-py3-none-any.whl (442 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m442.6/442.6 KB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3>=1.16.0
  Downloading boto3-1.26.103-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.6/135.6 KB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting PyJWT
  Downloading PyJWT-2.6.0-py3-none-any.whl (20 kB)
Collecting swagger-spec-validator>=2.7.4
  Downloading swagger_spec_validator-3.0.3-py2.py3-none-any.whl (27 kB)
Collecting GitPython>=2.0.8
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 KB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
Collecting websocket-client!=1.0.0,>=0.35.0
  Downloading websocket_client-1.5.1-py3-none-any.whl (55 kB)


In [2]:
import neptune





# Data Loading


In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the data-loading cell reads its .npy files from under this path.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
from sklearn.model_selection import train_test_split
import numpy as np
import torch
from PIL import Image


# Load the input array and the target array from Drive and report both shapes.
X_train = np.load('/content/drive/MyDrive/healthcare_practicals/miniproject/data/X_guys_2d.npy')
y_train = np.load('/content/drive/MyDrive/healthcare_practicals/miniproject/data/y_guys_128_2d.npy')
print(X_train.shape, y_train.shape)

# 80/10/10 split: hold out 20% first, then cut the hold-out in half
# to obtain the validation and test sets (fixed seed for repeatability).
X_train, valtest_images, y_train, valtest_labels = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    valtest_images, valtest_labels, test_size=0.5, random_state=42
)

# Insert a singleton channel axis after the sample axis: (N, H, W) -> (N, 1, H, W).
X_train, X_val, X_test = (
    np.expand_dims(split, axis=1) for split in (X_train, X_val, X_test)
)

(317, 128, 128) (317, 128, 128)


In [5]:
# Print, in this order: the three shapes, then the three means, then the three
# standard deviations of the train / validation / test inputs.
for summarise in (np.shape, np.average, np.std):
    for split in (X_train, X_val, X_test):
        print(summarise(split))

(253, 1, 128, 128)
(32, 1, 128, 128)
(32, 1, 128, 128)
287.9302712420177
268.7814110944717
316.16472715740406
367.3290279231455
334.4379147954599
398.5666321580297


In [6]:
from torch.utils.data import Dataset, DataLoader

class numpy_dataset(Dataset):
    """Map-style dataset that pairs samples with targets stored as NumPy arrays.

    Both arrays are converted once, at construction time, with
    ``torch.from_numpy``; indexing returns the matching ``(sample, target)`` pair.
    """

    def __init__(self, data, target):
        # from_numpy keeps each array's dtype.
        self.data, self.target = (torch.from_numpy(array) for array in (data, target))

    def __getitem__(self, index):
        return self.data[index], self.target[index]

    def __len__(self):
        # Number of samples = length of the first axis of the data tensor.
        return len(self.data)
    
# Wrap the train/validation splits as datasets, then batch them.
# Only the training loader shuffles; both drop a trailing incomplete batch.
train_dataset, val_dataset = numpy_dataset(X_train, y_train), numpy_dataset(X_val, y_val)

train_dataloader = DataLoader(dataset=train_dataset, batch_size=16, drop_last=True, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=16, drop_last=True, shuffle=False)

# UNet


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
from torch.autograd import Variable
import numpy as np
from sklearn.metrics import accuracy_score


class UNet(nn.Module):
    """Small two-level U-Net with skip connections and a sigmoid output.

    Down-sampling is done by 2x2 max pooling with stride 3; up-sampling is
    nearest-neighbour interpolation back to the spatial size of the matching
    encoder feature map, so any input size that survives the two pooling
    steps is accepted (the original hard-coded 43x43 / 128x128, i.e. 128x128
    inputs only). All parameters are cast to float64 via ``self.double()``.

    Args:
        in_channels: Number of channels of the input image. Also the number of
            channels produced by the final convolution.
        init_features: Number of feature maps of the first encoder block; the
            second block uses twice and the bottleneck four times as many.
        out_channels: Accepted for interface compatibility but not used.
            NOTE(review): the output layer emits ``in_channels`` maps; switching
            it to ``out_channels`` would change the output shape the training
            code relies on, so it is left as is -- confirm the intent.
    """

    def __init__(self, in_channels=1, init_features=4, out_channels=2):
        super(UNet, self).__init__()

        features = init_features
        # Fix: the first encoder used to be hard-wired to 1 input channel,
        # silently ignoring ``in_channels``.
        self.encoder1 = UNet._block(in_channels, features, "encoder1")
        self.pool1 = nn.MaxPool2d(2, stride=3)
        self.encoder2 = UNet._block(features, 2 * features, "encoder2")
        self.pool2 = nn.MaxPool2d(2, stride=3)
        self.bottleneck = UNet._block(2 * features, 4 * features, "bottleneck")
        # Decoder inputs are the up-sampled deeper features concatenated with
        # the skip connection from the encoder at the same level.
        self.decoder2 = UNet._block(4 * features + 2 * features, 2 * features, "decoder2")
        self.decoder1 = UNet._block(2 * features + features, features, "decoder1")
        self.conv = nn.Conv2d(features, in_channels, kernel_size=(3, 3), padding=1)
        self.activation = torch.sigmoid
        self.double()

    def forward(self, x):
        """Return sigmoid activations for ``x`` of shape (N, in_channels, H, W).

        The channel axis is dropped when it has size 1, giving (N, H, W);
        otherwise the result is (N, in_channels, H, W).
        """
        enc1 = self.encoder1(x)
        enc2 = self.encoder2(self.pool1(enc1))
        bottleneck = self.bottleneck(self.pool2(enc2))

        # Up-sample to the skip connection's spatial size instead of a fixed size.
        up2 = F.interpolate(bottleneck, size=tuple(enc2.shape[-2:]), mode="nearest")
        dec2 = self.decoder2(torch.cat([enc2, up2], dim=1))
        up1 = F.interpolate(dec2, size=tuple(enc1.shape[-2:]), mode="nearest")
        dec1 = self.decoder1(torch.cat([enc1, up1], dim=1))

        output = self.activation(self.conv(dec1))
        # Fix: squeeze only the channel axis. A bare torch.squeeze() also
        # removed the batch axis whenever the batch held a single sample.
        return output.squeeze(1)

    @staticmethod
    def _block(in_channels, features, name):
        """Build one conv block: 3x3 convolution (padding 1) -> ReLU -> BatchNorm.

        ``name`` is accepted for call-site readability only; it is not used.
        """
        return nn.Sequential(
            nn.Conv2d(in_channels, features, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(features),
        )

## L2 Regularization

In [8]:
class L2RegLoss(nn.Module):
    """Base criterion plus a weight-norm penalty on the model's parameters.

    The penalty is the mean, taken over the model's parameter tensors, of each
    tensor's squared norm. It is multiplied by ``mu`` and added to the value of
    ``loss_fn(x, target)``.
    """

    def __init__(self, loss_fn=None, mu=1):
        super().__init__()
        self.loss_fn = loss_fn
        self.mu = mu
        self.eps = 1e-7  # stored on the instance; not read by forward()

    def forward(self, x, target, model):
        base_loss = self.loss_fn(x, target)
        squared_norms = [weights.norm() ** 2 for weights in model.parameters()]
        penalty = torch.stack(squared_norms).mean()
        return base_loss + self.mu * penalty
    

In [9]:
def train(net, dataloader, optim, loss_fn, epoch):
    """Run one training epoch and report the mean batch loss.

    Args:
        net: Model to optimise; put into training mode here.
        dataloader: Iterable of ``(data, target)`` batches. Must also support
            ``len()`` and expose ``.dataset`` (both used by the progress line).
        optim: Optimizer stepped once per batch.
        loss_fn: Callable invoked as ``loss_fn(pred, target, net)``.
        epoch: Epoch number; only used in the progress message.

    Returns:
        Tuple ``(av_loss, model_weight, (data, pred, target))`` where
        ``av_loss`` is the mean batch loss as a 0-d NumPy array,
        ``model_weight`` is a tensor holding the mean squared norm of the
        model's parameter tensors after the epoch, and the inner tuple holds
        the tensors of the last batch processed.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    start_time = time.perf_counter()
    net.train()  # Put the network in train mode
    total_loss = 0
    batches = 0

    for batch_idx, (data, target) in enumerate(dataloader):
        batches += 1

        pred = net(data)
        loss = loss_fn(pred, target, net)
        net.zero_grad()
        optim.zero_grad()
        loss.backward()
        optim.step()

        # Accumulate a detached value: summing the live loss tensors made the
        # running total carry autograd history for every batch of the epoch.
        total_loss += loss.detach()

        if batch_idx % 100 == 0:  # Report stats every 100 batches
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx+1) * len(data), len(dataloader.dataset),
                100. * (batch_idx+1) / len(dataloader), loss.item()), flush=True)

    av_loss = (total_loss / batches).cpu().numpy()
    # flush belongs to print(); it used to be passed to str.format() and ignored.
    print('\nTraining set: Average loss: {:.4f}'.format(av_loss), flush=True)
    total_time = time.perf_counter() - start_time

    # Pure monitoring statistic, so no gradient tracking is needed.
    with torch.no_grad():
        model_weight = torch.mean(torch.stack([p.norm() ** 2 for p in net.parameters()]))

    print('Time taken for epoch = ', total_time)
    return av_loss, model_weight, (data, pred, target)

def val(net, val_dataloader, optim, loss_fn, epoch):
    """Evaluate the model on a validation loader and report the mean batch loss.

    The previous version also built per-batch argmax arrays and an accuracy
    score that was never printed or returned; that dead work has been removed.

    Args:
        net: Model to evaluate; put into eval mode here (and left in it).
        val_dataloader: Iterable of ``(data, target)`` batches.
        optim: Unused; kept so existing call sites keep working.
        loss_fn: Callable invoked as ``loss_fn(pred, target, net)``.
        epoch: Unused; kept so existing call sites keep working.

    Returns:
        The mean batch loss as a 0-d NumPy array.

    Raises:
        ZeroDivisionError: If ``val_dataloader`` yields no batches.
    """
    net.eval()  # Put the model in eval mode
    total_loss = 0
    batches = 0
    with torch.no_grad():  # So no gradients accumulate
        for data, target in val_dataloader:
            batches += 1
            pred = net(data)
            total_loss += loss_fn(pred, target, net)

    av_loss = (total_loss / batches).detach().numpy()

    # flush belongs to print(); it used to be passed to str.format() and ignored.
    print('Validation set: Average loss: {:.4f}'.format(av_loss), flush=True)
    print('\n')
    return av_loss


def predict(net, test_dataloader):
    """Run the model over a loader and collect argmax predictions and targets.

    For every batch the argmax along axis 1 is taken of both the model output
    and the target; results of all batches are flattened and concatenated.

    Fixes over the previous version:
      * the model is evaluated in eval mode (it used to run in whatever mode
        it happened to be in) and its previous mode is restored afterwards;
      * batches of different sizes (e.g. a short final batch) are supported --
        ``np.array(list_of_arrays).reshape(-1)`` required equal shapes;
      * tensors are moved to the CPU before conversion to NumPy.

    Args:
        net: Callable model; if it has a truthy ``training`` attribute it is
            switched to eval mode for the duration of the call.
        test_dataloader: Iterable of ``(inputs, target)`` batches.

    Returns:
        Tuple ``(pred_store, true_store)`` of 1-D NumPy arrays. Both are empty
        float arrays when the loader yields no batches.
    """
    pred_store = []
    true_store = []

    was_training = getattr(net, "training", False)
    if was_training:
        net.eval()
    try:
        with torch.no_grad():
            for inputs, target in test_dataloader:
                pred = net(inputs)

                pred_store.append(np.argmax(pred.detach().cpu().numpy(), axis=1).reshape(-1))
                true_store.append(np.argmax(target.detach().cpu().numpy(), axis=1).reshape(-1))
    finally:
        if was_training:
            net.train()

    if not pred_store:
        # Same result the original produced for an empty loader.
        return np.array([]), np.array([])

    return np.concatenate(pred_store), np.concatenate(true_store)


In [10]:
class dice_loss(nn.Module):
    """Soft Dice loss: ``1 - mean(2 * |x * t| / (|x| + |t| + eps))``.

    Sums are taken over axis 0 and over every axis from 2 onwards, so axis 1
    is the one that is kept and averaged over (a channels-first layout).
    NOTE(review): for 3-D inputs such as (N, H, W), axis 1 is H, so the score
    is computed per row and then averaged -- confirm that is intended.

    The unused ``num_classes = target.shape[1]`` lookup was removed; it had no
    effect other than raising ``IndexError`` on 1-D targets.
    """

    def __init__(self):
        super(dice_loss, self).__init__()
        self.eps = 1e-7  # keeps the denominator non-zero

    def forward(self, x, target):
        """Return the scalar Dice loss between predictions ``x`` and ``target``."""
        # Match the target's tensor type to the predictions before mixing them.
        target = target.type(x.type())
        dims = (0,) + tuple(range(2, target.ndimension()))
        intersection = torch.sum(x * target, dims)
        cardinality = torch.sum(x + target, dims)
        # Named dice_score so the local no longer shadows the class name.
        dice_score = (2. * intersection / (cardinality + self.eps)).mean()
        return 1 - dice_score

# Training

In [14]:
import matplotlib.pyplot as plt

# Rebuild the datasets/loaders so this cell can be re-run on its own.
train_dataset = numpy_dataset(X_train, y_train)
val_dataset = numpy_dataset(X_val, y_val)

train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False, drop_last=True)

net = UNet(in_channels=1, init_features=4, out_channels=2)

# Report the number of trainable parameters.
n_trainable = sum(p.numel() for p in net.parameters() if p.requires_grad)
print('Trainable params: ', n_trainable)


def _input_to_image(array):
    """Min-max scale a 2-D array to 0..255 and return it as an 8-bit greyscale image.

    The raw inputs are not in 0..255 (the split statistics printed earlier show
    a mean near 288), so converting them straight to mode "L" clipped them.
    """
    array = np.asarray(array, dtype=np.float64)
    low, high = array.min(), array.max()
    if high > low:
        scaled = (array - low) / (high - low)
    else:
        scaled = np.zeros_like(array)  # constant image: nothing to stretch
    return Image.fromarray((scaled * 255).astype(np.uint8))


# Hyper-parameters of this run; also logged to Neptune below.
params = {"lr": 0.05, "optimizer": "SGD", "loss": "dice", "epoch": 150, "mu": 1, "model": "UNet"}

loss_dict = {"bce": nn.BCELoss(), "dice": dice_loss()}
# Factories rather than instances, so only the selected optimizer is constructed.
optim_factories = {
    "Adam": lambda: torch.optim.Adam(net.parameters(), lr=params["lr"]),
    "SGD": lambda: torch.optim.SGD(net.parameters(), lr=params["lr"]),
}

# Mu to vary between 1, 0.1
class_loss = L2RegLoss(loss_dict[params["loss"]], mu=params["mu"])
optim = optim_factories[params["optimizer"]]()

# Neptune
run = neptune.init_run(
    capture_hardware_metrics=True,
    capture_stderr=True,
    capture_stdout=True,
)

# try/finally so the run is always stopped, even when training raises or is interrupted.
try:
    run["parameters"] = params

    max_epochs = params["epoch"]
    for epoch in range(1, max_epochs+1):
        train_loss, model_weight, (last_data, last_pred, last_target) = train(
            net, train_dataloader, optim, class_loss, epoch
        )
        val_loss = val(net, val_dataloader, optim, class_loss, epoch)

        if epoch % 10 == 0:
            # First sample of the last training batch: input, prediction, target.
            brain_img = _input_to_image(last_data[0].squeeze(0).cpu().numpy())

            seg_img = last_pred[0].detach().numpy()*255
            seg_img = Image.fromarray(seg_img).convert("L")

            target_img = last_target[0].detach().numpy()*255
            target_img = Image.fromarray(target_img).convert("L")
            # Neptune: Image log
            run["train/images"].append(brain_img, description=f"Epoch {epoch}: Input Img \n Avg val loss: {val_loss}")
            run["train/images"].append(seg_img, description=f"Epoch {epoch}: Output Img \n Avg val loss: {val_loss}")
            run["train/images"].append(target_img, description=f"Epoch {epoch}: Target Img \n Avg val loss: {val_loss}")
            run["train/temp_segmentation_average"].append(last_pred[0].mean().item())

        # Neptune: Loss logging (plain Python floats rather than tensors / 0-d arrays)
        run["train/train_loss"].append(float(train_loss))
        run["train/val_loss"].append(float(val_loss))
        run["train/model_weight"].append(float(model_weight))

    torch.save(net.state_dict(), "UNet.pth")
    run["model_checkpoint/final_model"].upload("UNet.pth")
finally:
    run.stop()

https://app.neptune.ai/miyamura80/DLH-Miniproject/e/DLHMIN-63

Training set: Average loss: 2.8907
Time taken for epoch =  12.41242613500026
Validation set: Average loss: 2.7476



Training set: Average loss: 2.4967
Time taken for epoch =  11.77878205800016
Validation set: Average loss: 2.3982



Training set: Average loss: 2.1958
Time taken for epoch =  11.714813083000081
Validation set: Average loss: 2.1088



Training set: Average loss: 1.9424
Time taken for epoch =  11.97182513000007
Validation set: Average loss: 1.8621



Training set: Average loss: 1.7355
Time taken for epoch =  21.276662769999803
Validation set: Average loss: 1.6698



Training set: Average loss: 1.5516
Time taken for epoch =  11.390977943000053
Validation set: Average loss: 1.4925



Training set: Average loss: 1.3953
Time taken for epoch =  11.172564702999807
Validation set: Average loss: 1.3462



Training set: Average loss: 1.2619
Time taken for epoch =  12.218345927999962
Validation set: Average loss: 1.2564

In [12]:
# Save the model weights to UNet.pth and upload that file to the Neptune run.
# NOTE(review): the training cell already ends with this same save/upload followed by
# run.stop(); if that cell ran first, `run` here is presumably a stopped run -- confirm
# whether this cell is still needed.
torch.save(net.state_dict(), "UNet.pth")
run["model_checkpoint/final_model"].upload("UNet.pth")

In [13]:
# Stop the Neptune run (the recorded output below shows it waiting for pending operations to sync).
# NOTE(review): the training cell also calls run.stop() as its last statement -- confirm this cell is still needed.
run.stop()

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 2 operations to synchronize with Neptune. Do not kill this process.
All 2 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/miyamura80/DLH-Miniproject/e/DLHMIN-61/metadata
