<a href="https://colab.research.google.com/github/MEGNEOrnela/Ultrasound-Image-Challenge/blob/main/Ultrasound_Image_Challenge_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Cameroon Advance Analytic Ultrasound Image Challenge

## Import necessary modules and libraries

In [None]:
import os
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
from torchvision import utils
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
np.random.seed(0)

## Dataset Preparation

In [None]:
!wget https://github.com/sankasom/cmrn-kdny/raw/master/kidney_challenge.zip
!unzip kidney_challenge.zip

In [None]:
class KidneyDataLoader(torch.utils.data.Dataset):
  """
  Map-style dataset that pairs each image file with its label.

  The labels file is a CSV read with pandas: its 'img_IDs' column holds the
  image file stems ('.jpg' is appended) and its 'target' column the labels.

  Input:
    * imgs_dir ==> images directory
    * labelsfile => the CSV file containing the image IDs and the labels
    * transform => callable applied to each PIL image; None returns the
                   resized PIL image unchanged
    * img_size => size handed to PIL's resize for every image
                  (PIL documents it as (width, height))
  """
  def __init__(self, imgs_dir, labelsfile, transform=None, img_size=(1039, 810)):
    data = pd.read_csv(labelsfile)
    # f-string formatting keeps non-string IDs (e.g. integers) usable as file stems
    self.full_filenames = [os.path.join(imgs_dir, f"{name}.jpg") for name in data['img_IDs'].values]
    # obtain labels from data frame
    self.labels = data['target'].values
    self.transform = transform
    self.img_size = tuple(img_size)

  def __len__(self):
    # return size of dataset
    return len(self.full_filenames)

  def __getitem__(self, idx):
    # The context manager releases the file handle as soon as the pixels are
    # read; convert() and resize() each return a new image, so `image` remains
    # valid after the file is closed.
    with Image.open(self.full_filenames[idx]) as raw:
      # Force three channels so grayscale / palette / RGBA files all produce
      # the same channel count as regular RGB images.
      image = raw.convert("RGB").resize(self.img_size)
    if self.transform is not None:
      image = self.transform(image)
    return image, self.labels[idx]

### Set up dataset directory and labels

In [None]:
# Locations of the extracted images and of the training labels CSV.
data_dir = "./images/"
labelsFile = "Train.csv"

# The only preprocessing applied here is the PIL-image-to-tensor conversion.
data_transformer = transforms.Compose([transforms.ToTensor()])

kidneyDataSet = KidneyDataLoader(
    imgs_dir=data_dir,
    labelsfile=labelsFile,
    transform=data_transformer,
)
# Number of labelled samples listed in the CSV.
print(len(kidneyDataSet))

#### Check the size of a random image

In [None]:
# Fetch one sample (index 9) and report its tensor shape and value range.
img, label = kidneyDataSet[9]
print(img.shape, img.min(), img.max())

### Split the Dataset

In [None]:
# 80/20 train/validation split.
# random_split draws from torch's random number generator, which
# np.random.seed does not affect, so a dedicated seeded generator is passed to
# keep the split identical across kernel restarts.
len_dataset = len(kidneyDataSet)
len_train = int(0.8*len_dataset)
len_val = len_dataset - len_train
split_generator = torch.Generator().manual_seed(0)
train_ds, val_ds = random_split(kidneyDataSet, [len_train, len_val], generator=split_generator)
print("training dataset contains {} images.".format(len(train_ds)))
print("validation dataset contains {} images.".format(len(val_ds)))

### Visualize a sample from the dataset

In [None]:
def show(img,y,color=False):
    """Plot one image tensor and put its label in the title.

    Args:
        img: tensor exposing ``.numpy()``; its axes are reordered with
            (1, 2, 0) before plotting, i.e. it is treated as channel-first.
        y: label, rendered in the title as ``"label: <y>"``.
        color: when truthy all channels are drawn; otherwise only the first
            channel is drawn with a gray colormap.
    """
    # convert tensor to numpy array in H*W*C layout
    npimg_tr = np.transpose(img.numpy(), (1, 2, 0))
    if color:
        plt.imshow(npimg_tr, interpolation='nearest')
    else:
        # grayscale view: only channel 0 is displayed
        plt.imshow(npimg_tr[:, :, 0], interpolation='nearest', cmap="gray")
    # the title is set for both views so a grayscale preview is labelled too
    plt.title("label: "+str(y))

In [None]:

# Preview one sample (index 20) of the full dataset together with its label.
img, label = kidneyDataSet[20]
show(img=img, y=label)

### Training, validation and test transformers

In [None]:
# Per-channel mean and standard deviation handed to transforms.Normalize.
# NOTE(review): these look like the usual ImageNet statistics — confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [None]:
# Augmentation pipeline for training images: occasional horizontal/vertical
# flips (probability 0.1 each), a random rotation configured with 95 degrees,
# then tensor conversion and per-channel normalisation.
# NOTE(review): no dataset in the visible cells is built with this transformer —
# confirm whether it was meant to be used for the training split.
train_transformer = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.1),
        transforms.RandomVerticalFlip(p=0.1),
        transforms.RandomRotation(degrees=95),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [None]:
# Validation and test images are only converted to tensors (no augmentation,
# no normalisation).
val_transformer = transforms.Compose(
    transforms=[transforms.ToTensor()],
)
test_transformer = transforms.Compose(
    transforms=[transforms.ToTensor()],
)

In [None]:
# Batch the two splits: shuffled training batches of 4 samples and
# fixed-order validation batches of 2 samples.
train_dl = DataLoader(dataset=train_ds, batch_size=4, shuffle=True)
val_dl = DataLoader(dataset=val_ds, batch_size=2, shuffle=False)

In [None]:
# Load the sample-submission file and display its (rows, columns) shape.
ts_data = pd.read_csv("./SampleSubmission.csv")
ts_data.shape

### Create the test dataset

In [None]:
# Build a placeholder labels file for the test images so they can be read
# through the same dataset class as the training data. The 'target' column is
# filled with 0 only because the dataset class requires that column.
ts_data = pd.read_csv("./SampleSubmission.csv").assign(target=0)

# Persist it under the test labels file name.
# NOTE: the data-frame index is written as well (pandas default), so the CSV
# carries an extra unnamed leading column when it is read back.
test_labels_file = "custom_test.csv"
ts_data.to_csv(path_or_buf=test_labels_file)

# Test dataset, served one image per batch in file order.
tst_ds = KidneyDataLoader(data_dir, test_labels_file, test_transformer)
tst_dl = DataLoader(dataset=tst_ds, batch_size=1, shuffle=False)

In [None]:
# Number of test samples (one per row of the custom test labels file).
len(tst_ds)

## Model Architecture

In [None]:
# Run on the first CUDA device when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [None]:
from torch.optim import lr_scheduler

### Training

In [None]:
def train(model, criterion, data_loader, optimizer, num_epochs, scheduler=None):
    """Simple training loop for a PyTorch model.

    Args:
        model: network to optimise; it is moved to the module-level ``device``.
        criterion: loss callable invoked as ``criterion(output, target)``.
        data_loader: iterable yielding ``(features, target)`` batches.
        optimizer: optimiser used for the parameter updates.
        num_epochs: number of passes over ``data_loader``.
        scheduler: optional learning-rate scheduler, stepped once at the end
            of every epoch. ``None`` keeps the learning rate unchanged.

    After each epoch the exponential moving average of the batch losses is
    printed. Nothing is returned.
    """
    # Make sure model is in training mode.
    model.train()

    # Move model to the device (CPU or GPU).
    model.to(device)

    # Exponential moving average of the loss.
    ema_loss = None

    print('----- Training Loop -----')
    # Loop over epochs.
    for epoch in range(num_epochs):

        # Loop over data.
        for features, target in data_loader:

            # Forward pass (the output already lives on the model's device).
            output = model(features.to(device))
            loss = criterion(output, target.to(device))

            # Backward pass.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # .item() yields a plain Python number, so the running average
            # does not hold on to the autograd graph.
            batch_loss = loss.item()
            if ema_loss is None:
                ema_loss = batch_loss
            else:
                ema_loss += (batch_loss - ema_loss) * 0.01

        # Advance the learning-rate schedule once per epoch, after the
        # optimiser updates of that epoch.
        if scheduler is not None:
            scheduler.step()

        # Print out progress at the end of the epoch.
        print('Epoch: {} \tLoss: {:.6f}'.format((epoch+1), ema_loss),)
  

### Validation

In [None]:
def validate(model, data_loader):
    """Report the classification accuracy of ``model`` over ``data_loader``.

    The model is switched to evaluation mode, the predicted class of every
    sample (argmax over dim 1 of the model output) is compared with its
    target, the result is printed, the model's state dict is written to
    'model.ckpt', and the accuracy is returned as a percentage.
    """
    model.eval()
    hits = 0
    print('----- Model Evaluation -----')

    # Evaluation needs no gradients.
    with torch.no_grad():
        for batch, labels in data_loader:
            logits = model(batch.to(device))
            # Highest-scoring class per sample, brought back to the CPU so it
            # can be compared with the labels.
            predicted = logits.argmax(dim=1, keepdim=True).cpu()
            matches = predicted == labels.view_as(predicted)
            hits += matches.sum().item()

    n_samples = len(data_loader.sampler)
    percent = 100. * hits / n_samples
    print(f'Test accuracy: {hits} / {n_samples} ({percent:.0f}%)')
    torch.save(model.state_dict(), 'model.ckpt')
    return percent

### Submission prediction

### Model configuration

In [None]:
# VGG16 from torchvision, loaded with its pretrained weights.
# NOTE(review): the classifier head is not modified in the visible cells, so
# the output size stays whatever the pretrained model provides — confirm that
# this matches the label set of the challenge.
model = torchvision.models.vgg16(pretrained=True)


In [None]:
# Move the model to the selected device; as the cell's last expression the
# returned module is also displayed.
model.to(device)

In [None]:
# Cross-entropy loss computed on the model outputs.
criterion = torch.nn.CrossEntropyLoss()

# Adam over all model parameters with a base learning rate of 1e-5.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)

# Cosine-annealing learning-rate schedule with T_max=20.
exp_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

**Train the model**

In [None]:
# Number of passes over the training loader.
epochs = 20
train(
    model=model,
    criterion=criterion,
    data_loader=train_dl,
    optimizer=optimizer,
    num_epochs=epochs,
    scheduler=exp_lr_scheduler,
)

**Validate the model**

In [None]:
# Accuracy on the held-out validation split; validate() also writes the
# model's state dict to 'model.ckpt'.
validate(model, val_dl)

## Submission prediction

In [None]:
def sumission_predict(model, test_loader, tst_labels_file, out_file="submission03.csv"):
  """
  Score the test set and write a submission CSV.

  Input:
    * model ==> network to evaluate; inputs are moved to the module-level `device`
    * test_loader ==> iterable of (features, target) batches of any batch size;
                      the target is ignored
    * tst_labels_file ==> CSV with one row per test sample, in the order the
                          loader yields them
    * out_file ==> path the submission CSV is written to
  Returns the written data frame, whose 'target' column holds the highest
  softmax probability of each sample.
  Raises ValueError when the number of scores differs from the number of rows.
  """
  model.eval()
  result = []
  with torch.no_grad():

    # Loop over test data.
    for features, _ in test_loader:

      # Forward pass.
      logits = model(features.to(device))
      probs = torch.softmax(logits, dim=1)
      # Reduce over the class axis only, so every sample keeps its own score
      # regardless of the batch size.
      # NOTE(review): this is the confidence of the predicted class, not the
      # probability of one fixed class — confirm against the challenge metric.
      result.extend(probs.max(dim=1).values.cpu().tolist())

  print(result)
  sub = pd.read_csv(tst_labels_file)
  if len(result) != len(sub):
    raise ValueError(
      "got {} predictions for {} rows in {}".format(len(result), len(sub), tst_labels_file)
    )
  sub['target'] = result
  sub.to_csv(out_file, index=False)
  print("--------------- Process completed successfully ---------------")
  return sub

In [None]:
# Run inference over the test loader and write the submission CSV; the
# resulting data frame is kept in `res`.
res=sumission_predict(model, tst_dl, test_labels_file)

In [None]:
# Inspect the column names of the returned submission frame.
res.columns

In [None]:
# Remove the stray 'Unnamed: 0' column from the submission frame.
# errors='ignore' keeps the cell re-runnable: once the column is gone, a
# second run is a no-op instead of a KeyError.
res = res.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Write the submission frame to 'submission11.csv' without the index column.
res.to_csv('submission11.csv', index=False)