## Project Overview
Import all relevant libraries and connect to Google Drive to access the MURA database. The database is stored as a .zip file in the shared Google Drive; it is unzipped into the Colab runtime's local storage, where the rest of the notebook reads it. The database includes CSV files listing the image paths and study labels for the training and validation sets.

In [1]:
## Standard libraries
import os
import math
import numpy as np
import time
import pandas as pd

## Imports for plotting
import matplotlib.pyplot as plt

## Progress bar
from tqdm.notebook import tqdm

In [2]:
## Import PyTorch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import torchvision.transforms as transforms
import torchvision as tv
import torch.nn as nn
import torchvision.models as models

In [3]:
!pip install torchmetrics
from torchmetrics import CohenKappa

Collecting torchmetrics
  Downloading torchmetrics-1.7.2-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->torchmetrics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->torchmetrics)
  D

In [4]:
from google.colab import drive
from google.colab import files

drive.mount('/content/gdrive', force_remount=False)

path = '/content/gdrive/MyDrive/ECE 228 Project'
cd = path
!unzip -u "/content/gdrive/MyDrive/ECE 228 Project/MURA-v1.1.zip"

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
   creating: MURA-v1.1/train/XR_SHOULDER/patient01061/
   creating: MURA-v1.1/train/XR_SHOULDER/patient01061/study1_positive/
  inflating: MURA-v1.1/train/XR_SHOULDER/patient01061/study1_positive/image1.png  
  inflating: MURA-v1.1/train/XR_SHOULDER/patient01061/study1_positive/image3.png  
  inflating: MURA-v1.1/train/XR_SHOULDER/patient01061/study1_positive/image2.png  
   creating: MURA-v1.1/train/XR_SHOULDER/patient01016/
   creating: MURA-v1.1/train/XR_SHOULDER/patient01016/study1_positive/
  inflating: MURA-v1.1/train/XR_SHOULDER/patient01016/study1_positive/image1.png  
  inflating: MURA-v1.1/train/XR_SHOULDER/patient01016/study1_positive/image3.png  
  inflating: MURA-v1.1/train/XR_SHOULDER/patient01016/study1_positive/image2.png  
   creating: MURA-v1.1/train/XR_SHOULDER/patient01182/
   creating: MURA-v1.1/train/XR_SHOULDER/patient01182/study1_positive/
  inflating: MURA-v1.1/train/XR_SHOULDER/patient0

## Torch Dataset
Create the custom Dataset class

In [5]:
class MuraDataset(Dataset):
    """Map-style dataset serving ``(image, label)`` pairs listed in a DataFrame.

    Args:
        dataframe: Table with a ``path`` column (image file location) and a
            ``target`` column (numeric label). The index is reset so that rows
            can be addressed by position.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        """Return the number of rows (one per image) in the dataset."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Load the image at position ``index``.

        Returns:
            A tuple ``(image, label)`` where ``image`` is the RGB image after the
            optional transform and ``label`` is a 0-dim float tensor.
        """
        # Fetch the row once instead of once per column.
        row = self.dataframe.iloc[index]

        # Use a context manager so the underlying file handle is released as soon
        # as convert() has produced its own copy of the pixels, rather than staying
        # open until the garbage collector gets to it.
        with Image.open(row['path']) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(row['target']).float()

## Transformations
The source papers used transforms to flip, rotate and scale the images. Additional transforms can be defined here to produce alternative versions of the dataset, which can then be concatenated with the original.

In [6]:
## Define (or add) the image transforms here:
size = 256  # every image is resized to a size x size square for consistency in training
mean = [0.485, 0.456, 0.406]  # per-channel mean used for normalisation
std = [0.229, 0.224, 0.225]   # per-channel standard deviation used for normalisation


def _tensor_steps():
    """Return fresh ToTensor + Normalize steps, the common tail of both pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]


# Training pipeline: resize, random flip / small rotation for augmentation,
# then convert to a normalised tensor.
train_transform = transforms.Compose([
    transforms.Resize(size=(size, size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=10),
    *_tensor_steps(),
])

# Validation pipeline: same resize and normalisation, no random augmentation.
val_transform = transforms.Compose([
    transforms.Resize(size=(size, size)),
    *_tensor_steps(),
])

## Data Formatting

Load the training and validation paths and labels

In [7]:
def process_mura_data(image_paths_csv, labeled_studies_csv):
    """Build a table with one row per image, its path components and its study label.

    Both CSVs are read with '/' as the field separator, so every path is split
    into its components. The image rows are then joined to the study labels on
    the shared components (folder, set, body part, patient, study).

    Args:
        image_paths_csv: CSV with one image path per line (six '/'-separated parts).
        labeled_studies_csv: CSV with one study directory and its label per line;
            after splitting on '/', the last field holds the label prefixed by
            the comma that separated it from the path.

    Returns:
        DataFrame with columns ``folder, set, body_part, patient_id, study_PN,
        image_id, path, target`` where ``path`` is the re-assembled image path
        and ``target`` is the integer study label.

    Raises:
        pandas.errors.MergeError: if a study is listed more than once in
            ``labeled_studies_csv`` (which would otherwise silently duplicate
            every image of that study).
    """
    key_cols = ['folder', 'set', 'body_part', 'patient_id', 'study_PN']

    path_csv = pd.read_csv(image_paths_csv, sep='/', header=None)
    label_csv = pd.read_csv(labeled_studies_csv, sep='/', header=None)

    # Re-assemble the full image path from its split components in one
    # column-wise string concatenation instead of a Python call per row.
    parts = path_csv.astype(str)
    path_csv[6] = parts[0].str.cat(parts.iloc[:, 1:], sep='/')

    path_csv.columns = key_cols + ['image_id', 'path']
    label_csv.columns = key_cols + ['target']

    # The label field still carries the CSV comma (e.g. ",1"); strip it literally.
    label_csv['target'] = (
        label_csv['target'].astype(str).str.replace(',', '', regex=False).astype(int)
    )

    # Many images map to one study label; validate so that duplicated study rows
    # raise instead of multiplying image rows.
    df = pd.merge(path_csv, label_csv, on=key_cols, validate='many_to_one')
    return df

### DataLoader

Load the data using DataLoader

In [8]:
train_dataframe = process_mura_data('/content/MURA-v1.1/train_image_paths.csv','/content/MURA-v1.1/train_labeled_studies.csv')
val_dataframe = process_mura_data('/content/MURA-v1.1/valid_image_paths.csv','/content/MURA-v1.1/valid_labeled_studies.csv')
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
train_dataframe.info()
print(set(train_dataframe['body_part']))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36808 entries, 0 to 36807
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   folder      36808 non-null  object
 1   set         36808 non-null  object
 2   body_part   36808 non-null  object
 3   patient_id  36808 non-null  object
 4   study_PN    36808 non-null  object
 5   image_id    36808 non-null  object
 6   path        36808 non-null  object
 7   target      36808 non-null  int64 
dtypes: int64(1), object(7)
memory usage: 2.2+ MB
None
{'XR_FOREARM', 'XR_HUMERUS', 'XR_ELBOW', 'XR_SHOULDER', 'XR_WRIST', 'XR_HAND', 'XR_FINGER'}


In [9]:
# Number of images per mini-batch, shared by both loaders.
batch_size = 64

# Pair each frame with its pipeline: augmentation for training, none for validation.
train_dataset = MuraDataset(dataframe=train_dataframe, transform=train_transform)
val_dataset = MuraDataset(dataframe=val_dataframe, transform=val_transform)

# Only the training split is reshuffled every epoch; validation keeps its row order.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

## Model Setup

Initialize the ResNet-18 model with pretrained ImageNet weights.
Replace the last layer with a mapping to a single output for binary classification.

In [10]:
# Load ResNet-18 with its pretrained weights, then swap the final fully connected
# layer for one that emits a single logit.
rn18 = models.resnet18(weights='IMAGENET1K_V1')
fc_input = rn18.fc.in_features  # input width of the layer being replaced
rn18.fc = nn.Linear(in_features=fc_input, out_features=1)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 135MB/s]


## Model Training Function

In [11]:
def trainer(model, trainData, valData, learning_rate, step_size, epochs, output_rate):
  """Train a single-logit binary classifier and report validation metrics.

  The model is optimised with Adam on BCE-with-logits loss; the learning rate is
  multiplied by 0.1 every ``step_size`` epochs. After each epoch the model is
  evaluated on ``valData`` (loss, accuracy at a 0.5 probability threshold, and
  Cohen's kappa).

  Args:
    model: Network producing one logit per sample; moved to the GPU if available.
    trainData: Iterable of ``(images, labels)`` batches used for optimisation.
    valData: Iterable of ``(images, labels)`` batches used for evaluation.
    learning_rate: Initial Adam learning rate.
    step_size: Number of epochs between learning-rate decays.
    epochs: Total number of epochs to run.
    output_rate: Print metrics every ``output_rate`` epochs. The first and the
      last epoch are always printed.

  Returns:
    The same ``model`` object, trained in place.

  Raises:
    ValueError: if ``output_rate`` is not positive.
  """
  # Fail before any training happens instead of hitting a modulo-by-zero
  # after the first (possibly very long) epoch.
  if output_rate <= 0:
    raise ValueError(f"output_rate must be positive, got {output_rate}")

  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  model.to(device)

  loss_func = nn.BCEWithLogitsLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.1)

  # Kappa score metric
  kappa = CohenKappa(num_classes=2, task='binary', weights='quadratic').to(device)

  for epoch in range(epochs):
    # Human-facing epoch counter (1..epochs) so it lines up with the total shown
    # in the log line and so the "last epoch" check below can actually match.
    epoch_num = epoch + 1

    # TRAINING STEP
    model.train()
    train_loss, train_total, train_correct = 0.0, 0, 0

    for images, labels in trainData:
      # Labels arrive as shape (batch,); the model emits (batch, 1).
      images, labels = images.to(device), labels.to(device).unsqueeze(1).float()
      optimizer.zero_grad()
      outputs = model(images)
      loss = loss_func(outputs, labels)
      loss.backward()
      optimizer.step()

      # loss is a batch mean; weight it by batch size so the epoch average is per-sample.
      train_loss += loss.item() * images.size(0)
      # Outputs are raw logits, so apply the sigmoid before thresholding.
      preds = torch.sigmoid(outputs) >= 0.5
      train_correct += (preds == labels.bool()).sum().item()
      train_total += labels.size(0)

    scheduler.step()
    train_loss /= train_total
    train_acc = train_correct / train_total

    # EVALUATION STEP
    model.eval()
    val_loss, val_total, val_correct = 0.0, 0, 0
    kappa.reset()  # start the metric from scratch for this epoch

    with torch.no_grad():
      for images, labels in valData:
        images, labels = images.to(device), labels.to(device).unsqueeze(1).float()

        outputs = model(images)
        loss = loss_func(outputs, labels)
        val_loss += loss.item() * images.size(0)

        preds = torch.sigmoid(outputs) >= 0.5
        val_correct += (preds == labels.bool()).sum().item()
        val_total += labels.size(0)

        # Kappa update
        kappa.update(preds.int(), labels.int())

    val_loss /= val_total
    val_acc = val_correct / val_total
    val_kappa = kappa.compute().item()

    # Report on the first epoch, the last epoch, and every output_rate-th epoch.
    if epoch_num % output_rate == 0 or epoch_num == 1 or epoch_num == epochs:
      print(f"Epoch: {epoch_num}/{epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f} | Val Kappa: {val_kappa:.4f}")
  return model

## Training ResNet 18 Model

In [12]:
# Hyper-parameters for the ResNet-18 run.
learning_rate = 1e-2  # initial Adam learning rate
step_size = 5         # epochs between learning-rate decays
epochs = 10           # total training epochs
output_rate = 1       # print metrics every epoch

model = trainer(
    model=rn18,
    trainData=train_dataloader,
    valData=val_dataloader,
    learning_rate=learning_rate,
    step_size=step_size,
    epochs=epochs,
    output_rate=output_rate,
)

KeyboardInterrupt: 

## Saved weights

In [None]:
path = '/content/gdrive/MyDrive/ECE 228 Project/resnet18_MURA.pt'
# Save from rn18 directly. The shared `model` name is rebound by every training
# cell, so after an interrupted or out-of-order run it can be undefined or refer
# to a different network; rn18 is the object that trainer() updates in place.
torch.save(rn18.state_dict(), path)

## Model Setup and Training ResNeXt 50

In [None]:
# Load ResNeXt-50 (32x4d) with its pretrained weights and give it a single-logit head.
rn50 = models.resnext50_32x4d(weights='IMAGENET1K_V1')
fc_input = rn50.fc.in_features  # input width of the layer being replaced
rn50.fc = nn.Linear(in_features=fc_input, out_features=1)

In [None]:
# Hyper-parameters for the ResNeXt-50 run.
learning_rate = 1e-2  # initial Adam learning rate
step_size = 5         # epochs between learning-rate decays
epochs = 10           # total training epochs
output_rate = 1       # print metrics every epoch

model = trainer(
    model=rn50,
    trainData=train_dataloader,
    valData=val_dataloader,
    learning_rate=learning_rate,
    step_size=step_size,
    epochs=epochs,
    output_rate=output_rate,
)

In [None]:
path = '/content/gdrive/MyDrive/ECE 228 Project/ResNeXt50_MURA.pt'
# Save from rn50 directly. The shared `model` name is rebound by every training
# cell, so after an interrupted or out-of-order run it can refer to a different
# network; rn50 is the object that trainer() updates in place.
torch.save(rn50.state_dict(), path)

## Model Setup and Training DenseNet 121

In [14]:
# Load DenseNet-121 with its pretrained weights; its head is exposed as `.classifier`,
# which is replaced here by a single-logit layer.
dn121 = models.densenet121(weights='IMAGENET1K_V1')
fc_input = dn121.classifier.in_features  # input width of the layer being replaced
dn121.classifier = nn.Linear(in_features=fc_input, out_features=1)

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 53.4MB/s]


In [None]:
# Hyper-parameters for the DenseNet-121 run.
learning_rate = 1e-2  # initial Adam learning rate
step_size = 5         # epochs between learning-rate decays
epochs = 10           # total training epochs
output_rate = 1       # print metrics every epoch

model = trainer(
    model=dn121,
    trainData=train_dataloader,
    valData=val_dataloader,
    learning_rate=learning_rate,
    step_size=step_size,
    epochs=epochs,
    output_rate=output_rate,
)

In [None]:
path = '/content/gdrive/MyDrive/ECE 228 Project/denseNet121_MURA.pt'
# Save from dn121 directly. The shared `model` name is rebound by every training
# cell, so after an interrupted or out-of-order run it can refer to a different
# network; dn121 is the object that trainer() updates in place.
torch.save(dn121.state_dict(), path)

In [13]:
# Load ConvNeXt-Tiny with its pretrained weights. The layer at classifier[2] is
# replaced by a single-logit layer of matching input width.
cvn = models.convnext_tiny(weights='IMAGENET1K_V1')
fc_input = cvn.classifier[2].in_features  # input width of the layer being replaced
cvn.classifier[2] = nn.Linear(in_features=fc_input, out_features=1)

Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /root/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100%|██████████| 109M/109M [00:01<00:00, 86.1MB/s]


In [None]:
# Hyper-parameters for the ConvNeXt-Tiny run (note the smaller learning rate
# compared with the other models in this notebook).
learning_rate = 1e-4  # initial Adam learning rate
step_size = 5         # epochs between learning-rate decays
epochs = 10           # total training epochs
output_rate = 1       # print metrics every epoch

model = trainer(
    model=cvn,
    trainData=train_dataloader,
    valData=val_dataloader,
    learning_rate=learning_rate,
    step_size=step_size,
    epochs=epochs,
    output_rate=output_rate,
)