In [38]:
import sys

import numpy as np
import pandas as pd
import torch
import torchvision
from einops import rearrange
from PIL import Image
from torch.utils.data import random_split, DataLoader, Dataset
from torchvision.transforms import Compose, Resize, ToTensor, Normalize

# The project helpers live outside the notebook directory, so the path must be
# extended before the local import below.
sys.path.insert(0, '../utils/')
from dataset import ChestImage64


In [9]:
# Where the image files live and where their label table is stored
# (both relative to this notebook).
root_path = '../64pxImages'
csv_path = "../64pxImages/train_labels_64p.csv"

# Target spatial size handed to Resize.
# NOTE(review): the pretrained ViT loaded later in this notebook may expect a
# different input resolution than 64x64 - verify before feeding it these tensors.
image_size = (64, 64)

# Per-channel statistics handed to Normalize (three channels).
# NOTE(review): presumably the usual ImageNet mean/std - confirm.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing pipeline: resize -> tensor conversion -> per-channel normalisation.
preprocessing_steps = [
    Resize(image_size),
    ToTensor(),
    Normalize(mean=channel_mean, std=channel_std),
]
transform = Compose(preprocessing_steps)

In [13]:
# Reference for the pretrained models used here:
# https://pytorch.org/vision/master/models.html

# Bring in the ViT-L/16 constructor together with its pretrained-weights enum.
from torchvision.models import ViT_L_16_Weights, vit_l_16

# Stick with the DEFAULT weight set; it is expected to yield the best results.
weights = ViT_L_16_Weights.DEFAULT
model = vit_l_16(weights=weights)

In [15]:
# Show the model's Python type on the first line, followed by its full module tree.
print(type(model), model, sep="\n")

<class 'torchvision.models.vision_transformer.VisionTransformer'>
VisionTransformer(
  (conv_proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
  (encoder): Encoder(
    (dropout): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (encoder_layer_0): EncoderBlock(
        (ln_1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (self_attention): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True)
        )
        (dropout): Dropout(p=0.0, inplace=False)
        (ln_2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (mlp): MLPBlock(
          (0): Linear(in_features=1024, out_features=4096, bias=True)
          (1): GELU(approximate='none')
          (2): Dropout(p=0.0, inplace=False)
          (3): Linear(in_features=4096, out_features=1024, bias=True)
          (4): Dropout(p=0.0, inplace=False)
        )
      )
      (encoder_layer_1): EncoderBlock(
        (ln_

                                  1024Path       Patient   Study   
0  Frontal\patient00002_study1_Frontal.png  patient00002  study1  \
1  Lateral\patient00002_study1_Lateral.png  patient00002  study1   
2  Frontal\patient00004_study1_Frontal.png  patient00004  study1   
3  Lateral\patient00004_study1_Lateral.png  patient00004  study1   
4  Frontal\patient00005_study1_Frontal.png  patient00005  study1   

  Frontal/Lateral  Enlarged Cardiomediastinum  Cardiomegaly  Lung Opacity   
0         Frontal                           1             1             1  \
1         Lateral                           1             1             1   
2         Frontal                           0             0             0   
3         Lateral                           0             0             0   
4         Frontal                           0             0             0   

   Lung Lesion  Edema  Consolidation  Pneumonia  Atelectasis  Pneumothorax   
0            1      0              1          1   

In [37]:
# Read the label table and append an (initially empty) column for the encoded labels.
myCSV = pd.read_csv(csv_path)
myCSV['EncodedLabels'] = ''
print(myCSV.shape)

# Every column from position 4 up to (but excluding) the freshly added last
# column is stringified and appended, left to right, to 'EncodedLabels'.
# The resulting string column can then serve as the label for each row.
label_block = myCSV.iloc[:, 4:-1].astype(str)
for _, column_values in label_block.items():
    myCSV['EncodedLabels'] = myCSV['EncodedLabels'].astype(str) + column_values

# Cross attention is not used, so only the frontal views are kept.
is_frontal = myCSV['Frontal/Lateral'].str.contains("Frontal")
frontalCSV = myCSV[is_frontal]
frontalCSV.head()


(61266, 19)


Unnamed: 0,1024Path,Patient,Study,Frontal/Lateral,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,Edema,Consolidation,Pneumonia,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,No Finding,EncodedLabels
0,Frontal\patient00002_study1_Frontal.png,patient00002,study1,Frontal,1,1,1,1,0,1,1,1,0,0,1,1,0,0,11110111001100
2,Frontal\patient00004_study1_Frontal.png,patient00004,study1,Frontal,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1
4,Frontal\patient00005_study1_Frontal.png,patient00005,study1,Frontal,0,0,0,0,0,0,0,0,0,0,0,0,1,0,10
6,Frontal\patient00009_study1_Frontal.png,patient00009,study1,Frontal,1,1,0,0,0,0,0,0,0,0,0,0,0,0,11000000000000
8,Frontal\patient00010_study1_Frontal.png,patient00010,study1,Frontal,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1


In [43]:
# load up the dataset
class CustomDataset(Dataset):
    """Map-style dataset serving images that are listed in a DataFrame.

    Each item is an ``(image, filename, label)`` triple built from one row of
    ``df``: the relative image path is taken from the first column and the
    label from ``label_col``.

    Args:
        df: Table with one row per image. Column 0 must hold the image path
            relative to ``root_dir``. Rows are addressed by position, so the
            frame's index does not need to be contiguous.
        root_dir: Directory the relative image paths are resolved against.
        label_col: Name of the column in ``df`` that holds the label.
        transform: Optional callable applied to the loaded PIL image. When it
            is ``None`` the PIL image itself is returned.
    """

    def __init__(self, df, root_dir, label_col, transform=None):
        self.df = df
        self.root_dir = root_dir
        self.label_col = label_col
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the backing DataFrame."""
        return len(self.df)

    def _load_image(self, path):
        """Read the image at ``path`` fully into memory as an RGB PIL image."""
        # Image.open is lazy and keeps the file handle open; converting inside
        # the ``with`` block forces the read and lets the handle be closed.
        # RGB is used because the normalisation and the model stem in this
        # notebook are both three-channel.
        with Image.open(path) as img:
            return img.convert("RGB")

    def __getitem__(self, index):
        """Return ``(image, filename, label)`` for the row at position ``index``.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``.
            KeyError: if ``label_col`` is not a column of the DataFrame.
        """
        # Positional access on purpose: the frame may be a filtered slice whose
        # index labels are no longer 0..n-1.
        filename = self.df.iloc[index, 0]
        label = self.df[self.label_col].iloc[index]

        # The stored paths may use Windows separators; forward slashes resolve
        # on both Windows and POSIX.
        relative_path = str(filename).replace("\\", "/")
        img = self._load_image(f"{self.root_dir}/{relative_path}")

        if self.transform is not None:
            img = self.transform(img)

        # The filename is returned untouched so it can be matched against the CSV.
        return img, filename, label
    

# NOTE(review): as constructed here the dataset is not given the `transform`
# pipeline defined earlier; the default DataLoader collation needs tensors, so
# the images must be converted before these loaders are iterated - TODO confirm.
dataset = CustomDataset(frontalCSV, root_dir=root_path, label_col="EncodedLabels")

# Split into train / validation / test (70% / 10% / 20%).
num_samples = len(dataset)
train_size = int(0.7 * num_samples)
val_size = int(0.1 * num_samples)
# The test split takes the remainder: three independently truncated sizes do
# not always add up to len(dataset), and random_split rejects such lengths.
test_size = num_samples - train_size - val_size

# Seed the split so the same rows land in the same subset on every run.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

print("Train Length: ", len(train_dataset))
print("Validation Length: ", len(val_dataset))
print("Test Length: ", len(test_dataset))

# One loader per subset. Only the training data is shuffled; evaluation does
# not benefit from shuffling, and a fixed order keeps runs comparable.
BATCH_SIZE = 32
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


Train Length:  21441
Validation Length:  3063
Test Length:  6126
