In [1]:
import os

from PIL import Image #Transformations from torch vision tend to use PIL images
import torch #DeepLearning library for Neural Network
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models #transforms images to correct input for model ##ResNet18##
from sklearn.metrics import accuracy_score

In [11]:
import multiprocessing

multiprocessing.cpu_count

<bound method BaseContext.cpu_count of <multiprocessing.context.DefaultContext object at 0x0000016CD42BBAA0>>

In [34]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') #if device is available then it will use GPU

class PneumoniaDataset(Dataset): #Converts dataset into information used for the model

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir #root dir is simply where the images come from 'images/chest_xray'
        self.transform = transform #if transform method is passed then it will use it
        self.image_paths = [] #path for each individual image will be saved in list
        self.labels = [] #label to go with every image_path will be saved here

        for label in ['NORMAL', 'PNEUMONIA']: #chest_xray file is divided into these labels
            class_dir = os.path.join(root_dir, label) #will go into the folders with corresponding labeled folder
            for img_name in os.listdir(class_dir): # img_name corresponds with image in labeled folded
                self.image_paths.append(os.path.join(class_dir, img_name)) #saves path into image_paths list
                self.labels.append(0 if label=='NORMAL' else 1) #then saves list into labels

    def __len__(self):
        return len(self.image_paths) #important to know dataset length

    def __getitem__(self, ind): #will get individual image
        img_path = self.image_paths[ind] # gets the image path from the list of image path based on the index provided
        image = Image.open(img_path).convert('RGB') #opens image data and converts to RGB
        label = self.labels[ind] #label for healthy or pneumonia

        if self.transform:
            image = self.transform(image) #applies transformation parameter passed by user

        return image, label

In [35]:
#I need to change to torch.nn.Sequential, so I need to transform to tensor in the __getitem__ method above.
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) #These are the values of current weight and
    # standard deviation according to Pytorch
])

In [36]:
train_dataset = PneumoniaDataset('data/chest_xray/train', transform=transform) #simply saving datasets
test_dataset = PneumoniaDataset('data/chest_xray/test', transform=transform)
val_dataset = PneumoniaDataset('data/chest_xray/val', transform=transform)

In [37]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) #we are creating batches for the models
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [38]:
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1) #Here is how we use existing models for our project
model.fc = nn.Linear(model.fc.in_features, 2) #The two will return NORMAL, PNEUMONIA to get two neuron output for our model
model = model.to(device)

In [39]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [40]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()

        optimizer.step()

        running_loss += loss

    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')
    model.eval()
    val_labels = []
    val_preds = []

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, preds = torch.max(outputs,1)

            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(preds.cpu().numpy())

    val_accuracy = accuracy_score(val_labels, val_preds)
    print('Validation accuracy:', val_accuracy)

Epoch 1/10, Loss: 0.11645766347646713
Validation accuracy: 0.8125
Epoch 2/10, Loss: 0.06003028526902199
Validation accuracy: 0.9375
Epoch 3/10, Loss: 0.04217918589711189
Validation accuracy: 1.0
Epoch 4/10, Loss: 0.05151577666401863
Validation accuracy: 0.5625
Epoch 5/10, Loss: 0.04119393602013588
Validation accuracy: 0.5
Epoch 6/10, Loss: 0.0326116643846035
Validation accuracy: 0.9375
Epoch 7/10, Loss: 0.019765358418226242
Validation accuracy: 1.0
Epoch 8/10, Loss: 0.029912836849689484
Validation accuracy: 0.875
Epoch 9/10, Loss: 0.020849233493208885
Validation accuracy: 0.875
Epoch 10/10, Loss: 0.01964992843568325
Validation accuracy: 1.0


In [43]:
model.eval()
test_labels = []
test_preds = []
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, preds = torch.max(outputs, 1)

        test_labels.extend(labels.cpu().numpy())
        test_preds.extend(preds.cpu().numpy())

test_accuracy = accuracy_score(test_labels, test_preds)
print('Test accuracy:', test_accuracy)

torch.save(model.state_dict(), 'models/pneumonia_classifier.pth')

Test accuracy: 0.7964743589743589
