In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import copy
import torch
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms

In [2]:
# Root folder; the 'train' and 'val' sub-folders below are read from it.
data_dir = '../../../Desktop/Apnea'

# Both phases use the same preprocessing: resize every image to 224x224 and
# convert it to a tensor. No augmentation or normalisation is applied.
data_transforms = {
    phase: transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    for phase in ('train', 'val')
}

# One ImageFolder dataset per phase, read from <data_dir>/<phase>.
image_datasets = {
    phase: datasets.ImageFolder(
        os.path.join(data_dir, phase),
        transform=phase_transform,
    )
    for phase, phase_transform in data_transforms.items()
}

# Mini-batch loaders over the datasets above (both phases are shuffled).
dataloaders = {
    phase: torch.utils.data.DataLoader(
        phase_dataset,
        batch_size=8,
        shuffle=True,
        num_workers=6,
    )
    for phase, phase_dataset in image_datasets.items()
}

# Sample counts per phase, used to average the running loss / accuracy.
dataset_sizes = {
    phase: len(phase_dataset)
    for phase, phase_dataset in image_datasets.items()
}
class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=5):
    """Train ``model`` and return it loaded with its best validation weights.

    Every epoch runs a training pass followed by a validation pass over the
    module-level ``dataloaders`` dict (keys ``'train'`` and ``'val'``); batches
    are moved to the module-level ``device``. Per-phase loss and accuracy are
    printed, averaged over the samples actually processed in that phase.

    Args:
        model: Network to train; called as ``model(inputs)``.
        criterion: Loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch after the
            training pass.
        num_epochs: Number of train/val epochs to run.

    Returns:
        The same ``model`` object, with the state dict of the epoch that
        reached the highest validation accuracy loaded. If no validation pass
        scores above 0.0, the initial weights are restored.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # train(True) enables training mode, train(False) is eval mode.
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Track history only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Statistics are kept as plain Python numbers so that an
                # empty loader cannot leave a bare int where a tensor is
                # expected further down.
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == labels).sum().item()
                samples_seen += batch_size

            if is_train:
                scheduler.step()

            if samples_seen:
                epoch_loss = running_loss / samples_seen
                epoch_acc = running_corrects / samples_seen
            else:
                # No batches were produced: report NaN instead of crashing.
                epoch_loss = epoch_acc = float('nan')

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy improves
            # (a NaN accuracy never compares greater, so it is ignored).
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [11]:
def test_model(model):
    """Print the pooled accuracy of ``model`` over all test patient folders.

    The folder names are read from the ``file`` column of
    ``resources/File_test.csv``; each one is loaded from
    ``<data_dir>/test/<file>`` as an ``ImageFolder`` using the ``'val'``
    transforms. Correct predictions are accumulated across all folders and a
    single overall accuracy is printed (``nan`` if no samples were found).

    Args:
        model: Trained network; called as ``model(inputs)`` on batches moved
            to the module-level ``device``.
    """
    model.eval()   # Set model to evaluate mode

    running_count, running_corrects = 0, 0
    test_df = pd.read_csv('resources/File_test.csv')
    # Iterate over testing patients
    for file in test_df['file']:
        # NOTE(review): ImageFolder derives label indices from the class
        # sub-directories present in *this* folder. This assumes every patient
        # folder contains the same class sub-directories as the training set;
        # otherwise the label indices would not line up -- TODO confirm.
        patient_dataset = datasets.ImageFolder(
            f'{data_dir}/test/{file}',
            transform=data_transforms['val'],
        )
        # Local names are distinct from the module-level `image_datasets` /
        # `dataloaders` dicts; ordering does not affect accuracy, so the
        # loader is not shuffled.
        patient_loader = torch.utils.data.DataLoader(
            patient_dataset,
            batch_size=8,
            shuffle=False,
            num_workers=6,
        )

        # Iterate over data.
        for inputs, labels in patient_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            with torch.no_grad():
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

            # Statistics are kept as plain Python ints so an empty test set
            # cannot leave a bare int where a tensor is expected below.
            running_count += len(preds)
            running_corrects += (preds == labels).sum().item()

    if running_count:
        epoch_acc = running_corrects / running_count
    else:
        # Nothing was evaluated: report NaN rather than raising.
        epoch_acc = float('nan')
    print('Testing Acc: {:.4f}'.format(epoch_acc))

# Fine-tuning pretrained models (ResNet18, SqueezeNet, GoogLeNet)

In [6]:
# ResNet18: replace the final fully-connected layer with a 2-class head and
# optimise all model parameters.
criterion = torch.nn.CrossEntropyLoss()

model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = torch.nn.Linear(in_features=num_ftrs, out_features=2)
model_ft = model_ft.to(device)

optimizer_ft = torch.optim.SGD(
    model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
)
# StepLR multiplies the learning rate by 0.1 every 7 scheduler steps.
# train_model steps it once per epoch, so with 5 epochs the decay is never
# reached in this run.
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)

model_ft = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=5,
)
# Saves the whole module object, not just its state dict.
torch.save(model_ft, 'resources/resnet.pt')
test_model(model_ft)

Epoch 0/4
----------
train Loss: 0.4157 Acc: 0.8280
val Loss: 0.3950 Acc: 0.8694

Epoch 1/4
----------
train Loss: 0.3570 Acc: 0.8590
val Loss: 0.3490 Acc: 0.8692

Epoch 2/4
----------
train Loss: 0.3257 Acc: 0.8731
val Loss: 0.3279 Acc: 0.8588

Epoch 3/4
----------
train Loss: 0.3028 Acc: 0.8793
val Loss: 0.3067 Acc: 0.8743

Epoch 4/4
----------
train Loss: 0.2912 Acc: 0.8850
val Loss: 0.3964 Acc: 0.8685

Training complete in 15m 31s
Best val Acc: 0.874256


Testing Acc: 0.7839


In [19]:
# SqueezeNet 1.0: replace the classifier's 1x1 convolution with a 2-channel
# output and optimise all model parameters.
criterion = torch.nn.CrossEntropyLoss()

model_ft_2 = models.squeezenet1_0(pretrained=True)
model_ft_2.classifier[1] = torch.nn.Conv2d(
    in_channels=512,
    out_channels=2,
    kernel_size=(1, 1),
    stride=(1, 1),
)
model_ft_2.num_classes = 2
model_ft_2 = model_ft_2.to(device)

optimizer_ft = torch.optim.SGD(
    model_ft_2.parameters(),
    lr=0.001,
    momentum=0.9,
)
# StepLR multiplies the learning rate by 0.1 every 7 scheduler steps.
# train_model steps it once per epoch, so with 5 epochs the decay is never
# reached in this run.
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)

model_ft_2 = train_model(
    model_ft_2,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=5,
)
# Saves the whole module object, not just its state dict.
torch.save(model_ft_2, 'resources/squeezenet.pt')
test_model(model_ft_2)

Epoch 0/4
----------
train Loss: 0.4030 Acc: 0.8250
val Loss: 0.3604 Acc: 0.8512

Epoch 1/4
----------
train Loss: 0.3636 Acc: 0.8488
val Loss: 0.3521 Acc: 0.8490

Epoch 2/4
----------
train Loss: 0.3523 Acc: 0.8568
val Loss: 0.3349 Acc: 0.8616

Epoch 3/4
----------
train Loss: 0.3420 Acc: 0.8572
val Loss: 0.3420 Acc: 0.8596

Epoch 4/4
----------
train Loss: 0.3342 Acc: 0.8635
val Loss: 0.3283 Acc: 0.8679

Training complete in 12m 34s
Best val Acc: 0.867932
Testing Acc: 0.7880


In [14]:
# GoogLeNet: replace the final fully-connected layer with a 2-class head and
# optimise all model parameters.
criterion = torch.nn.CrossEntropyLoss()

model_ft_3 = models.googlenet(pretrained=True)
num_ftrs = model_ft_3.fc.in_features
model_ft_3.fc = torch.nn.Linear(in_features=num_ftrs, out_features=2)
model_ft_3 = model_ft_3.to(device)

optimizer_ft = torch.optim.SGD(
    model_ft_3.parameters(),
    lr=0.001,
    momentum=0.9,
)
# StepLR multiplies the learning rate by 0.1 every 7 scheduler steps.
# train_model steps it once per epoch, so with 5 epochs the decay is never
# reached in this run.
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)

model_ft_3 = train_model(
    model_ft_3,
    criterion,
    optimizer_ft,
    exp_lr_scheduler,
    num_epochs=5,
)
# Saves the whole module object, not just its state dict.
torch.save(model_ft_3, 'resources/googlenet.pt')
test_model(model_ft_3)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to C:\Users\joey3/.cache\torch\checkpoints\googlenet-1378be20.pth


HBox(children=(FloatProgress(value=0.0, max=52147035.0), HTML(value='')))


Epoch 0/4
----------
train Loss: 0.4036 Acc: 0.8306
val Loss: 0.3455 Acc: 0.8715

Epoch 1/4
----------
train Loss: 0.3450 Acc: 0.8603
val Loss: 0.3053 Acc: 0.8815

Epoch 2/4
----------
train Loss: 0.3159 Acc: 0.8700
val Loss: 0.3021 Acc: 0.8789

Epoch 3/4
----------
train Loss: 0.2970 Acc: 0.8803
val Loss: 0.4343 Acc: 0.8240

Epoch 4/4
----------
train Loss: 0.2807 Acc: 0.8869
val Loss: 0.2991 Acc: 0.8810

Training complete in 23m 2s
Best val Acc: 0.881510
Testing Acc: 0.7783
