In [10]:
import os
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from tqdm import tqdm
import torch.optim as optim
from torchvision.transforms import v2 as T
from torchvision import datasets, transforms, models

LAST_EPOCH=0
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
RESULTS_FOLDER_PATH = '/kaggle/working/Luna16_classification_results'
os.makedirs(RESULTS_FOLDER_PATH, exist_ok=True)

epoch_performances_file_path = f"{RESULTS_FOLDER_PATH}/epoch_performances.csv"

In [2]:
Label_0_folder = '/kaggle/input/luna16-classification-training-dataset/Label0_cropped_100x100_nodules/kaggle/working/Label0_cropped_100x100_nodules'
Label_1_folder = '/kaggle/input/luna16-classification-training-dataset/label1_2336input100x100images/kaggle/working/input100x100images'

label0_filenames = sorted(os.listdir(Label_0_folder))
label1_filenames = sorted(os.listdir(Label_1_folder))

label0_filepath = [Label_0_folder+'/'+item for item in label0_filenames]
label1_filepath = [Label_1_folder+'/'+item for item in label1_filenames]

label_1_df = pd.DataFrame(columns=['filename','label'])
label_1_df['filename'] = label1_filepath
label_1_df['label'] = 1

label_0_df = pd.DataFrame(columns=['filename','label'])
label_0_df['filename'] = label0_filepath
label_0_df['label'] = 0
label_0_df=label_0_df.iloc[:len(label_1_df)]

dataset_df = pd.concat((label_1_df, label_0_df), axis=0).reset_index().drop(columns='index')
dataset_df.head()

Unnamed: 0,filename,label
0,/kaggle/input/luna16-classification-training-d...,1
1,/kaggle/input/luna16-classification-training-d...,1
2,/kaggle/input/luna16-classification-training-d...,1
3,/kaggle/input/luna16-classification-training-d...,1
4,/kaggle/input/luna16-classification-training-d...,1


In [3]:
batch_size = 16
train_indices, test_indices = train_test_split(range(len(dataset_df)),
                                              shuffle=True,
                                              test_size=0.3)
val_indices, test_indices = train_test_split(test_indices,
                                              shuffle=False,
                                              test_size=2/3)
print(len(train_indices), len(val_indices), len(test_indices))

3270 467 935


In [4]:
class Luna16(torch.utils.data.Dataset):
    def __init__(self, df, indices, transforms):
        self.df = df
        self.indices = indices
        self.transforms = transforms
        
    def __len__(self):
        return self.indices.__len__()
    
    def __getitem__(self, idx):
        img_path = self.df.iloc[self.indices[idx]]['filename']
        img = np.load(img_path)
        label = self.df.iloc[self.indices[idx]]['label']
        if label == 0:
            img=img/255
        img = torch.from_numpy(img).float().unsqueeze(0)
        target = {'labels': label}
        if self.transforms is not None:
            img, target = self.transforms(img, target)
        return img, target

In [15]:
def check_accuracy(loader, model, device="cuda"):
    num_correct = 0
    total=0
    true_positive = 0
    true_negative = 0
    false_positive = 0
    false_negative = 0

    with torch.no_grad():
        for data, targets in loader:
            data = data.to(device=DEVICE)
            
            targets = targets['labels'].to(torch.float32).view((-1,1)).to(DEVICE)
            preds = torch.round(torch.sigmoid(model(data)))
            
            num_correct += (preds == targets).sum().item()
            total+=len(preds)
            true_positive += ((preds == 1) & (targets == 1)).sum().item()
            true_negative += ((preds == 0) & (targets == 0)).sum().item()
            false_positive += ((preds == 1) & (targets == 0)).sum().item()
            false_negative += ((preds == 0) & (targets == 1)).sum().item()
            
    accuracy = (num_correct / total) * 100 
    sensitivity = true_positive / (true_positive + false_negative + 1e-8)
    specificity = true_negative / (true_negative + false_positive + 1e-8)

    return accuracy, sensitivity, specificity, true_positive, false_positive, true_negative, false_negative

def train_fn(loader, model, optimizer, loss_fn, scaler, device, gradient_clipping):
    loop = tqdm(train_loader)
    training_loss=0
    for batch_idx, (data, targets) in enumerate(loop):
        data = data.to(device=DEVICE)
        targets = targets['labels'].to(torch.float32).view((-1,1)).to(DEVICE)
        
        # forward
        with torch.cuda.amp.autocast():
            predictions = model(data)
            loss = loss_fn(predictions, targets)
            training_loss+=loss
            
        # backward
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clipping)
        scaler.step(optimizer)
        scaler.update()
    return training_loss

In [6]:
def get_transform(train):
    transforms = []
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
        transforms.append(T.RandomVerticalFlip(0.5))
    transforms.append(T.ToDtype(torch.float, scale=True))
    transforms.append(T.ToPureTensor())
    return T.Compose(transforms)


train_loader = torch.utils.data.DataLoader(Luna16(dataset_df, train_indices, get_transform(train=True)),
                                    batch_size=batch_size,
                                    pin_memory=True
                                    )
val_loader = torch.utils.data.DataLoader(Luna16(dataset_df, val_indices , get_transform(train=False)),
                                    batch_size=batch_size,
                                    pin_memory=True
                                    )
test_loader = torch.utils.data.DataLoader(Luna16(dataset_df, test_indices, get_transform(train=False)),
                                    batch_size=batch_size,
                                    pin_memory=True
                                    )
print(len(train_loader), len(val_loader), len(test_loader))

205 30 59


In [16]:
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim

resnet18 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
features = resnet18.fc.in_features
num_classes = 1
resnet18.fc = nn.Linear(features, num_classes)
resnet18.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
resnet18 = resnet18.to(DEVICE)


In [17]:
LEARNING_RATE= 0.001
WEIGHT_DECAY=0.00001
GRADIENT_CLIPPING = 1.0

loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(resnet18.parameters(), lr=LEARNING_RATE,  weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=5)
scaler = torch.cuda.amp.GradScaler()

In [18]:
NUM_EPOCHS=10

for epoch in range(NUM_EPOCHS):
    resnet18.train()
    training_Loss = train_fn(train_loader, resnet18, optimizer, 
                             loss_fn, scaler, DEVICE, GRADIENT_CLIPPING)
    resnet18.eval()
    accuracy, sensitivity, specificity=check_accuracy(val_loader, resnet18, device=DEVICE)
    scheduler.step(sensitivity)
    epoch_Performances = {
        "Epoch": LAST_EPOCH,
        "LEARNING_RATE": LEARNING_RATE,
        "Training_loss": training_Loss,
        "Accuracy": accuracy,
        "Sensitivity": sensitivity,
        "Specificity": specificity,}
    LAST_EPOCH+=1
    try:
        with open(epoch_performances_file_path, mode='a', newline='') as file:
            fieldnames = epoch_Performances.keys()
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            if file.tell() == 0:
                writer.writeheader()
            if isinstance(epoch_Performances, dict):
                writer.writerow(epoch_Performances)
            else:
                print("Error: epoch_Performances is not a dictionary.")
    except Exception as e:
        print("Error writing to CSV:", e)

    print(f'Current learning rate: {optimizer.param_groups[0]["lr"]}')
    print(f"Epoch {LAST_EPOCH} Training Loss: {training_Loss}")
    print(f"Epoch {LAST_EPOCH} Validation Metrics: ")
    print(f"Accuracy: {accuracy}")
    print(f"Sensitivity: {sensitivity}")
    print(f"Specificity: {specificity}")

100%|██████████| 205/205 [00:07<00:00, 25.99it/s]


Current learning rate: 0.001
Epoch 1 Training Loss: 79.39490509033203
Epoch 1 Validation Metrics: 
Accuracy: 62.31263383297645
Sensitivity: 0.9016393442253426
Specificity: 0.31838565020993786


100%|██████████| 205/205 [00:07<00:00, 25.99it/s]


Current learning rate: 0.001
Epoch 2 Training Loss: 89.14183807373047
Epoch 2 Validation Metrics: 
Accuracy: 47.75160599571734
Sensitivity: 0.0
Specificity: 0.9999999999551569


100%|██████████| 205/205 [00:07<00:00, 26.00it/s]


Current learning rate: 0.001
Epoch 3 Training Loss: 65.12944793701172
Epoch 3 Validation Metrics: 
Accuracy: 47.75160599571734
Sensitivity: 0.0
Specificity: 0.9999999999551569


100%|██████████| 205/205 [00:08<00:00, 25.31it/s]


Current learning rate: 0.001
Epoch 4 Training Loss: 51.99333190917969
Epoch 4 Validation Metrics: 
Accuracy: 81.15631691648822
Sensitivity: 0.6393442622688793
Specificity: 0.9999999999551569


100%|██████████| 205/205 [00:08<00:00, 25.16it/s]


Current learning rate: 0.001
Epoch 5 Training Loss: 46.59856033325195
Epoch 5 Validation Metrics: 
Accuracy: 97.4304068522484
Sensitivity: 0.9918032786478769
Specificity: 0.9551569506298135


100%|██████████| 205/205 [00:07<00:00, 26.40it/s]


Current learning rate: 0.001
Epoch 6 Training Loss: 45.907379150390625
Epoch 6 Validation Metrics: 
Accuracy: 94.86081370449678
Sensitivity: 0.9959016393034467
Specificity: 0.8968609865068672


100%|██████████| 205/205 [00:08<00:00, 25.60it/s]


Current learning rate: 0.001
Epoch 7 Training Loss: 43.40972137451172
Epoch 7 Validation Metrics: 
Accuracy: 85.4389721627409
Sensitivity: 0.9999999999590163
Specificity: 0.6950672645428221


100%|██████████| 205/205 [00:08<00:00, 25.49it/s]


Current learning rate: 0.001
Epoch 8 Training Loss: 39.55348205566406
Epoch 8 Validation Metrics: 
Accuracy: 96.57387580299786
Sensitivity: 0.9959016393034467
Specificity: 0.9327354259671419


100%|██████████| 205/205 [00:07<00:00, 25.67it/s]


Current learning rate: 0.001
Epoch 9 Training Loss: 44.90479278564453
Epoch 9 Validation Metrics: 
Accuracy: 97.00214132762312
Sensitivity: 0.9754098360255979
Specificity: 0.9641255604948822


100%|██████████| 205/205 [00:08<00:00, 25.31it/s]


Current learning rate: 0.001
Epoch 10 Training Loss: 45.517520904541016
Epoch 10 Validation Metrics: 
Accuracy: 79.22912205567452
Sensitivity: 0.9999999999590163
Specificity: 0.5650224214993264


In [20]:
LEARNING_RATE= 0.0001
WEIGHT_DECAY=0.000001
GRADIENT_CLIPPING = 1.0

loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(resnet18.parameters(), lr=LEARNING_RATE,  weight_decay=WEIGHT_DECAY)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=5)
scaler = torch.cuda.amp.GradScaler()

In [21]:
NUM_EPOCHS=5

for epoch in range(NUM_EPOCHS):
    resnet18.train()
    training_Loss = train_fn(train_loader, resnet18, optimizer, 
                             loss_fn, scaler, DEVICE, GRADIENT_CLIPPING)
    resnet18.eval()
    accuracy, sensitivity, specificity=check_accuracy(val_loader, resnet18, device=DEVICE)
    scheduler.step(sensitivity)
    epoch_Performances = {
        "Epoch": LAST_EPOCH,
        "LEARNING_RATE": LEARNING_RATE,
        "Training_loss": training_Loss,
        "Accuracy": accuracy,
        "Sensitivity": sensitivity,
        "Specificity": specificity,}
    LAST_EPOCH+=1
    try:
        with open(epoch_performances_file_path, mode='a', newline='') as file:
            fieldnames = epoch_Performances.keys()
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            if file.tell() == 0:
                writer.writeheader()
            if isinstance(epoch_Performances, dict):
                writer.writerow(epoch_Performances)
            else:
                print("Error: epoch_Performances is not a dictionary.")
    except Exception as e:
        print("Error writing to CSV:", e)

    print(f'Current learning rate: {optimizer.param_groups[0]["lr"]}')
    print(f"Epoch {LAST_EPOCH} Training Loss: {training_Loss}")
    print(f"Epoch {LAST_EPOCH} Validation Metrics: ")
    print(f"Accuracy: {accuracy}")
    print(f"Sensitivity: {sensitivity}")
    print(f"Specificity: {specificity}")

100%|██████████| 205/205 [00:07<00:00, 26.65it/s]


Current learning rate: 0.0001
Epoch 11 Training Loss: 86.32925415039062
Epoch 11 Validation Metrics: 
Accuracy: 95.28907922912205
Sensitivity: 0.9549180327477492
Specificity: 0.9506726456972793


100%|██████████| 205/205 [00:08<00:00, 24.91it/s]


Current learning rate: 0.0001
Epoch 12 Training Loss: 82.84846496582031
Epoch 12 Validation Metrics: 
Accuracy: 95.50321199143468
Sensitivity: 0.959016393403319
Specificity: 0.9506726456972793


100%|██████████| 205/205 [00:07<00:00, 26.13it/s]


Current learning rate: 0.0001
Epoch 13 Training Loss: 84.85694122314453
Epoch 13 Validation Metrics: 
Accuracy: 95.28907922912205
Sensitivity: 0.9549180327477492
Specificity: 0.9506726456972793


100%|██████████| 205/205 [00:07<00:00, 25.81it/s]


Current learning rate: 0.0001
Epoch 14 Training Loss: 84.76195526123047
Epoch 14 Validation Metrics: 
Accuracy: 95.50321199143468
Sensitivity: 0.959016393403319
Specificity: 0.9506726456972793


100%|██████████| 205/205 [00:08<00:00, 24.06it/s]


Current learning rate: 0.0001
Epoch 15 Training Loss: 85.62690734863281
Epoch 15 Validation Metrics: 
Accuracy: 95.71734475374733
Sensitivity: 0.959016393403319
Specificity: 0.9551569506298135


**`Evaluate the classification model on test set `**

In [22]:
accuracy, sensitivity, specificity=check_accuracy(test_loader, resnet18, device=DEVICE)
print(f"Accuracy: {accuracy}, \nSensitivity: {sensitivity}, \nSpecificity: {specificity}")

Accuracy: 95.72192513368985, 
Sensitivity: 0.9656652360307797, 
Specificity: 0.9488272920906433


In [24]:
torch.save(resnet18.state_dict(), 'Luna16_classification_resnet18_15epochs.pth')

In [25]:
!zip -r /kaggle/working/Luna16_classification_results.zip /kaggle/working/Luna16_classification_results

  adding: kaggle/working/Luna16_classification_results/ (stored 0%)
  adding: kaggle/working/Luna16_classification_results/epoch_performances.csv (deflated 70%)
