## *Lung-Cancer-Classification-from-Histopathological-Images* ##

-> *mohammad sarhangzadeh*

In [None]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, Subset
from tqdm import tqdm  # the callable progress bar, not the tqdm module

import configs
from dataset import LungCancerDataset
from model import ResNet18
from utils import plot_train_samples


In [None]:
# Record and display whether PyTorch can see a CUDA device.
print(status := torch.cuda.is_available())

In [None]:
# Dataset over the 'dataset' directory with no transform argument; the split
# cell below uses its length and its `labels` attribute.
full_dataset = LungCancerDataset(root_dir='dataset')

In [None]:
# Fractions of the full dataset assigned to each split.
train_split = 0.7
val_split = 0.2
test_split = 0.1

# Fixed seed so the split is identical on every run. Without it, a checkpoint
# reloaded after a kernel restart could be evaluated on samples it was
# trained on, because the test indices would be drawn afresh.
split_seed = 42

indices = np.arange(len(full_dataset))

# First cut: training set vs. everything else, stratified by class label.
train_indices, temp_indices, train_labels, temp_labels = train_test_split(
    indices,
    full_dataset.labels,
    stratify=full_dataset.labels,
    test_size=(1 - train_split),
    random_state=split_seed,
)

# Second cut: divide the held-out part into validation and test, keeping the
# val/test ratio requested above.
val_indices, test_indices = train_test_split(
    temp_indices,
    stratify=temp_labels,
    test_size=test_split / (test_split + val_split),
    random_state=split_seed,
)

In [None]:
# Each split gets its own dataset instance so it can carry its own transform;
# Subset then restricts that instance to the split's indices.
# NOTE(review): assumes LungCancerDataset orders its samples identically on
# every instantiation, so the indices computed on `full_dataset` stay valid.
train_dataset, val_dataset, test_dataset = (
    Subset(LungCancerDataset(root_dir='dataset', transform=split_transform), split_indices)
    for split_transform, split_indices in (
        (configs.train_transforms, train_indices),
        (configs.val_transforms, val_indices),
        (configs.test_transforms, test_indices),
    )
)

In [None]:
# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=configs.BATCH_SIZE,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=configs.BATCH_SIZE,
    shuffle=False,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=configs.BATCH_SIZE,
    shuffle=False,
)

In [None]:
# Report how many samples ended up in each split, one line per split.
print(
    f"Training samples: {len(train_indices)}",
    f"Validation samples: {len(val_indices)}",
    f"Test samples: {len(test_indices)}",
    sep="\n",
)

In [None]:
# Hand the training loader to the plotting helper imported from `utils`
# (presumably a visual sanity check of the samples; its body is not shown here).
plot_train_samples(train_loader)

In [None]:

# Build the network first: the optimizer below needs its parameters, so the
# model must exist before the optimizer is created.
model = ResNet18(num_classes=configs.NUM_CLASSES, hidden_dim=configs.HIDDEN_DIM)

criterion = nn.CrossEntropyLoss()

# Hand only the parameters with requires_grad=True to Adam, so any frozen
# parameters are left out of the optimizer.
optimizer = optim.Adam(
    (param for param in model.parameters() if param.requires_grad),
    lr=configs.LEARNING_RATE,
)

In [None]:
def _run_epoch(model, loader, criterion, device, desc, *, optimizer=None):
    """Run one pass over ``loader`` and return ``(loss, accuracy, f1)``.

    Args:
        model: Network to run; switched to train mode when ``optimizer`` is
            given and to eval mode otherwise.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.
        desc: Label shown on the tqdm progress bar.
        optimizer: When provided, weights are updated after every batch;
            when ``None`` the pass runs with gradients disabled.

    Returns:
        Tuple of the mean of the per-batch losses, the accuracy in percent,
        and the weighted F1 score over all samples seen in this pass.
    """
    is_training = optimizer is not None
    model.train(is_training)

    running_loss = 0.0
    correct = 0
    total = 0
    seen_labels = []
    seen_preds = []

    # Gradients are only needed when backward() is going to be called.
    with torch.set_grad_enabled(is_training):
        for images, labels in tqdm(loader, desc=desc, leave=False):
            images, labels = images.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            # Predicted class = index of the largest logit along dim 1.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            seen_labels.extend(labels.cpu().numpy())
            seen_preds.extend(predicted.cpu().numpy())

    mean_loss = running_loss / len(loader)
    accuracy = 100 * correct / total
    f1 = f1_score(seen_labels, seen_preds, average='weighted')
    return mean_loss, accuracy, f1


def train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs=configs.EPOCHS, device=configs.DEVICE, save_path='best_model.pth'):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    After every epoch the loss, accuracy and weighted F1 score of both the
    training and the validation pass are printed and recorded. The weights of
    the epoch with the lowest validation loss are written to ``save_path``;
    if no epoch qualifies (for example ``num_epochs`` is 0 or the validation
    loss is NaN) the final weights are written instead, so the file always
    exists when the function returns.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: Loader yielding ``(images, labels)`` training batches.
        val_loader: Loader yielding ``(images, labels)`` validation batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of epochs to run.
        device: Device used for the model and the batches.
        save_path: File the checkpoint (``state_dict``) is written to.

    Returns:
        Six lists with one entry per epoch, in this order: train losses,
        validation losses, train accuracies, validation accuracies,
        train F1 scores, validation F1 scores.
    """
    model = model.to(device)

    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    train_f1_scores = []
    val_f1_scores = []

    best_val_loss = float('inf')
    checkpoint_written = False

    for epoch in range(num_epochs):
        train_loss, train_accuracy, train_f1 = _run_epoch(
            model,
            train_loader,
            criterion,
            device,
            f'Training Epoch {epoch + 1}/{num_epochs}',
            optimizer=optimizer,
        )
        val_loss, val_accuracy, val_f1 = _run_epoch(
            model,
            val_loader,
            criterion,
            device,
            'Validating',
        )

        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        train_f1_scores.append(train_f1)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
        val_f1_scores.append(val_f1)

        print(f'Epoch [{epoch + 1}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Train F1 Score: {train_f1:.2f}, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%, Val F1 Score: {val_f1:.2f}')

        # Checkpoint only when validation improves, so the file really holds
        # the best weights rather than whatever the last epoch produced.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), save_path)
            checkpoint_written = True

    # Fallback so callers can always load save_path afterwards.
    if not checkpoint_written:
        torch.save(model.state_dict(), save_path)
    print(f'Model saved to {save_path}')

    return train_losses, val_losses, train_accuracies, val_accuracies, train_f1_scores, val_f1_scores


In [None]:
# Keep the per-epoch histories returned by training: the plotting cell reads
# these six lists by name, so discarding the return value would leave them
# undefined.
(
    train_losses,
    val_losses,
    train_accuracies,
    val_accuracies,
    train_f1_scores,
    val_f1_scores,
) = train_and_validate(model, train_loader, val_loader, criterion, optimizer)

In [None]:
# Pass the six per-epoch metric lists and the epoch count to the plotting helper.
# NOTE(review): `plot_metrics` is not imported anywhere in this notebook —
# presumably it lives in `utils`; confirm and add the import.
# NOTE(review): the six lists are the values returned by train_and_validate and
# must be bound to these names before this cell runs.
plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies, train_f1_scores, val_f1_scores, num_epochs=configs.EPOCHS)

In [None]:
# Rebuild the network with the same hyper-parameters used for training so the
# checkpoint's tensors match the freshly constructed model.
model = ResNet18(num_classes=configs.NUM_CLASSES, hidden_dim=configs.HIDDEN_DIM)

# Load the checkpoint that train_and_validate writes by default
# ('best_model.pth'). map_location lets a checkpoint saved on a GPU be loaded
# on a machine without one.
model.load_state_dict(torch.load('best_model.pth', map_location=configs.DEVICE))
model = model.to(configs.DEVICE)
model.eval()

# NOTE(review): `evaluate_model` is not imported anywhere in this notebook —
# presumably it lives in `utils`; confirm and add the import.
evaluate_model(model, test_loader, criterion, device=configs.DEVICE)