In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm_notebook as tqdm
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import matplotlib.pyplot as plt
import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms

from torchvision.models import densenet121
from torchvision.models.densenet import DenseNet121_Weights

In [None]:
# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Root folder of the image collection (relative to the working directory).
# The loading code looks for 'frontal' and 'lateral' sub-folders inside it.
BASE_PATH = 'classify_images_frontal_lateral'

In [None]:
# Build one row per image file: the file name plus the view it belongs to.
# The view is taken from the sub-folder name under BASE_PATH.
image = []
labels = []
for view in ('frontal', 'lateral'):
    view_dir = os.path.join(BASE_PATH, view)
    if not os.path.isdir(view_dir):
        # A missing class folder simply contributes no rows.
        continue
    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the row order (and the seeded split built on it) reproducible.
    for file_name in sorted(os.listdir(view_dir)):
        if file_name != 'annotations':  # skip the annotations entry inside each class folder
            image.append(file_name)
            labels.append(view)

data = pd.DataFrame({'Images': image, 'labels': labels})
data.head()

In [None]:
# Map the string view labels to integer class ids and keep them next to the originals.
lb = LabelEncoder()
encoded_view_ids = lb.fit_transform(data['labels'])
data['encoded_labels'] = encoded_view_ids
data.head()

In [None]:
# Parameters
batch_size = 128         # samples per DataLoader batch
train_split = 0.7        # fraction of the dataset used for training
validation_split = 0.2   # fraction used for validation
test_split = 0.1         # fraction left for testing (the split code takes the remainder)
shuffle_dataset = True   # shuffle indices before splitting
random_seed = 42         # seed for the index shuffle

# test_split is never read when the indices are cut, so an inconsistent value
# would otherwise go unnoticed. Fail fast if the fractions are invalid.
assert min(train_split, validation_split, test_split) >= 0, \
    "split fractions must be non-negative"
assert abs(train_split + validation_split + test_split - 1.0) < 1e-9, \
    "train/validation/test fractions must sum to 1"

In [None]:
# Positional index of every sample in the DataFrame
dataset_size = len(data)
indices = list(range(dataset_size))

# Cut points: [0, train) -> train, [train, val) -> validation, [val, end) -> test
train_split_idx = int(np.floor(train_split * dataset_size))
validation_split_idx = int(np.floor((train_split + validation_split) * dataset_size))

# Seeded in-place shuffle so the partition is random but repeatable
if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)

# Slice the (possibly shuffled) index list into the three disjoint subsets
train_indices, val_indices, test_indices = (
    indices[:train_split_idx],
    indices[train_split_idx:validation_split_idx],
    indices[validation_split_idx:],
)

In [None]:
# One sampler per subset; each restricts a DataLoader to that subset's indices.
train_sampler, valid_sampler, test_sampler = (
    SubsetRandomSampler(subset_indices)
    for subset_indices in (train_indices, val_indices, test_indices)
)

In [None]:
# Report how many samples ended up in each subset.
n_train, n_val, n_test = map(len, (train_indices, val_indices, test_indices))
print(f'Number of images in training set: {n_train}')
print(f'Number of images in validation set: {n_val}')
print(f'Number of images in test set: {n_test}')

In [None]:
# Per-channel statistics used to standardise the image tensor
# (these are the commonly used ImageNet mean/std values).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Preprocessing applied to every image: fixed 224x224 size -> tensor -> standardise.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
class MimicCXR_Dataset(Dataset):
    """Image dataset backed by a DataFrame of file names and view labels.

    Each row of ``img_data`` must provide:
      * ``'Images'``         - image file name,
      * ``'labels'``         - name of the sub-folder of ``img_path`` holding the file,
      * ``'encoded_labels'`` - integer class id returned as the target.

    Args:
        img_data: DataFrame with the columns listed above.
        img_path: Root directory containing one sub-folder per label.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, img_data, img_path, transform=None):
        self.img_path = img_path
        self.transform = transform
        self.img_data = img_data

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.img_data)

    def __getitem__(self, index):
        """Load the sample at *position* ``index`` and return ``(image, label)``.

        ``image`` is the RGB image (after ``transform`` if one was given) and
        ``label`` is a scalar ``torch.long`` tensor holding the class id.
        """
        # Positional lookup: __len__ and the samplers work with positions 0..len-1,
        # so the lookup must not depend on the DataFrame's index labels.
        row = self.img_data.iloc[index]
        img_name = os.path.join(self.img_path, row['labels'], row['Images'])
        # Close the file handle deterministically; convert() returns an independent image.
        with Image.open(img_name) as img_file:
            image = img_file.convert('RGB')
        label = torch.tensor(int(row['encoded_labels']), dtype=torch.long)
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:
# Single dataset over all images; the samplers decide which rows each loader sees.
dataset = MimicCXR_Dataset(img_data=data, img_path=BASE_PATH, transform=transform)

In [None]:
# The three loaders share the dataset and batch size; only the sampler differs.
loader_kwargs = {'batch_size': batch_size}
train_loader = torch.utils.data.DataLoader(dataset, sampler=train_sampler, **loader_kwargs)
validation_loader = torch.utils.data.DataLoader(dataset, sampler=valid_sampler, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(dataset, sampler=test_sampler, **loader_kwargs)

In [None]:
def img_display(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Convert a normalised CHW image tensor into an HWC array for ``imshow``.

    Args:
        img: Image tensor laid out as (channels, height, width) that was
            standardised per channel with ``mean`` and ``std``.
        mean: Per-channel mean used during normalisation. Defaults to the
            values passed to ``transforms.Normalize`` in this notebook.
        std: Per-channel standard deviation used during normalisation.

    Returns:
        ``numpy.ndarray`` of shape (height, width, channels) with values
        clipped to [0, 1].
    """
    # detach/cpu make this work for GPU tensors and tensors that track gradients.
    npimg = img.detach().cpu().numpy()
    npimg = np.transpose(npimg, (1, 2, 0))  # CHW -> HWC; channels last so mean/std broadcast
    # Invert Normalize: x_norm = (x - mean) / std  =>  x = x_norm * std + mean
    npimg = npimg * np.asarray(std, dtype=npimg.dtype) + np.asarray(mean, dtype=npimg.dtype)
    # Rounding can push values slightly outside the displayable range.
    return np.clip(npimg, 0.0, 1.0)

In [None]:
# Pull one batch from the training loader and preview up to 15 samples.
# Batch-specific names are used so the notebook-level `labels` list is not overwritten.
dataiter = iter(train_loader)
sample_images, sample_labels = next(dataiter)
label_names = {0: 'frontal', 1: 'lateral'}  # encoded class id -> view name

# Viewing data examples used for training
fig, axis = plt.subplots(3, 5, figsize=(15, 10))
for i, ax in enumerate(axis.flat):
    if i >= len(sample_images):
        # The batch can be smaller than the 3x5 grid; hide the unused panels.
        ax.axis('off')
        continue
    ax.imshow(img_display(sample_images[i]))               # add image
    ax.set(title=f"{label_names[sample_labels[i].item()]}")  # add label

In [None]:
class DenseNetModel(nn.Module):
    """DenseNet-121 whose classifier layer is replaced by a ``num_classes``-way linear head.

    The backbone is created with ``DenseNet121_Weights.DEFAULT``; only the
    final ``classifier`` layer is swapped for a freshly initialised ``nn.Linear``.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        backbone = densenet121(weights=DenseNet121_Weights.DEFAULT)
        # Keep the input width of the stock classifier, change only the output size.
        head_in_features = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_in_features, num_classes)
        self.densenet = backbone

    def forward(self, x):
        """Return the class logits produced by the wrapped DenseNet for ``x``."""
        logits = self.densenet(x)
        return logits

In [None]:
# Build the network, then move its parameters to the selected device.
model = DenseNetModel()
model = model.to(device)

print(model)

In [None]:
# Cross-entropy over the class logits, optimised with Adam over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [None]:
def accuracy(out, labels):
    """Return how many rows of ``out`` have their largest score at the index given by ``labels``.

    Note: despite the name this is a count of correct predictions, not a ratio.
    """
    predicted = out.argmax(dim=1)
    n_correct = (predicted == labels).sum()
    return n_correct.item()

In [None]:
from sklearn.metrics import classification_report

# ---- Training configuration and per-epoch history ---------------------------
n_epochs = 50
print_every = 20           # print the batch loss every `print_every` training batches
valid_loss_min = np.inf    # lowest mean validation loss so far (np.Inf was removed in NumPy 2.0)
val_loss = []              # mean validation loss, one entry per epoch
val_acc = []               # validation accuracy in percent, one entry per epoch
train_loss = []            # mean training loss, one entry per epoch
train_acc = []             # training accuracy in percent, one entry per epoch
total_step = len(train_loader)

for epoch in range(1, n_epochs+1):
    running_loss = 0.0
    correct = 0
    total = 0
    print(f'Epoch {epoch}\n')

    # ---- Training phase ------------------------------------------------------
    model.train()

    for batch_idx, (data_, target_) in enumerate(train_loader):
        # Move data and target to the selected device
        data_, target_ = data_.to(device), target_.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(data_)
        loss = criterion(outputs, target_)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Accumulate statistics
        running_loss += loss.item()
        correct += accuracy(outputs, target_)   # number of correct predictions in this batch
        total += target_.size(0)

        if batch_idx % print_every == 0:
            print(f'Epoch [{epoch}/{n_epochs}], Step [{batch_idx}/{total_step}], Loss: {loss.item():.4f}')

    # Accuracy and mean batch loss for THIS epoch
    epoch_train_acc = 100 * correct / total
    epoch_train_loss = running_loss / total_step
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    # Report the current epoch's loss, not the running mean over all epochs so far.
    print(f'\ntrain loss: {epoch_train_loss:.4f}, train acc: {epoch_train_acc:.4f}')

    # ---- Validation phase ----------------------------------------------------
    model.eval()
    batch_loss = 0
    total_t = 0
    correct_t = 0

    with torch.no_grad():
        for data_t, target_t in validation_loader:
            data_t, target_t = data_t.to(device), target_t.to(device)

            outputs_t = model(data_t)
            loss_t = criterion(outputs_t, target_t)

            batch_loss += loss_t.item()
            correct_t += accuracy(outputs_t, target_t)
            total_t += target_t.size(0)

    epoch_val_acc = 100 * correct_t / total_t
    epoch_val_loss = batch_loss / len(validation_loader)
    val_acc.append(epoch_val_acc)
    val_loss.append(epoch_val_loss)
    print(f'validation loss: {epoch_val_loss:.4f}, validation acc: {epoch_val_acc:.4f}\n')

    # Checkpoint whenever the mean validation loss reaches a new minimum
    if epoch_val_loss < valid_loss_min:
        valid_loss_min = epoch_val_loss
        torch.save(model.state_dict(), 'model_classification_tutorial.pt')
        print('Detected network improvement, saving current model')

# ---- Test-set classification report -----------------------------------------
# NOTE: this scores the weights from the final epoch. Load
# 'model_classification_tutorial.pt' first to score the best-validation checkpoint.
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    # Distinct loop names so the notebook-level `data` DataFrame is not overwritten.
    for test_inputs, test_targets in test_loader:
        test_inputs, test_targets = test_inputs.to(device), test_targets.to(device)
        test_outputs = model(test_inputs)
        _, pred = torch.max(test_outputs, 1)
        all_preds.extend(pred.cpu().numpy())
        all_labels.extend(test_targets.cpu().numpy())

# Pin labels so the two target names always line up, even if a class is absent
# from the test split.
report = classification_report(all_labels, all_preds, labels=[0, 1],
                               target_names=['Frontal', 'Lateral'])
print("Classification Report:")
print(report)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import torch

# Switch to evaluation mode
model.eval()

# Per-batch predictions and true labels for the test set
all_preds = []
all_labels = []

# Gradients are not needed for inference
with torch.no_grad():
    for data_t, target_t in test_loader:
        data_t, target_t = data_t.to(device), target_t.to(device)
        outputs_t = model(data_t)
        _, preds_t = torch.max(outputs_t, 1)
        all_preds.append(preds_t.cpu().numpy())
        all_labels.append(target_t.cpu().numpy())

# Join the per-batch arrays into one flat array each
all_preds = np.concatenate(all_preds)
all_labels = np.concatenate(all_labels)

# Rows are actual classes, columns are predicted classes. Pinning `labels`
# keeps the matrix 2x2, matching the two tick labels below, even when a class
# is missing from the test split.
conf_matrix = confusion_matrix(all_labels, all_preds, labels=[0, 1])

# Create the heatmap
fig, ax = plt.subplots(figsize=(10, 8))
im = ax.imshow(conf_matrix, interpolation='nearest', cmap='Blues')

# Add colorbar
cbar = ax.figure.colorbar(im, ax=ax)
cbar.ax.set_ylabel('Count', rotation=-90, va="bottom")

# Set labels
ax.set(xticks=np.arange(conf_matrix.shape[1]),
       yticks=np.arange(conf_matrix.shape[0]),
       xticklabels=['Frontal', 'Lateral'],
       yticklabels=['Frontal', 'Lateral'],
       ylabel='Actual',
       xlabel='Predicted')

# Rotate the tick labels and set their alignment
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Annotate every cell with its count. Dark cells get white text so the
# number stays readable.
contrast_threshold = conf_matrix.max() / 2.0
for i in range(conf_matrix.shape[0]):
    for j in range(conf_matrix.shape[1]):
        ax.text(j, i, str(conf_matrix[i, j]),
                ha="center", va="center",
                color="white" if conf_matrix[i, j] > contrast_threshold else "black")

ax.set_title("Confusion Matrix")
fig.tight_layout()
plt.savefig("densenet121_confusion_matrix")
plt.show()

# Print the confusion matrix values
print("Confusion Matrix:")
print(conf_matrix)


In [None]:
from sklearn.metrics import classification_report

# Uses the all_preds / all_labels arrays collected over the test loader.

# Generate the classification report. `labels` is pinned to both class ids so
# the two target names always match, even if a class is absent from the test split.
report = classification_report(all_labels, all_preds, labels=[0, 1],
                               target_names=['Frontal', 'Lateral'])

# Print the classification report
print("Classification Report:")
print(report)

In [None]:
# Per-class accuracy: correct predictions (matrix diagonal) divided by the
# number of samples that actually belong to the class (row sums).
class_names = ['Frontal', 'Lateral']
correct_per_class = conf_matrix.diagonal()
samples_per_class = conf_matrix.sum(axis=1)
accuracy_per_class = correct_per_class / samples_per_class

for idx in range(len(class_names)):
    print(f"Accuracy for {class_names[idx]}: {accuracy_per_class[idx]:.2f}")