In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models 
import torchvision.transforms
from torchvision.transforms import ToTensor
import os
from PIL import Image 
import torch.nn.functional as F
from torch import optim
from tqdm import tqdm

In [4]:
def open_image(path):
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')


# define dataset directory 
path = 'retina_dataset/dataset'
#img_size = 224 
#batch_size = 32

image = Image.open('retina_dataset/dataset/1_normal/NL_001.png')
#image.show()


image_transforms = transforms.Compose([
    transforms.Resize((204, 308)),  #1232, 816
    transforms.CenterCrop(size=(204, 206)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4563, 0.2717, 0.1612], std=[0.5519, 0.3326, 0.2021])
    #transforms.Normalize(mean=[0.3066, 0.1828, 0.1091], std=[0.324, 0.1947, 0.1162])
    #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


dataset = datasets.ImageFolder(root=path, transform=image_transforms)

In [5]:
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [0.8, 0.2])

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=4, shuffle=False)

In [6]:
class CNN(nn.Module):
    def __init__(self, in_channels=3, num_classes=4):  # num class = 10
        """
        Define the layers of the convolutional neural network.

        Parameters:
            in_channels: int
                The number of channels in the input image. For MNIST, this is 1 (grayscale images).
            num_classes: int
                The number of classes we want to predict, in our case 10 (digits 0 to 9).
        """
        in_channels: 3 
        num_classes: 4
        super(CNN, self).__init__()


        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=10, stride=1, padding=0)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=8, stride=1, padding=3)

      
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=2, padding=5)
        self.fc1 = nn.Linear(in_features=5408, out_features=num_classes)
    def forward(self, x):
        """
        Define the forward pass of the neural network.

        Parameters:
            x: torch.Tensor
                The input tensor.

        Returns:
            torch.Tensor
                The output tensor after passing through the network.
        """
        x = F.relu(self.conv1(x))  # Apply first convolution and ReLU activation
        x = self.pool(x)           # Apply max pooling
        x = F.relu(self.conv2(x))  # Apply second convolution and ReLU activation
        x = self.pool(x)           # Apply max pooling
        x = F.relu(self.conv3(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)  # Flatten the tensor
        x = self.fc1(x)            # Apply fully connected layer
        return x

#print(train_loader.dataset[0].size())
input_size = 126072        #204*206*3
num_classes = 4  # 4 types of eye: normal, cataracts, glaucoma, retina disease
learning_rate = 0.001
batch_size = 32
num_epochs = 75  

device = "cuda" if torch.cuda.is_available() else "cpu"

model = CNN(in_channels=3, num_classes=num_classes).to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#tensor_to_image(train_loader.dataset[0][0][1])


for epoch in range(num_epochs):
    print(f"Epoch [{epoch + 1}/{num_epochs}]")
    for batch_index, (data, targets) in enumerate(tqdm(train_loader)):

        data = data.to(device)
        targets = targets.to(device)
        scores = model(data)
        loss = criterion(scores, targets)

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

Epoch [1/75]


 41%|████▏     | 50/121 [00:12<00:18,  3.86it/s]


KeyboardInterrupt: 

In [5]:
def check_accuracy(loader, model, dataset_type):
    """
    Checks the accuracy of the model on the given dataset loader.

    Parameters:
        loader: DataLoader
            The DataLoader for the dataset to check accuracy on.
        model: nn.Module
            The neural network model.
    """
    # if loader.dataset.train:
    #     print("Checking accuracy on training data")
    # else:
    #     print("Checking accuracy on test data")

    print(f"Checking accuracy on {dataset_type} data") 

    num_correct = 0
    num_samples = 0
    model.eval()  # Set the model to evaluation mode

    with torch.no_grad():  # Disable gradient calculation
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            # Forward pass: compute the model output
            scores = model(x)
            _, predictions = scores.max(1)  # Get the index of the max log-probability
            num_correct += (predictions == y).sum()  # Count correct predictions
            num_samples += predictions.size(0)  # Count total samples

        # Calculate accuracy
        accuracy = float(num_correct) / float(num_samples) * 100
        print(f"Got {num_correct}/{num_samples} with accuracy {accuracy:.2f}%")
    
    model.train()  # Set the model back to training mode

# Final accuracy check on training and test sets
check_accuracy(train_loader, model, 'training')
check_accuracy(test_loader, model, 'test')


Checking accuracy on training data
Got 481/481 with accuracy 100.00%
Checking accuracy on test data
Got 63/120 with accuracy 52.50%
