In [13]:
# Create the .kaggle directory in the home folder if it doesn't already exist
!mkdir -p ~/.kaggle

# Copy the kaggle.json file (your API key) to the .kaggle directory
!cp kaggle.json ~/.kaggle/

# Change the file permissions to make sure it's only readable by the owner
# This is required by Kaggle to authenticate securely
!chmod 600 ~/.kaggle/kaggle.json


In [14]:
# Download the "covid19-image-dataset" dataset from Kaggle using the Kaggle API
!kaggle datasets download pranavraikokte/covid19-image-dataset


Dataset URL: https://www.kaggle.com/datasets/pranavraikokte/covid19-image-dataset
License(s): CC-BY-SA-4.0
covid19-image-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [15]:
# Unzip the downloaded dataset file
!unzip covid19-image-dataset.zip -d ./covid19-image-dataset


Archive:  covid19-image-dataset.zip
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0100.jpeg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0102.jpeg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0105.png  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0106.jpeg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0108.jpeg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0111.jpg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0112.jpg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0113.jpg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0115.jpeg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0118.jpeg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0119.jpeg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test/Covid/0120.jpg  
  inflating: ./covid19-image-dataset/Covid19-dataset/test

In [16]:
# Import the OS module to handle file paths and directories
import os

# Import PyTorch core library
import torch

# Import PyTorch's neural network module
import torch.nn as nn

# Import PyTorch's optimization module
import torch.optim as optim

# Import datasets and transforms from torchvision for image preprocessing
from torchvision import datasets, transforms

# Import DataLoader to load data in batches
from torch.utils.data import DataLoader

# Import ImageFolder to load images from a directory structure
from torchvision.datasets import ImageFolder

# Import accuracy_score metric from scikit-learn to evaluate model performance
from sklearn.metrics import accuracy_score


In [17]:
# Define a series of image transformations:
# - Convert images to grayscale with 1 output channel
# - Resize all images to 28x28 pixels
# - Convert images to PyTorch tensors
# - Normalize pixel values to range [-1, 1]
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Set the directories where training and testing images are stored
train_dir = '/content/Covid19-dataset/train'
test_dir = '/content/Covid19-dataset/test'

# Load training and testing datasets using ImageFolder, applying the transform
train_dataset = ImageFolder(train_dir, transform=transform)
test_dataset = ImageFolder(test_dir, transform=transform)

# Create DataLoaders to load data in batches for training and testing
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Get the number of classes (e.g., COVID, Normal, Viral Pneumonia)
num_classes = len(train_dataset.classes)

# Print out the class names
print("classes:", train_dataset.classes)


classes: ['Covid', 'Normal', 'Viral Pneumonia']


In [18]:
# Define a Convolutional Neural Network model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        # First convolutional layer: 1 input channel (grayscale), 32 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)

        # Second convolutional layer: 32 input channels, 64 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)

        # Third convolutional layer: 64 input channels, 128 output channels, 3x3 kernel
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)

        # Fully connected layer: input from 128 feature maps of size 3x3, output 256
        self.fc1 = nn.Linear(128 * 3 * 3, 256)

        # Output layer: from 256 to number of classes
        self.fc2 = nn.Linear(256, num_classes)

        # Dropout layer to prevent overfitting (50% dropout rate)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # Apply first conv layer + ReLU + max pooling
        x = torch.relu(self.conv1(x))
        x = torch.max_pool2d(x, 2)

        # Apply second conv layer + ReLU + max pooling
        x = torch.relu(self.conv2(x))
        x = torch.max_pool2d(x, 2)

        # Apply third conv layer + ReLU + max pooling
        x = torch.relu(self.conv3(x))
        x = torch.max_pool2d(x, 2)

        # Flatten the output for the fully connected layer
        x = x.view(-1, 128 * 3 * 3)

        # Fully connected layer with ReLU activation
        x = torch.relu(self.fc1(x))

        # Apply dropout
        x = self.dropout(x)

        # Output layer
        x = self.fc2(x)

        return x


In [21]:
# Set the device to GPU if available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create an instance of the CNN model and move it to the chosen device
model = CNNModel().to(device)

# Define the loss function (CrossEntropyLoss is used for multi-class classification)
criterion = nn.CrossEntropyLoss()

# Use Adam optimizer with a learning rate of 0.001
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Set the number of training epochs
num_epochs = 5

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    running_loss = 0.0  # Total loss for the epoch
    correct = 0         # Number of correct predictions
    total = 0           # Total number of samples

    # Iterate through the training data
    for inputs, labels in train_loader:
        # Move data to the selected device (CPU or GPU)
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients from previous step
        optimizer.zero_grad()

        # Forward pass: compute model output
        outputs = model(inputs)

        # Calculate loss between predictions and true labels
        loss = criterion(outputs, labels)

        # Backward pass: compute gradients
        loss.backward()

        # Update weights
        optimizer.step()

        # Accumulate loss
        running_loss += loss.item()

        # Get predicted class by taking the index of the max log-probability
        _, predicted = torch.max(outputs.data, 1)

        # Update total and correct counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Calculate average loss and accuracy for the epoch
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total

    # Print statistics for the epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')


Epoch [1/5], Loss: 1.0751, Accuracy: 45.82%
Epoch [2/5], Loss: 0.9467, Accuracy: 57.77%
Epoch [3/5], Loss: 0.6399, Accuracy: 80.08%
Epoch [4/5], Loss: 0.5520, Accuracy: 76.49%
Epoch [5/5], Loss: 0.3760, Accuracy: 84.46%


In [22]:
# Set the model to evaluation mode (disables dropout, etc.)
model.eval()

# Initialize counters for correct predictions and total samples
correct = 0
total = 0

# Disable gradient computation for evaluation (saves memory and computation)
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move inputs and labels to the appropriate device
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass to get predictions
        outputs = model(inputs)

        # Get the index of the class with the highest score
        _, predicted = torch.max(outputs.data, 1)

        # Update total and correct prediction counts
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calculate and print the overall accuracy on the test set
test_accuracy = 100 * correct / total
print(f'Test Accuracy: {test_accuracy:.2f}%')


Test Accuracy: 77.27%
