In [1]:
import numpy as np


### Q1 
- A team of researchers has collected image data of human cells 1 to predict the malarial infected cell. The file shared ‘Datasets.zip’ has the required data. Students must work on the folder that is named after their respective roll numbers. Each folder has two subfolders ‘parasite’ and ‘uninfected’ referring to the positive and the negative classes.

In [2]:
import torch
import torch.nn as nn

class MalariaCNN(nn.Module):
    def __init__(self):
        super(MalariaCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.fc1 = nn.Linear(in_features=32*26*26, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)
        
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32*26*26)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = MalariaCNN()


In [3]:
model

MalariaCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=21632, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)

In [33]:
import os
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader 

class MalariaDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.image_paths = []
        self.labels = []
        
        # Get image paths and labels
        for label in os.listdir(data_dir):
            label_dir = os.path.join(data_dir, label)
            for img_name in os.listdir(label_dir):
                img_path = os.path.join(label_dir, img_name)
                self.image_paths.append(img_path)
                self.labels.append(label)
        
    def __len__(self):
        return len(self.image_paths)
    
    def __getitem__(self, index):
        img_path = self.image_paths[index]
        label = self.labels[index]
        
        # Load image and convert to RGB
        with Image.open(img_path) as img:
            img = img.convert('RGB')
        
        # Apply image transformations
        # You can add your own transformations here
        transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        img = transform(img)
        
        # Convert label to integer
        if label == 'parasite':
            label = 0
        else:
            label = 1
        
        return img, label

# Create dataset and dataloaders
train_dir = 'Datasets/ch22m548/'
# test_dir = 'path/to/testing/folder'
train_dataset = MalariaDataset(train_dir)
# test_dataset = MalariaDataset(test_dir)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [34]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x2d50efb9970>

In [35]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm

# Define the CNN model
class MalariaCNN(nn.Module):
    def __init__(self):
        super(MalariaCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.fc1 = nn.Linear(in_features=32*14*14, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)
        
    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32*14*14)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Create the dataset and dataloaders
train_dir = 'Datasets/ch22m548/'
# test_dir = 'path/to/testing/folder'
train_transforms = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
test_transforms = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
train_dataset = MalariaDataset(train_dir) #, transform=train_transforms)
# test_dataset = MalariaDataset(test_dir, transform=test_transforms)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Initialize the CNN model and optimizer
model = MalariaCNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
num_epochs = 2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
for epoch in range(num_epochs):
    running_loss = 0.0
    running_acc = 0.0
    for i, (inputs, labels) in enumerate(tqdm(train_loader)):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_acc += torch.sum(preds == labels.data)
    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_acc.double() / len(train_dataset)
    print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch+1, num_epochs, epoch_loss, epoch_acc))


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.88it/s]


Epoch [1/2], Loss: 0.5320, Accuracy: 0.8008


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  9.55it/s]

Epoch [2/2], Loss: 0.4640, Accuracy: 0.8008





In [37]:
# Initialize the CNN model and optimizer
model = MalariaCNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)

model.to(device)

# Calculate class weights for the loss function
total_count = len(train_dataset)
class_count = [0, 0]
for data in train_loader:
    _, labels = data
    class_count[0] += torch.sum(labels == 0).item()
    class_count[1] += torch.sum(labels == 1).item()
class_weights = [total_count / (2.0 * class_count[i]) for i in range(2)]
criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights).to(device))

num_epochs = 25

# Train the model with class weights
for epoch in range(num_epochs):
    running_loss = 0.0
    running_acc = 0.0
    for i, (inputs, labels) in enumerate(tqdm(train_loader)):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_acc += torch.sum(preds == labels.data)
    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_acc.double() / len(train_dataset)
    print('Epoch [{}/{}], Loss: {:.4f}, Accuracy: {:.4f}'.format(epoch+1, num_epochs, epoch_loss, epoch_acc))


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  9.41it/s]


Epoch [1/25], Loss: 0.6775, Accuracy: 0.5272


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  9.20it/s]


Epoch [2/25], Loss: 0.6483, Accuracy: 0.5835


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.32it/s]


Epoch [3/25], Loss: 0.6050, Accuracy: 0.7344


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  9.58it/s]


Epoch [4/25], Loss: 0.5647, Accuracy: 0.7425


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.51it/s]


Epoch [5/25], Loss: 0.5391, Accuracy: 0.7103


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.91it/s]


Epoch [6/25], Loss: 0.4831, Accuracy: 0.7183


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.95it/s]


Epoch [7/25], Loss: 0.4680, Accuracy: 0.7666


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.49it/s]


Epoch [8/25], Loss: 0.4128, Accuracy: 0.8229


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.50it/s]


Epoch [9/25], Loss: 0.3636, Accuracy: 0.8491


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.83it/s]


Epoch [10/25], Loss: 0.3871, Accuracy: 0.8370


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.26it/s]


Epoch [11/25], Loss: 0.3291, Accuracy: 0.8672


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.27it/s]


Epoch [12/25], Loss: 0.2626, Accuracy: 0.8712


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:01<00:00,  7.28it/s]


Epoch [13/25], Loss: 0.2552, Accuracy: 0.8954


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.15it/s]


Epoch [14/25], Loss: 0.2005, Accuracy: 0.9276


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.33it/s]


Epoch [15/25], Loss: 0.1805, Accuracy: 0.9215


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.05it/s]


Epoch [16/25], Loss: 0.1318, Accuracy: 0.9759


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  9.24it/s]


Epoch [17/25], Loss: 0.1136, Accuracy: 0.9557


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.80it/s]


Epoch [18/25], Loss: 0.0837, Accuracy: 0.9718


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.57it/s]


Epoch [19/25], Loss: 0.0727, Accuracy: 0.9819


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.58it/s]


Epoch [20/25], Loss: 0.0503, Accuracy: 0.9940


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.90it/s]


Epoch [21/25], Loss: 0.0414, Accuracy: 0.9899


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  9.00it/s]


Epoch [22/25], Loss: 0.0564, Accuracy: 0.9819


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.96it/s]


Epoch [23/25], Loss: 0.0557, Accuracy: 0.9799


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.87it/s]


Epoch [24/25], Loss: 0.0347, Accuracy: 0.9920


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00,  8.69it/s]

Epoch [25/25], Loss: 0.0220, Accuracy: 0.9980





In [46]:
test_dir = 'Datasets/ch22m536/'
test_dataset = MalariaDataset(test_dir) #, transform=test_transforms)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [47]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [48]:
# Evaluate the model on the test dataset
model.eval()
with torch.no_grad():
    running_loss = 0.0
    running_corrects = 0.0
    true_labels = []
    pred_labels = []
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        running_corrects += torch.sum(preds == labels.data)
        true_labels += labels.cpu().numpy().tolist()
        pred_labels += preds.cpu().numpy().tolist()
    test_loss = running_loss / len(test_dataset)
    test_acc = running_corrects.double() / len(test_dataset)

# Calculate precision, recall, and F1 score
precision = precision_score(true_labels, pred_labels)
recall = recall_score(true_labels, pred_labels)
f1 = f1_score(true_labels, pred_labels)

# Print the results
print('Test Loss: {:.4f}, Test Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1 Score: {:.4f}'.format(
    test_loss, test_acc, precision, recall, f1))


Test Loss: 1.5352, Test Accuracy: 0.6721, Precision: 0.6028, Recall: 0.9195, F1 Score: 0.7282


In [49]:
from sklearn.metrics import classification_report

In [50]:
print(classification_report(true_labels, pred_labels))

              precision    recall  f1-score   support

           0       0.86      0.45      0.59       258
           1       0.60      0.92      0.73       236

    accuracy                           0.67       494
   macro avg       0.73      0.68      0.66       494
weighted avg       0.74      0.67      0.65       494

