In [1]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Activation, Dropout, Flatten, Dense

import os
import cv2
from PIL import Image
import numpy as np

In [2]:
size = 150
def data_prepocessing(path):
    images = []
    file_list = os.listdir(path)
    for file_name in file_list:
        if file_name.endswith('.png'):
            img_path = os.path.join(path,file_name)
            img = cv2.imread(img_path)
            img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            resized_img = cv2.resize(img_RGB,(size,size))
            #norm_resized_img = resized_img.astype('float32') / 255.0
            images.append(resized_img)
    return np.array(images)

In [3]:
infected_images = data_prepocessing("E:\\Masters\\Semester 4\\Malaria Detection\\archive\\cell_images\\Parasitized")
noninfected_images= data_prepocessing("E:\\Masters\\Semester 4\\Malaria Detection\\archive\\cell_images\\Uninfected")

In [4]:
infected_labels = np.ones(len(infected_images))
noninfected_labels = np.zeros(len(noninfected_images))

In [5]:
X = np.concatenate((infected_images, noninfected_images))
y = np.concatenate((infected_labels, noninfected_labels))

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
from keras.utils import normalize
X_train = normalize(X_train, axis=1)
X_test = normalize(X_test, axis=1)

In [8]:
INPUT_SHAPE = (size, size, 3)   #change to (SIZE, SIZE, 3)


model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=INPUT_SHAPE))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), kernel_initializer = 'he_uniform'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), kernel_initializer = 'he_uniform'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(1))
model.add(Activation('sigmoid'))  

In [9]:
from sklearn.metrics import precision_score, recall_score

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [10]:
print(model.summary())  

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 activation (Activation)     (None, 148, 148, 32)      0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 72, 72, 32)        0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                    

In [11]:
history = model.fit(X_train, 
                         y_train, 
                         batch_size = 64,
                         epochs = 5,
                         shuffle = False
                     )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
outputs= model.predict(X_test)
binary_predictions = (outputs > 0.5).astype(int)

precision = precision_score(y_test, binary_predictions)
recall = recall_score(y_test, binary_predictions)



print('Precision: ',precision)
print('Recall: ',recall)



Precision:  0.9586206896551724
Recall:  0.9548083875632682


In [13]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, binary_predictions)  # Calculate accuracy
print(accuracy)

0.9524093986459577


In [14]:
loss, accuracy = model.evaluate(X_test, y_test)



In [15]:
print("Validation loss:", loss)
print("Validation accuracy:", accuracy)

Validation loss: 0.1550319939851761
Validation accuracy: 0.9524093866348267


In [16]:
img = X_test[0]

input_img = np.expand_dims(img, axis=0)

In [19]:
pred = model.predict(input_img)
print('predicted label = ', pred)
print('actual label = ',y_test[0])

predicted label =  [[0.99998784]]
actual label =  1.0



# ResNet 50:

## Data Preprocessing

In [None]:
SIZE = 224
def data_prepocessing(path):
    images = []
    file_list = os.listdir(path)
    for file_name in file_list:
        if file_name.endswith('.png'):
            img_path = os.path.join(path,file_name)
            img = cv2.imread(img_path)
            img_RGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            resized_img = cv2.resize(img_RGB,(SIZE,SIZE))
            #norm_resized_img = resized_img.astype('float32') / 255.0
            images.append(resized_img)
    return np.array(images)


infected_images = data_prepocessing("E:\\Masters\\Semester 4\\Malaria Detection\\archive\\cell_images\\Parasitized")
noninfected_images= data_prepocessing("E:\\Masters\\Semester 4\\Malaria Detection\\archive\\cell_images\\Uninfected")

infected_labels = np.ones(len(infected_images))
noninfected_labels = np.zeros(len(noninfected_images))

X = np.concatenate((infected_images, noninfected_images))
y = np.concatenate((infected_labels, noninfected_labels))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from keras.utils import normalize
X_train = normalize(X_train, axis=1)
X_test = normalize(X_test, axis=1)

MemoryError: Unable to allocate 22.5 GiB for an array with shape (20088, 224, 224, 3) and data type float64

## Data preprocessing using DataLoader

In [20]:
import torch
import torchvision
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split
import os
import matplotlib.pyplot as plt
from torchsummary import summary
import torchvision.models as models

In [21]:
#infected_images = data_prepocessing("E:\\Masters\\Semester 4\\Malaria Detection\\archive\\cell_images\\Parasitized")
#noninfected_images= data_prepocessing("E:\\Masters\\Semester 4\\Malaria Detection\\archive\\cell_images\\Uninfected")

mean = [0.4751, 0.4270, 0.3992]
std = [0.3097, 0.3083, 0.3183] 

data_dir = "E:\\Masters\\Semester 4\\Malaria Detection\\archive\\cell_images"

dataset = ImageFolder(data_dir,transform = transforms.Compose([
    transforms.Resize((224,224)),transforms.ToTensor(),transforms.Normalize(mean,std)])) # Loading the data into the system
    

In [22]:
batch_size = 64
val_size = int(0.2*len(dataset))
train_size = len(dataset)- val_size
train_data, val_data = random_split(dataset, [train_size, val_size])
trainloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(val_data, batch_size=64,shuffle=True)

In [23]:
# Load pre-trained ResNet-50
resnet50 = models.resnet50(pretrained=True)

# Freeze all layers except the last one
for param in resnet50.parameters():
    param.requires_grad = False
num_classes = len(dataset.classes)
resnet50.fc = nn.Linear(resnet50.fc.in_features, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet50.fc.parameters(), lr=0.001, momentum=0.9)

resnet50.train()
num_epochs = 10
for epoch in range(num_epochs):
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # Validation loop
    resnet50.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valloader:
            outputs = resnet50(inputs)
            val_loss += criterion(outputs, labels).item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    print(f"Epoch [{epoch+1}/{num_epochs}] - Validation Loss: {val_loss/len(valloader):.4f}, Accuracy: {(correct/total)*100:.2f}%")

# After training loop is complete, you can save or use the trained model



Epoch [1/10] - Validation Loss: 0.2635, Accuracy: 90.48%


KeyboardInterrupt: 

In [25]:
import torch
import torch.nn as nn
from torchvision import models

# Load pre-trained ResNet-50
resnet50 = models.resnet50(pretrained=True)

# Freeze all layers except the last one
for param in resnet50.parameters():
    param.requires_grad = False

num_classes = 1  # For binary classification
resnet50.fc = nn.Linear(resnet50.fc.in_features, num_classes)

criterion = nn.BCEWithLogitsLoss()  # Use BCEWithLogitsLoss for binary classification
optimizer = torch.optim.SGD(resnet50.fc.parameters(), lr=0.001, momentum=0.9)

num_epochs = 10
for epoch in range(num_epochs):
    resnet50.train()  # Set the model to training mode
    for inputs, labels in trainloader:
        optimizer.zero_grad()
        outputs = resnet50(inputs)
        loss = criterion(outputs.view(-1), labels.float())  # BCEWithLogitsLoss expects float labels
        loss.backward()
        optimizer.step()

    # Validation loop
    resnet50.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valloader:
            labels = labels.view(-1, 1)
            outputs = resnet50(inputs)
            val_loss += criterion(outputs, labels.float()).item()  # BCEWithLogitsLoss expects float labels
            predicted = (outputs > 0.0).float()  # Convert logits to binary predictions
            total += labels.size(0)
            correct += (predicted == labels.float()).sum().item()

    print(f"Epoch [{epoch+1}/{num_epochs}] - Validation Loss: {val_loss/len(valloader):.4f}, Accuracy: {(correct/total)*100:.2f}%")
