# [Malaria Detection]

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import torch
from torch import nn, optim, cuda
from torchvision import transforms, datasets, models
from sklearn.model_selection import train_test_split

## Preprocessing

In [2]:
# Root folder handed to the data loader; the images are expected under
# <cwd>/malaria_classification/cell_images/cell_images.
# Each path component is passed separately so os.path.join inserts the host
# OS separator (a hard-coded '\\' only resolves correctly on Windows).
dataset_dir=os.path.join(os.getcwd(),'malaria_classification','cell_images','cell_images')
print(dataset_dir)

C:\Users\user\Downloads\The_Agency Beginner Project\malaria_classification\cell_images\cell_images


In [3]:
#Data Augmentation
def get_data_loader(data_dir,batch_size,train_fraction=0.8,seed=None):
    """Build train and test DataLoaders from a single image folder.

    The folder is read twice with ``datasets.ImageFolder`` (once per
    transform pipeline) and the image indices are split at random into a
    training part and a test part, so both loaders draw from disjoint images.

    Args:
        data_dir: Directory passed to ``datasets.ImageFolder`` as ``root``.
        batch_size: Number of images per batch for both loaders.
        train_fraction: Share of the images assigned to the training split;
            must lie strictly between 0 and 1. Defaults to 0.8 (80/20 split).
        seed: Optional seed for the index shuffle. ``None`` (default) uses
            NumPy's global random state; an integer makes the split
            reproducible across runs.

    Returns:
        Tuple ``(train_dataloader, test_dataloader)``.

    Raises:
        ValueError: If ``train_fraction`` is not strictly between 0 and 1.
    """
    if not 0.0 < train_fraction < 1.0:
        raise ValueError(f'train_fraction must be in (0, 1), got {train_fraction}')

    # Channel statistics the ImageNet-pretrained torchvision models expect.
    imagenet_mean=(0.485, 0.456, 0.406)
    imagenet_std=(0.229, 0.224, 0.225)

    # Different transformations for the train and test sets: random
    # augmentation belongs to training only, so that evaluation stays
    # deterministic and measures the un-augmented images.
    transform={
        'train':transforms.Compose([transforms.Resize((256,256)),
                                    transforms.CenterCrop(224),
                                    transforms.RandomRotation([0,180]),
                                    transforms.RandomHorizontalFlip(p=0.5),
                                    transforms.ToTensor(),
                                    transforms.Normalize(imagenet_mean,imagenet_std)]),
        'test':transforms.Compose([transforms.Resize((256,256)),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize(imagenet_mean,imagenet_std)]),
    }

    # Read in the images: same files, one dataset object per pipeline.
    trainset=datasets.ImageFolder(root=data_dir,transform=transform['train'])
    testset=datasets.ImageFolder(root=data_dir,transform=transform['test'])

    # Shuffle the indices once and cut them into the two disjoint splits.
    n_images=len(trainset)
    if seed is None:
        indices=np.random.permutation(n_images).tolist()
    else:
        indices=np.random.default_rng(seed).permutation(n_images).tolist()
    split=int(np.floor(n_images*train_fraction))
    train_idx,test_idx=indices[:split],indices[split:]

    # Samplers restrict each loader to its own subset of indices.
    train_sampler=torch.utils.data.sampler.SubsetRandomSampler(train_idx)
    test_sampler=torch.utils.data.sampler.SubsetRandomSampler(test_idx)

    # shuffle must stay False because a sampler is supplied.
    train_dataloader=torch.utils.data.DataLoader(dataset=trainset,batch_size=batch_size,shuffle=False,sampler=train_sampler)
    test_dataloader=torch.utils.data.DataLoader(dataset=testset,batch_size=batch_size,shuffle=False,sampler=test_sampler)

    return train_dataloader,test_dataloader

In [4]:
# Build both loaders over the image folder, one image per batch.
loaders=get_data_loader(data_dir=dataset_dir,batch_size=1)
train_dataloader,test_dataloader=loaders

## Model Structure

In [5]:
class Malaria_Resnet():
    """Wrapper around a torchvision ResNet-50 used as a two-class classifier.

    ``Config`` freezes the loaded network and swaps its final fully connected
    layer for a new two-output layer; ``Train`` then optimises only the
    parameters that still require gradients and reports test accuracy after
    every epoch via ``Eval``.
    """

    def __init__(self,weights):
        """Load the ResNet-50 model.

        Args:
            weights: Value forwarded to ``models.resnet50(weights=...)``.
        """
        self.weights=weights
        self.model=models.resnet50(weights=self.weights)

    def _device(self):
        """Return the device of the model's parameters (CPU if it has none)."""
        first_param=next(self.model.parameters(),None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def Config(self):
        """Freeze the network and replace ``fc`` with a trainable 2-class layer."""
        # Freeze every existing parameter.
        for params in self.model.parameters():
            params.requires_grad=False
        # Change the last layer to 2 classes.
        in_features = self.model.fc.in_features
        self.model.fc =nn.Linear(in_features, 2 , bias=True)
        print(self.model.fc)

        # Make sure the new last layer is trainable. The attribute name must be
        # exactly `requires_grad`; any other spelling is silently ignored.
        for params in self.model.fc.parameters():
            params.requires_grad=True

    def Train(self,epoch,trainset,testset,lr=0.001):
        """Train with SGD and cross-entropy, evaluating after each epoch.

        Args:
            epoch: Number of passes over ``trainset``.
            trainset: Iterable yielding ``(inputs, labels)`` training batches.
            testset: Iterable yielding ``(inputs, labels)`` batches passed to
                ``Eval`` at the end of every epoch.
            lr: SGD learning rate.
        """
        self.epoch=epoch
        self.trainset=trainset
        device=self._device()
        criterion=nn.CrossEntropyLoss()
        # Only hand the optimizer parameters that can actually receive gradients.
        trainable=[p for p in self.model.parameters() if p.requires_grad]
        optimizer=optim.SGD(trainable,lr=lr,momentum=0)

        for epoch_idx in range(self.epoch):
            # NOTE(review): train() also puts frozen BatchNorm layers in training
            # mode, so their running statistics keep updating - confirm intended.
            self.model.train()
            running_loss=0.0
            for i,(inputs,labels) in enumerate(self.trainset):
                inputs=inputs.to(device)
                labels=labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward + backward + optimize
                outputs = self.model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
                if i % 2000 == 1999:    # print every 2000 mini-batches
                    print(f'[{epoch_idx + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                    running_loss = 0.0
            self.model.eval()
            print('Accuracy:',self.Eval(testset))
        print('Finished Training')

    def Eval(self,testset):
        """Measure classification accuracy on ``testset``.

        The model is switched to evaluation mode for the duration of the call
        and its previous train/eval mode is restored afterwards.

        Args:
            testset: Iterable yielding ``(images, labels)`` batches of any
                batch size.

        Returns:
            Accuracy as a percentage (float in [0, 100]); 0.0 when ``testset``
            yields no samples.
        """
        correct=0
        total=0
        self.testset=testset
        device=self._device()
        was_training=self.model.training
        self.model.eval()
        # since we're not training, we don't need to calculate the gradients for our outputs
        with torch.no_grad():
            for images, labels in testset:
                images=images.to(device)
                labels=labels.to(device)
                # calculate outputs by running images through the network
                outputs = self.model(images)
                # the class with the highest score is what we choose as prediction
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                # Sum over the batch so batches larger than one are counted correctly.
                correct += (predicted == labels).sum().item()
        self.model.train(was_training)

        # Guard against an empty loader instead of dividing by zero.
        percent = 100 * correct // total if total else 0
        print(f'Accuracy of the network: {percent} %')
        return 100.0 * correct / total if total else 0.0

In [None]:
# Start from ImageNet-pretrained ResNet-50 weights, reconfigure the final
# layer, then run 100 training epochs at learning rate 0.005.
pretrained_weights=models.ResNet50_Weights.IMAGENET1K_V2
model=Malaria_Resnet(pretrained_weights)
model.Config()
model.Train(epoch=100,trainset=train_dataloader,testset=test_dataloader,lr=0.005)

Linear(in_features=2048, out_features=2, bias=True)
