In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
print(os.listdir("/kaggle/input/cell-images-for-detecting-malaria/cell_images"))


['Uninfected', 'Parasitized', 'cell_images']


In [3]:
import numpy as np 
import matplotlib.pyplot as plt

import torch 
import torchvision 
from torchvision import datasets, transforms 
from torchvision.datasets import ImageFolder 
import torch.nn as nn 
import torch.nn.functional as F 
from torch.utils.data import DataLoader 
from torch.utils.data.sampler import SubsetRandomSampler 

In [4]:
if(torch.cuda.is_available()):
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


**Data Preprocessing and Augmentation**

In [5]:
stats = ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

transform = transforms.Compose([
    transforms.Resize((120,120)),
    transforms.ColorJitter(0.05),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(*stats, inplace=True)
])

In [6]:
input_dir = '/kaggle/input/cell-images-for-detecting-malaria/cell_images/cell_images'
train_set = ImageFolder(input_dir, transform=transform)

In [7]:
test_size = 0.2

num_train = len(train_set)
indices = list(range(num_train))
np.random.shuffle(indices)

test_split = int(np.floor((test_size) * num_train))
test_index, train_index = indices[:test_split - 1], indices[test_split - 1:]

train_sampler = SubsetRandomSampler(train_index)
test_sampler = SubsetRandomSampler(test_index)

train_loader = DataLoader(train_set, sampler=train_sampler, batch_size=128)
test_loader = DataLoader(train_set, sampler=test_sampler, batch_size=64)
print("Images in Test set: {}\nImages in Train set: {}".format(len(test_index), len(train_index)))

Images in Test set: 5510
Images in Train set: 22048


In [8]:
classes=['infected','uninfected']

# Building the Model

Our model is going to have 
- 3 same conv layers iwth max-pooling
- 3 fully connected layers 
- Dropout with a probability of 0.5 
- BatchNormalization after the conv operations 
- ReLU activation function 

In [9]:
def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))

class MalariaCellDetectBase(nn.Module):
    def train_step(self, batch):
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        return loss
    
    def validation_step(self, batch):
        images, labels = batch
        out = self(images)
        loss = F.cross_entropy(out, labels)
        acc = accuracy(out, labels)
        return {'val_loss': loss.detach(), 'val_acc': acc}
    
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['train_loss'], result['val_loss'], result['val_acc']))
        
    def predict(self, batch): 
        images, labels = batch 
        out = self(images)
        acc = accuracy(out, lables)
        return acc
    def predict2(self, image): 
        out = self(image)
        acc = accuracy(out, labels)
        return acc

In [10]:
# class MalariaNet(MalariaCellDetectBase):
    
#     def __init__(self):
#         super(MalariaNet, self).__init__()
        
#         self.layer1 = nn.Sequential(
#             nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=2),
#             nn.BatchNorm2d(16),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2)
#         )
        
#         self.layer2 = nn.Sequential(
#             nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
#             nn.BatchNorm2d(32),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2)
#         )
        
#         self.layer3 = nn.Sequential(
#             nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2)  
#         )
        
#         self.flatten = nn.Flatten()
        
#         self.fc1 = nn.Linear(64*15*15, 512)
#         self.fc2 = nn.Linear(512, 128)
#         self.fc3 = nn.Linear(128, 2)
#         self.drop = nn.Dropout(0.2)
            
#     def forward(self, x):
#         out = self.layer1(x)
#         out = self.layer2(out)
#         out = self.layer3(out)
        
#         out = self.flatten(out)
        
#         out = self.fc1(out)
#         out = F.relu(out)
#         out = self.drop(out)
        
#         out = self.fc2(out)
#         out = F.relu(out)
#         out = self.drop(out)
        
#         out = self.fc3(out)
        
#         return out       

In [11]:
class MalariaNet(MalariaCellDetectBase):
    
    def __init__(self):
        super(MalariaNet, self).__init__()
        
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=1, padding=2) 
        self.batch1 = nn.BatchNorm2d(16)
        self.relu1 = nn.ReLU()
        self.max1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        self.batch2 = nn.BatchNorm2d(32)
        self.relu2 = nn.ReLU()
        self.max2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.batch3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()
        self.max3 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.flatten = nn.Flatten()
        
        self.fc1 = nn.Linear(64*15*15, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 2)
        self.drop = nn.Dropout(0.2)
            
    def forward(self, x):
        out = self.conv1(x)
        out = self.batch1(out)
        out = self.relu1(out)
        out = self.max1(out)
        
        out = self.conv2(out)
        out = self.batch2(out)
        out = self.relu2(out)
        out = self.max2(out)
        
        out = self.conv3(out)
        out = self.batch3(out)
        out = self.relu3(out)
        out = self.max3(out)
        
        out = self.flatten(out)
        
        out = self.fc1(out)
        out = F.relu(out)
        out = self.drop(out)
        
        out = self.fc2(out)
        out = F.relu(out)
        out = self.drop(out)
        
        out = self.fc3(out)
        
        return out       

In [12]:
model = MalariaNet()
model

MalariaNet(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batch1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (max1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batch2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (max2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batch3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (max3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=14400, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
 

# Defining some helper functions 

In [13]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

In [14]:
device = get_default_device()
device

device(type='cuda')

In [15]:
train_loader = DeviceDataLoader(train_loader, device)
test_loader = DeviceDataLoader(test_loader, device)
to_device(model, device)

MalariaNet(
  (conv1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batch1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (max1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (batch2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu2): ReLU()
  (max2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (batch3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (max3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=14400, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
 

# Training and Testing the Model 

In [16]:
learning_rate = 0.0001
optimizer = torch.optim.Adam
epochs = 20

In [17]:
@torch.no_grad()
def evaluate(model, val_loader):
    model.eval()
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        model.train()
        train_losses = []
        for batch in train_loader:
            loss = model.train_step(batch)
            train_losses.append(loss)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [18]:
model = to_device(MalariaNet(), device)

In [19]:
history = fit(epochs, learning_rate, model, train_loader, test_loader, opt_func=optimizer)

Epoch [0], train_loss: 0.4008, val_loss: 0.2116, val_acc: 0.9134
Epoch [1], train_loss: 0.2063, val_loss: 0.1641, val_acc: 0.9463
Epoch [2], train_loss: 0.1779, val_loss: 0.1380, val_acc: 0.9544
Epoch [3], train_loss: 0.1594, val_loss: 0.1217, val_acc: 0.9601
Epoch [4], train_loss: 0.1493, val_loss: 0.1457, val_acc: 0.9490
Epoch [5], train_loss: 0.1448, val_loss: 0.1250, val_acc: 0.9568
Epoch [6], train_loss: 0.1410, val_loss: 0.1264, val_acc: 0.9549
Epoch [7], train_loss: 0.1373, val_loss: 0.1170, val_acc: 0.9596
Epoch [8], train_loss: 0.1324, val_loss: 0.1197, val_acc: 0.9589
Epoch [9], train_loss: 0.1285, val_loss: 0.1316, val_acc: 0.9572
Epoch [10], train_loss: 0.1269, val_loss: 0.1156, val_acc: 0.9618
Epoch [11], train_loss: 0.1224, val_loss: 0.1081, val_acc: 0.9625
Epoch [12], train_loss: 0.1233, val_loss: 0.1194, val_acc: 0.9640
Epoch [13], train_loss: 0.1210, val_loss: 0.1051, val_acc: 0.9650
Epoch [14], train_loss: 0.1179, val_loss: 0.1027, val_acc: 0.9634
Epoch [15], train_lo

In [20]:
model = model.to(torch.device('cpu'))

In [21]:
# torch.save(model.state_dict(), '/kaggle/working/model_weights.pth')

In [22]:
print(type(model))

<class '__main__.MalariaNet'>


In [23]:
import joblib
# Save RL_Model to file in the current working directory

joblib_file = "joblib_Malarianet.joblib"  
joblib.dump(model, joblib_file)

['joblib_Malarianet.joblib']

In [24]:
# # Load from file

# joblib_Malarianet = joblib.load(joblib_file)
# joblib_Malarianet

In [25]:
joblib.__version__


'1.3.2'

In [26]:
torch.save(model, "/kaggle/working/malarianet.pt")

In [27]:
import pickle 

In [28]:
pickle.dump(model, open('model.pkl', 'wb'))

In [None]:
model