In [None]:
!pip install mlflow --quiet
!pip install pyngrok --quiet

In [None]:
import mlflow
import mlflow.pytorch
from pyngrok import ngrok
from getpass import getpass
from PIL import Image
import io
import gc
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from datasets import load_dataset

In [None]:
# Launch the MLflow UI in the background on port 5000.
get_ipython().system_raw("mlflow ui --port 5000 &")
# NOTE(review): MLflow documents PyTorch autologging for PyTorch Lightning
# training loops; confirm it has any effect for a hand-written loop.
mlflow.pytorch.autolog()

# Terminate open tunnels if exist
ngrok.kill()

# Setting the authtoken (optional)
# Get your authtoken from https://dashboard.ngrok.com/auth
# The token is a secret: prompt for it instead of storing it in the notebook.
# Any token that was ever committed with this notebook should be revoked.
NGROK_AUTH_TOKEN = getpass("ngrok authtoken (leave blank to skip): ")
if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPs tunnel on port 5000 for http://localhost:5000
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
def transform_data(dataset,transform):
    """Apply ``transform`` to every image of ``dataset`` and pair it with its label.

    Args:
        dataset: Indexable, sized collection whose items expose the keys
            ``'image'`` and ``'label'``. The image may be either a dict holding
            the encoded file under ``'bytes'`` or an already decoded image
            object that ``transform`` accepts directly.
        transform: Callable applied to each decoded image.

    Returns:
        A list with one ``[transformed_image, label]`` pair per dataset item,
        in dataset order.
    """
    transformed = []

    for i in range(len(dataset)):
        # Fetch the row once; indexing can be expensive for lazily decoded datasets.
        sample = dataset[i]
        image = sample['image']
        if isinstance(image, dict):
            # Raw encoded file content: decode it into a PIL image first.
            image = Image.open(io.BytesIO(image['bytes']))
        transformed.append([transform(image), sample['label']])
    return transformed

In [None]:
def data_loader(dataset,
                batch_size,
                random_seed=42,
                valid_size=0.1,
                shuffle=True,
                test=False):
    """Build PyTorch ``DataLoader`` objects from a raw image dataset.

    Every image is resized to 224x224 and converted to a tensor up front via
    ``transform_data``; the resulting list is held in memory.

    Args:
        dataset: Raw dataset passed to ``transform_data``.
        batch_size: Number of samples per batch.
        random_seed: Seed for the NumPy RNG used to shuffle the indices before
            the train/validation split.
        valid_size: Fraction of the samples reserved for validation.
        shuffle: In test mode, forwarded to the ``DataLoader``. Otherwise
            controls whether indices are shuffled before splitting.
        test: When true, return a single loader over the whole dataset.

    Returns:
        A single ``DataLoader`` when ``test`` is true, otherwise the tuple
        ``(train_loader, valid_loader)``.
    """
    print("Transforming data")
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize the image to (224, 224)
        transforms.ToTensor(),          # Convert PIL Image to tensor
    ])

    samples = transform_data(dataset, transform)

    if test:
        return torch.utils.data.DataLoader(
            samples, batch_size=batch_size, shuffle=shuffle
        )

    num_train = len(samples)
    indices = list(range(num_train))
    # The first `split` (possibly shuffled) indices become the validation set.
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        # Honour the caller-supplied seed so the split is reproducible.
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # Both loaders read from the same in-memory list; the samplers keep the
    # two index sets disjoint.
    train_loader = torch.utils.data.DataLoader(
        samples, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        samples, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


In [None]:
class ResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with a skip connection.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input to produce the
            residual; needed whenever the main path changes the shape.
    """

    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU())
        self.conv2 = nn.Sequential(
                        nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1),
                        nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + residual)``."""
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        # Compare against None explicitly: the truthiness of a module depends
        # on __len__ (e.g. for container modules), not on whether it was given.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

In [None]:
class ResNet(nn.Module):
    """ResNet-style classifier assembled from a configurable residual block.

    Args:
        block: Block class called as ``block(in_channels, out_channels, stride,
            downsample)``.
        layers: Sequence of four ints, the number of blocks in each stage.
        num_classes: Size of the final linear layer's output.
        in_channels: Channels of the input images (default 1).
    """

    def __init__(self, block, layers, num_classes = 4, in_channels = 1):
        super(ResNet, self).__init__()
        # Channel count fed into the next stage; updated by _make_layer.
        self.inplanes = 64
        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, 64, kernel_size = 7, stride = 2, padding = 3),
                        nn.BatchNorm2d(64),
                        nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride = 1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride = 2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride = 2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride = 2)
        # Adaptive pooling always yields a 1x1 map, so the 512-feature linear
        # layer works for any input resolution. For 224x224 inputs (7x7 final
        # map) it computes the same mean as a fixed 7x7 average pool.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks with ``planes`` channels."""
        downsample = None
        # A 1x1 projection is needed on the skip path whenever the first block
        # changes the spatial size or the channel count.
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes
        # Remaining blocks keep the shape: default stride, no projection.
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits, one row per input sample."""
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [None]:
def train(train_loader, model,criterion,optimizer,params,mlflow):
    """Train ``model`` for ``params['num_epochs']`` epochs.

    After each epoch the mean mini-batch loss is printed and logged as the
    ``train_loss`` metric, using the epoch index as the step.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` batches.
        model: Network to optimise; batches are moved to the module-level
            ``device`` before the forward pass.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer bound to the model's parameters.
        params: Dict providing the integer entry ``'num_epochs'``.
        mlflow: Object exposing ``log_metric(key, value, step=...)``.

    Returns:
        The tuple ``(model, optimizer)``.
    """
    print("Start training")

    # Ensure layers such as BatchNorm are in training mode.
    model.train()

    for epoch in range(params['num_epochs']):
        running_loss = 0.0
        num_batches = 0

        for images, labels in train_loader:
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1
            del images, labels, outputs, loss

        if num_batches == 0:
            # Nothing was iterated, so there is no loss to report.
            print("Training loader is empty; skipping training")
            break

        # Report the mean loss over the epoch instead of only the last batch.
        epoch_loss = running_loss / num_batches
        mlflow.log_metric('train_loss', epoch_loss, step=epoch)
        print ('Epoch [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, params['num_epochs'], epoch_loss))

        # Release cached memory once per epoch rather than once per batch.
        torch.cuda.empty_cache()
        gc.collect()

    return model,optimizer
    

In [None]:
def validation(valid_loader,model,mlflow):
    """Measure classification accuracy of ``model`` on ``valid_loader``.

    The model is switched to evaluation mode for the duration of the call and
    its previous mode is restored afterwards. The accuracy (in percent) is
    printed and logged as the ``val_acc`` metric.

    Args:
        valid_loader: Iterable yielding ``(images, labels)`` batches.
        model: Network to evaluate; batches are moved to the module-level
            ``device`` before the forward pass.
        mlflow: Object exposing ``log_metric(key, value)``.

    Returns:
        The accuracy in percent, or ``None`` when the loader yields no samples.
    """
    print("Start validation")

    # Evaluation mode makes BatchNorm use its running statistics instead of
    # the statistics of each validation batch.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in valid_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                # Index of the highest logit is the predicted class.
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                del images, labels, outputs
    finally:
        # Leave the model in whichever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        # Avoid dividing by zero when there is nothing to validate on.
        print("Validation loader is empty; skipping accuracy computation")
        return None

    accuracy = 100 * correct / total
    mlflow.log_metric('val_acc', accuracy)
    print('Accuracy of the network on the {} validation images: {} %'.format(total, accuracy))
    return accuracy

In [None]:
def save_model(model,optimizer,name):
    """Write a checkpoint for ``model`` and ``optimizer`` to ``<name>.pth``.

    The checkpoint dict stores the model object itself under ``'model'``, its
    weights under ``'state_dict'`` and the optimizer state under
    ``'optimizer'``.
    """
    print("Save model "+ name)
    target_path = name+'.pth'
    torch.save(
        {
            'model': model,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        },
        target_path,
    )

In [None]:
def do_experiment(train_loader,valid_loader,model,params,criterion,optimizer,idx):
    """Run one experiment: log params, train, validate, tag and checkpoint.

    Args:
        train_loader: Loader passed to ``train``.
        valid_loader: Loader passed to ``validation``.
        model: Network to train and evaluate.
        params: Hyper-parameter dict; logged to MLflow and passed to ``train``.
        criterion: Loss callable passed to ``train``.
        optimizer: Optimizer passed to ``train``.
        idx: Identifier of the experiment, appended to the MLflow tag value
            and to the checkpoint file name.
    """
    mlflow.log_params(params)

    model,optimizer = train(train_loader, model,criterion,optimizer,params,mlflow)

    validation(valid_loader,model,mlflow)

    # Formatting (rather than concatenating) accepts non-string identifiers too.
    mlflow.set_tag("Experiment group", f"Experimento{idx}")

    save_model(model,optimizer,f"Model{idx}")

In [None]:
mlflow.set_tracking_uri(str(ngrok_tunnel.public_url))
mlflow.set_experiment("experiment1")

print("Loading datasets")
dataset_train = load_dataset('Falah/Alzheimer_MRI',split='train')
dataset_test = load_dataset('Falah/Alzheimer_MRI',split='test')

# Hyper-parameters shared by every run. 'batch_size' is the value the loaders
# are actually built with, so the logged parameter matches reality.
params= {
        'num_classes':4,
        'batch_size':64,
        'learning_rate':0.01
    }

# Number of training epochs for each successive run.
epoch_options = [1, 2]

train_loader, valid_loader = data_loader(dataset=dataset_train,
                                         batch_size=params['batch_size'])

test_loader = data_loader(dataset=dataset_test,
                              batch_size=params['batch_size'],
                              test=True)

# Loss
criterion = nn.CrossEntropyLoss()

# Train the model
total_step = len(train_loader)
num_runs = len(epoch_options)
for i, num_epochs in enumerate(epoch_options):
    with mlflow.start_run():
        print("Start run number "+str(i))
        # Per-run copy so each run logs its own epoch count.
        run_params = dict(params, num_epochs=num_epochs)

        # Fresh network and optimizer per run: otherwise a later run would
        # continue from the weights and momentum of the previous one.
        print("Creating ResNet")
        model = ResNet(ResidualBlock, [3, 4, 6, 3],
                       num_classes=run_params['num_classes']).to(device)
        optimizer = torch.optim.SGD(model.parameters(), lr=run_params['learning_rate'], weight_decay = 0.001, momentum = 0.9)

        # do_experiment logs run_params itself, so they are not logged here.
        do_experiment(train_loader,valid_loader,model,run_params,criterion,optimizer,str(i))