Ref: https://docs.medperf.org/getting_started/installation/

# Install conda

In [1]:
!pip install -q condacolab
import condacolab
condacolab.install()

[0m✨🍰✨ Everything looks OK!


# Install medperf

In [2]:
!git clone https://github.com/mlcommons/medperf.git
%cd medperf

Cloning into 'medperf'...
remote: Enumerating objects: 7706, done.[K
remote: Counting objects: 100% (7704/7704), done.[K
remote: Compressing objects: 100% (2379/2379), done.[K
remote: Total 7706 (delta 5240), reused 7301 (delta 5050), pack-reused 2[K
Receiving objects: 100% (7706/7706), 54.59 MiB | 17.54 MiB/s, done.
Resolving deltas: 100% (5240/5240), done.
/content/medperf


In [3]:
!conda create -n medperf-env python=3.9

Collecting package metadata (current_repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - done
Solving environment: | / done


  current version: 23.1.0
  latest version: 23.7.3

Please update conda by running

    $ conda update -n base -c conda-forge conda

Or to minimize the number of packages updated during conda update use

     conda install conda=23.7.3



## Package Plan ##

  environment location: /usr/local/

In [4]:
# this is important to avoide any issues or updates by colab
%%shell
eval "$(conda shell.bash hook)" # copy conda command to shell
conda activate medperf-env
python --version
which pip
pip install -e ./cli

Python 3.9.18
/usr/local/envs/medperf-env/bin/pip
Obtaining file:///content/medperf/cli
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mlcube@ git+https://github.com/mlcommons/mlcube@fb371c960938b495e939bf38b161199d529cf912#subdirectory=mlcube (from medperf==0.1.0)
  Cloning https://github.com/mlcommons/mlcube (to revision fb371c960938b495e939bf38b161199d529cf912) to /tmp/pip-install-liwv91f0/mlcube_176f831ec7924271b168bf326df8b8fa
  Running command git clone --filter=blob:none --quiet https://github.com/mlcommons/mlcube /tmp/pip-install-liwv91f0/mlcube_176f831ec7924271b168bf326df8b8fa
  Running command git rev-parse -q --verify 'sha^fb371c960938b495e939bf38b161199d529cf912'
  Running command git fetch -q https://github.com/mlcommons/mlcube fb371c960938b495e939bf38b161199d529cf912
  Running command git checkout -q fb371c960938b495e939bf38b161199d529cf912
  Resolved https://github.com/mlcommons/mlcube to commit fb371c960938b495e939bf38b161199d529cf912
  Preparing metada



In [None]:
# this is important to avoide any issues or updates by colab
%%shell
eval "$(conda shell.bash hook)" # copy conda command to shell
conda activate medperf-env
medperf --version

MedPerf version 0.1.0




In [None]:
%cd ../

/content


# Creat model

In [None]:
# this is important to avoide any issues or updates by colab
%%shell
eval "$(conda shell.bash hook)" # copy conda command to shell
conda activate medperf-env

medperf mlcube create model

MedPerf 0.1.0
project_name [Model MLCube]: 
project_slug [model_mlcube]: 
description [Model MLCube Template. Provided by MLCommons]: 
author_name [John Smith]: 
accelerator_count [0]: 
docker_image_name [docker/image:latest]: 




# Test code (Run only to test that every thing is installed good)

Ref: https://docs.medperf.org/mlcubes/mlcube_models/

In [None]:
%%writefile /content/model_mlcube/project/models.py
"""
Taken from MedMNIST/MedMNIST.
"""

import torch.nn as nn


class SimpleCNN(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(SimpleCNN, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels, 16, kernel_size=3), nn.BatchNorm2d(16), nn.ReLU()
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 16, kernel_size=3),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(16, 64, kernel_size=3), nn.BatchNorm2d(64), nn.ReLU()
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3), nn.BatchNorm2d(64), nn.ReLU()
        )

        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc = nn.Sequential(
            nn.Linear(64 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


Overwriting /content/model_mlcube/project/models.py


In [None]:
%%writefile /content/model_mlcube/project/data_loader.py

import numpy as np
import torchvision.transforms as transforms
import os
from torch.utils.data import Dataset


class CustomImageDataset(Dataset):
    def __init__(self, data_path):
        self.transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5])]
        )
        self.files = os.listdir(data_path)
        self.data_path = data_path

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img_path = os.path.join(self.data_path, self.files[idx])
        image = np.load(img_path)
        image = self.transform(image)
        file_id = self.files[idx].strip(".npy")
        return image, file_id


Overwriting /content/model_mlcube/project/data_loader.py


In [None]:
%cd /content/model_mlcube/project

/content/model_mlcube/project


In [None]:
import os
import sys

base = os.getcwd()

for file_ in os.listdir(os.path.join(base,"requirements")):
  sys.path.append(os.path.join(base, "requirements", file_))




In [None]:
%cd ../../

/content


In [None]:
%%writefile /content/model_mlcube/project/infer.py

import torch
import os
from models import SimpleCNN
from tqdm import tqdm
from torch.utils.data import DataLoader
from data_loader import CustomImageDataset
import json


def run_inference(data_path, parameters, output_path, weights):
    in_channels = parameters["in_channels"]
    num_classes = parameters["num_classes"]
    batch_size = parameters["batch_size"]

    # load model
    model = SimpleCNN(in_channels=in_channels, num_classes=num_classes)
    model.load_state_dict(torch.load(weights))
    model.eval()

    # load prepared data
    dataset = CustomImageDataset(data_path)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # inference
    predictions_dict = {}
    with torch.no_grad():
        for images, files_ids in tqdm(dataloader):
            outputs = model(images)
            outputs = torch.nn.Sigmoid()(outputs)
            outputs = outputs.detach().numpy().tolist()

            for file_id, output in zip(files_ids, outputs):
                predictions_dict[file_id] = output

    # save
    preds_file = os.path.join(output_path, "predictions.json")
    with open(preds_file, "w") as f:
        json.dump(predictions_dict, f, indent=4)


Overwriting /content/model_mlcube/project/infer.py


In [None]:
%%writefile /content/model_mlcube/mlcube/workspace/parameters.yaml
in_channels: 1
num_classes: 14
batch_size: 5

Overwriting /content/model_mlcube/mlcube/workspace/parameters.yaml


In [None]:
!wget https://storage.googleapis.com/medperf-storage/chestxray_tutorial/cnn_weights.tar.gz

--2023-08-19 15:33:55--  https://storage.googleapis.com/medperf-storage/chestxray_tutorial/cnn_weights.tar.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.99.128, 142.250.107.128, 74.125.20.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.99.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 878192 (858K) [application/gzip]
Saving to: ‘cnn_weights.tar.gz.1’


2023-08-19 15:33:55 (159 MB/s) - ‘cnn_weights.tar.gz.1’ saved [878192/878192]



In [None]:
!tar -xvf /content/cnn_weights.tar.gz

cnn_weights.pth


In [None]:
!mkdir /content/model_mlcube/mlcube/workspace/additional_files
!cp /content/cnn_weights.pth /content/model_mlcube/mlcube/workspace/additional_files

mkdir: cannot create directory ‘/content/model_mlcube/mlcube/workspace/additional_files’: File exists


In [None]:
%%writefile /content/model_mlcube/project/mlcube.py

"""MLCube handler file"""

import os
import sys

base = os.getcwd()

for file_ in os.listdir(os.path.join(base,"requirements")):
  sys.path.append(os.path.join(base, "requirements", file_))

import typer
import yaml

from infer import run_inference

app = typer.Typer()


@app.command("infer")
def infer(
    data_path: str = typer.Option(..., "--data_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
    output_path: str = typer.Option(..., "--output_path"),
    weights: str = typer.Option(..., "--weights"),
):
    with open(parameters_file) as f:
        parameters = yaml.safe_load(f)

    run_inference(data_path, parameters, output_path, weights)


@app.command("hotfix")
def hotfix():
    # NOOP command for typer to behave correctly. DO NOT REMOVE OR MODIFY
    pass


if __name__ == "__main__":
    app()


Overwriting /content/model_mlcube/project/mlcube.py


In [None]:
%%writefile /content/model_mlcube/project/requirements.txt
typer==0.9.0
numpy==1.24.3
PyYAML==6.0
torch==2.0.1
torchvision==0.15.2
tqdm==4.65.0
--extra-index-url https://download.pytorch.org/whl/cpu


Overwriting /content/model_mlcube/project/requirements.txt


*NOTE:* update mlcube.yaml
weights: additional_files/cnn_weights.pth


# Overwrite mlcube.by (submission code)

pip freeze > requirements.txt

In [None]:
%%writefile /content/model_mlcube/project/mlcube.py

"""MLCube handler file"""
import typer


app = typer.Typer()


@app.command("infer")
def infer(
    data_path: str = typer.Option(..., "--data_path"),
    parameters_file: str = typer.Option(..., "--parameters_file"),
    output_path: str = typer.Option(..., "--output_path"),
    # Provide additional parameters as described in the mlcube.yaml file
    # e.g. model weights:
    # weights: str = typer.Option(..., "--weights"),
):
    # Modify the infer command as needed
    # test
    raise NotImplementedError("The infer method is not yet implemented")


@app.command("hotfix")
def hotfix():
    # NOOP command for typer to behave correctly. DO NOT REMOVE OR MODIFY
    pass


if __name__ == "__main__":
    app()


Overwriting /content/model_mlcube/project/mlcube.py


# Build

In [None]:
!pip install --target=/content/model_mlcube/project/requirements -r /content/model_mlcube/project/requirements.txt

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu
Collecting typer==0.9.0
  Using cached typer-0.9.0-py3-none-any.whl (45 kB)
Collecting numpy==1.24.3
  Using cached numpy-1.24.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Collecting PyYAML==6.0
  Using cached PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)
Collecting torch==2.0.1
  Using cached https://download.pytorch.org/whl/cpu/torch-2.0.1%2Bcpu-cp310-cp310-linux_x86_64.whl (195.4 MB)
Collecting torchvision==0.15.2
  Using cached https://download.pytorch.org/whl/cpu/torchvision-0.15.2%2Bcpu-cp310-cp310-linux_x86_64.whl (1.5 MB)
Collecting tqdm==4.65.0
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.1/77.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-extensions>=3.7.4.3
  Using cached typing_extensions-4.7.1-py3-no

In [None]:
!apt-get -qq install docker.io

In [None]:
# this is important to avoide any issues or updates by colab
%%shell
eval "$(conda shell.bash hook)" # copy conda command to shell
conda activate medperf-env

set -x
dockerd -b none --iptables=0 -l warn &
for i in $(seq 5); do [ ! -S "/var/run/docker.sock" ] && sleep 2 || break; done
docker network ls


cd /content/model_mlcube/mlcube
# mlcube
mlcube configure -Pdocker.build_strategy=always

kill $(jobs -p)

+ dockerd -b none --iptables=0 -l warn
++ seq 5
+ for i in $(seq 5)
+ '[' '!' -S /var/run/docker.sock ']'
+ sleep 2
[33mWARN[0m[0000] containerd config version `1` has been deprecated and will be removed in containerd v2.0, please switch to version `2`, see https://github.com/containerd/containerd/blob/main/docs/PLUGINS.md#version-header 
[33mWARN[0m[2023-08-19T16:05:47.444904173Z] failed to load plugin io.containerd.snapshotter.v1.devmapper  [33merror[0m="devmapper not configured"
[33mWARN[0m[2023-08-19T16:05:47.445197867Z] could not use snapshotter devmapper in metadata plugin  [33merror[0m="devmapper not configured"
[31mERRO[0m[2023-08-19T16:05:47.497296354Z] failed to mount overlay: invalid argument     [31mstorage-driver[0m=overlay2
[31mERRO[0m[2023-08-19T16:05:47.497532232Z] exec: "fuse-overlayfs": executable file not found in $PATH  [31mstorage-driver[0m=fuse-overlayfs
[31mERRO[0m[2023-08-19T16:05:47.503305422Z] AUFS was not found in /proc/filesystems       



# Run

In [None]:
!wget https://storage.googleapis.com/medperf-storage/chestxray_tutorial/sample_prepared_data.tar.gz

--2023-08-19 16:07:31--  https://storage.googleapis.com/medperf-storage/chestxray_tutorial/sample_prepared_data.tar.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.135.128, 74.125.142.128, 173.194.202.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.135.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 45828 (45K) [application/gzip]
Saving to: ‘sample_prepared_data.tar.gz’


2023-08-19 16:07:32 (66.8 MB/s) - ‘sample_prepared_data.tar.gz’ saved [45828/45828]



In [None]:
!tar -xvf /content/sample_prepared_data.tar.gz

In [None]:
!mkdir output

In [None]:
%%shell
eval "$(conda shell.bash hook)" # copy conda command to shell
conda activate medperf-env

set -x
dockerd -b none --iptables=0 -l warn &
for i in $(seq 5); do [ ! -S "/var/run/docker.sock" ] && sleep 2 || break; done
docker network ls

cd /content/model_mlcube/mlcube

mlcube configure -Pdocker.build_strategy=always

mlcube run --task infer data_path=/content/sample_prepared_data output_path=/content/output

kill $(jobs -p)

+ dockerd -b none --iptables=0 -l warn
++ seq 5
+ for i in $(seq 5)
+ '[' '!' -S /var/run/docker.sock ']'
+ sleep 2
[33mWARN[0m[0000] containerd config version `1` has been deprecated and will be removed in containerd v2.0, please switch to version `2`, see https://github.com/containerd/containerd/blob/main/docs/PLUGINS.md#version-header 
[33mWARN[0m[2023-08-19T16:11:44.966933664Z] failed to load plugin io.containerd.snapshotter.v1.devmapper  [33merror[0m="devmapper not configured"
[33mWARN[0m[2023-08-19T16:11:44.967283091Z] could not use snapshotter devmapper in metadata plugin  [33merror[0m="devmapper not configured"
[31mERRO[0m[2023-08-19T16:11:44.994948179Z] failed to mount overlay: invalid argument     [31mstorage-driver[0m=overlay2
[31mERRO[0m[2023-08-19T16:11:44.995076646Z] exec: "fuse-overlayfs": executable file not found in $PATH  [31mstorage-driver[0m=fuse-overlayfs
[31mERRO[0m[2023-08-19T16:11:44.997771239Z] AUFS was not found in /proc/filesystems       



In [None]:
%%shell
set -x
dockerd -b none --iptables=0 -l warn &
for i in $(seq 5); do [ ! -S "/var/run/docker.sock" ] && sleep 2 || break; done

docker run docker/image
# docker images
kill $(jobs -p)

+ dockerd -b none --iptables=0 -l warn
++ seq 5
+ for i in $(seq 5)
+ '[' '!' -S /var/run/docker.sock ']'
+ sleep 2
[33mWARN[0m[0000] containerd config version `1` has been deprecated and will be removed in containerd v2.0, please switch to version `2`, see https://github.com/containerd/containerd/blob/main/docs/PLUGINS.md#version-header 
[33mWARN[0m[2023-08-19T16:20:17.404681404Z] failed to load plugin io.containerd.snapshotter.v1.devmapper  [33merror[0m="devmapper not configured"
[33mWARN[0m[2023-08-19T16:20:17.405001744Z] could not use snapshotter devmapper in metadata plugin  [33merror[0m="devmapper not configured"
[31mERRO[0m[2023-08-19T16:20:17.438016385Z] failed to mount overlay: invalid argument     [31mstorage-driver[0m=overlay2
[31mERRO[0m[2023-08-19T16:20:17.438146256Z] exec: "fuse-overlayfs": executable file not found in $PATH  [31mstorage-driver[0m=fuse-overlayfs
[31mERRO[0m[2023-08-19T16:20:17.440489332Z] AUFS was not found in /proc/filesystems       



Cannot connect to the Docker daemon at unix:///var/run/docker.sock. Is the docker daemon running?


# Docker

Ref: https://gist.github.com/mwufi/6718b30761cd109f9aff04c5144eb885

In [None]:
%%shell

# First let's update all the packages to the latest ones with the following command
sudo apt update -qq

# Now we want to install some prerequisite packages which will let us use HTTPS over apt
sudo apt install apt-transport-https ca-certificates curl software-properties-common -qq

# After that we will add the GPG key for the official Docker repository to the system
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -

# We will add the Docker repository to our APT sources
sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu bionic stable"

# Next let's update the package database with our newly added Docker package repo
sudo apt update -qq

# Finally lets install docker with the below command
sudo apt install docker-ce

# Lets check that docker is running
docker

# Originally, we did the following: (but doesn't work in Colab...)
# sudo systemctl status docker

# The output should be similar to this snippet below
# ● docker.service - Docker Application Container Engine
#    Loaded: loaded (/lib/systemd/system/docker.service; enabled; vendor preset: enabled)
#    Active: active (running) since Tue 2019-01-01 19:22:114 UTC; 1min 25s ago
#      Docs: https://docs.docker.com
#  Main PID: 10096 (dockerd)
#     Tasks: 16
#    CGroup: /system.slice/docker.service
#            ├─10096 /usr/bin/dockerd -H fd://
#            └─10113 docker-containerd --config /var/run/docker/containerd/containerd.toml

# And now that everything is good, you should be able to do:

# docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow


16 packages can be upgraded. Run 'apt list --upgradable' to see them.
ca-certificates is already the newest version (20230311ubuntu0.22.04.1).
curl is already the newest version (7.81.0-1ubuntu1.13).
software-properties-common is already the newest version (0.99.22.7).
The following NEW packages will be installed:
  apt-transport-https
0 upgraded, 1 newly installed, 0 to remove and 16 not upgraded.
Need to get 1,510 B of archives.
After this operation, 169 kB of additional disk space will be used.
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package apt-tran

