In [9]:
import medmnist
from medmnist import INFO
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

In [10]:
def Download_Datasets():
    """
    This function downloads the BreastMNIST and BloodMNIST datasets
    and saves them as mentioned in the Assignment Handout in the following directory:
    Datasets/BreastMNIST/breastmnist.npz
    Datasets/BloodMNIST/bloodmnist.npz
    """
    datasets = {
        'BreastMNIST': 'breastmnist',
        'BloodMNIST': 'bloodmnist'
        }
    root = 'Datasets'
    os.makedirs(root, exist_ok = True)

    for f_name , d_name in datasets.items():
        dir = os.path.join(root, f_name)
        os.makedirs(dir, exist_ok = True)
        print(f'Downloading {f_name} Dataset')
        info = INFO[d_name]
        DatasetClass = getattr(medmnist, info['python_class'])
        train = DatasetClass(root = dir, split = 'train', download = True)
        val = DatasetClass(root = dir, split = 'val', download = True)
        test = DatasetClass(root = dir, split = 'test', download = True)
        print(f'Download Complete')
        return train, val, test

In [11]:
tr,vl,tt = Download_Datasets()

Downloading BreastMNIST Dataset
Download Complete


In [12]:
def PreProcess_SK(task):
    """
    This function uses the previously downloading npz Dataset files, and does
    needed Data Preprocessing steps for Sci-Kit learn models like decision trees,
    bagged trees, the steps include normalization (0-1) and flattening (28x28 -> 784)
    """
    if task.upper() == 'A':
        path = os.path.join('Datasets','BreastMNIST','breastmnist.npz')
    elif task.upper() == 'B':
        path = os.path.join('Datasets','BloodMNIST','bloodmnist.npz')
    else:
        return
    
    data = np.load(path)

    X_train = data["train_images"].astype(np.float32) / 255.0
    X_val = data["val_images"].astype(np.float32) / 255.0
    X_test = data["test_images"].astype(np.float32) / 255.0
    
    X_train = X_train.reshape(X_train.shape[0], -1)
    X_val = X_val.reshape(X_val.shape[0], -1)
    X_test = X_test.reshape(X_test.shape[0], -1)

    y_train = data["train_labels"].squeeze().astype(np.int64)
    y_val = data["val_labels"].squeeze().astype(np.int64)
    y_test = data["test_labels"].squeeze().astype(np.int64)

    return X_train, X_val, X_test, y_train, y_val, y_test

In [13]:
a,b,c,d,e,f = PreProcess_SK("A")

In [23]:
f.shape

(156,)

In [None]:
def Random_Sample_Visual(x,y,loader,name):
    fig , axs = plt.subplots(x,y)
    images, labels = next(iter(loader))
    images, labels = next(iter(loader))
    for i in range(x):
        for j in range(y):
            with torch.no_grad():
                index = torch.randint(len(images) , size=(1,)).item()
                img = images[index]
                label = labels[index]
                axs[i][j].imshow(((img+1)/2).squeeze().permute(1, 2, 0), cmap='binary')
                axs[i][j].set_title(f'{label}')
                axs[i][j].set_xticks([])
                axs[i][j].set_yticks([])
    plt.suptitle('Random Samples from Testing Data with Prediction')
    fig.tight_layout()
    dir = os.path.join('Results','name')
    plt.savefig(dir)