# Download dataset

In [None]:
!pip install medmnist
!pip install matplotlib
!pip install torch
!pip install opencv-python

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
import medmnist
from medmnist import PathMNIST
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
from skimage.util import view_as_windows
from PIL import Image

In [2]:



transform = transforms.Compose([transforms.ToTensor()])
train_dataset = PathMNIST(split='train', download=True, transform=transform)
val_dataset = PathMNIST(split='val', download=True, transform=transform)
test_dataset = PathMNIST(split='test', download=True, transform=transform)

# compress

In [None]:
train_images = np.array([np.array(img) for img, _ in train_dataset])
train_labels = np.array([label for _, label in train_dataset])
test_images = np.array([np.array(img) for img, _ in test_dataset])
test_labels = np.array([label for _, label in test_dataset])
val_images = np.array([np.array(img) for img, _ in val_dataset])
val_labels = np.array([label for _, label in val_dataset])
np.savez_compressed("./pathmnist_train.npz",
                    train_images=train_images, train_labels=train_labels)
np.savez_compressed("./pathmnist_test.npz",
                    test_images=test_images, test_labels=test_labels)
np.savez_compressed("./pathmnist_val.npz",
                    val_images=val_images, val_labels=val_labels)


  train_images = np.array([np.array(img) for img, _ in train_dataset])


In [7]:
lo = np.load("./pathmnist_train.npz")
print(lo['train_images'].shape)

(89996, 3, 28, 28)


# data

In [9]:
all_train_images = []
all_train_labels = []
for i in range(len(train_dataset)):
    image, label = train_dataset[i]  
    all_train_images.append(image.numpy())
    all_train_labels.append(label)
all_train_images = np.array(all_train_images)
all_train_labels = np.array(all_train_labels)
all_test_images = []
all_test_labels = []
for i in range(len(test_dataset)):
    image, label = test_dataset[i]  
    all_test_images.append(image.numpy())
    all_test_labels.append(label)
all_test_images = np.array(all_test_images)
all_test_labels = np.array(all_test_labels)


In [4]:
print(all_train_images.shape)

(89996, 3, 28, 28)


In [10]:
max_size = 5000
train_images = all_train_images[:max_size]
train_labels = all_train_labels[:max_size].flatten()
test_images = all_test_images[:max_size]
test_labels = all_test_labels[:max_size].flatten()
train_labels = [str(x) for x in train_labels]
test_labels = [str(x) for x in test_labels]

# Feature extraction

In [None]:
def generate_feature(images, labels, kernel, step):
    image = images[0]
    _, height, width = image.shape
    column_name = ["mean", "var"]
    for c,channel in enumerate(image):
        number_of_block = 0
        for h in range(0, height - kernel + 1, step):
            for w in range(0, width - kernel + 1, step):
                column_name.append(f"Channel {c} - block {number_of_block} mean")
                column_name.append(f"Channel {c} - block {number_of_block} var")        
                number_of_block += 1
    column_name.append(f"label")
    res_df = pd.DataFrame(columns=column_name)
    for i, image in enumerate(images):
        rec = [np.mean(image), np.var(image)]
        for c, channel in enumerate(image):
            windows = view_as_windows(channel, (kernel, kernel), step)
            means = np.mean(windows, axis=(-2, -1))
            vars = np.var(windows, axis=(-2, -1)) 
            rec.extend(means.flatten())  
            rec.extend(vars.flatten())
        rec.append(labels[i])
        res_df.loc[len(res_df)] = rec
    return res_df


# Training model

In [15]:
train = generate_feature(train_images, train_labels, 7, 7)
test = generate_feature(test_images, test_labels, 7, 7)

In [16]:
print(train)

          mean       var  Channel 0 - block 0 mean  Channel 0 - block 0 var  \
0     0.847772  0.001735                  0.864106                 0.863625   
1     0.675400  0.014698                  0.712125                 0.729652   
2     0.742851  0.010313                  0.816967                 0.839936   
3     0.690086  0.018125                  0.851381                 0.839536   
4     0.733620  0.017086                  0.825770                 0.871869   
...        ...       ...                       ...                      ...   
4995  0.806166  0.003535                  0.811525                 0.833614   
4996  0.553311  0.016775                  0.629532                 0.644418   
4997  0.608378  0.038332                  0.610244                 0.622409   
4998  0.518996  0.017043                  0.580872                 0.586074   
4999  0.755364  0.018762                  0.822009                 0.806643   

      Channel 0 - block 1 mean  Channel 0 - block 1