In [1]:
!pip install medmnist

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple, https://pypi.ngc.nvidia.com


In [2]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

import medmnist
from medmnist import INFO, Evaluator

In [3]:
# Report the installed medmnist version together with the homepage string the
# package exposes, so the saved output records which release produced the results.
print(f"MedMNIST v{medmnist.__version__} @ {medmnist.HOMEPAGE}")

MedMNIST v3.0.1 @ https://github.com/MedMNIST/MedMNIST/


In [7]:
# Dataset selection: data_flag is used below as the key into medmnist's INFO
# table; `download` is forwarded to the dataset constructors.
data_flag = 'breastmnist'
download = True

# Numeric run settings; BATCH_SIZE is what the DataLoader cell uses to size
# its batches.
NUM_EPOCHS, BATCH_SIZE, lr = 3, 128, 0.001

# Metadata record for the selected dataset and the fields read from it.
info = INFO[data_flag]
task, n_channels = info['task'], info['n_channels']
n_classes = len(info['label'])

# Resolve the dataset class from the class name stored in the metadata.
DataClass = getattr(medmnist, info['python_class'])

In [8]:
# Preprocessing pipeline: tensor conversion, then Normalize with mean 0.5 / std 0.5.
data_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=[.5], std=[.5])]
)

# Build the train and test splits (in that order) with the pipeline applied.
train_dataset, test_dataset = (
    DataClass(split=split_name, transform=data_transform, download=download)
    for split_name in ('train', 'test')
)

# Second copy of the train split, constructed without passing any transform.
pil_dataset = DataClass(split='train', download=download)

# Training loader: shuffled, BATCH_SIZE samples per batch.
train_loader = data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Evaluation loaders over train and test: fixed order, double-sized batches.
train_loader_at_eval, test_loader = (
    data.DataLoader(dataset=split_dataset, batch_size=2*BATCH_SIZE, shuffle=False)
    for split_dataset in (train_dataset, test_dataset)
)

Using downloaded and verified file: /Users/yuqi/.medmnist/breastmnist.npz
Using downloaded and verified file: /Users/yuqi/.medmnist/breastmnist.npz
Using downloaded and verified file: /Users/yuqi/.medmnist/breastmnist.npz


In [9]:
# Show the textual summaries of both splits, one after the other, with a
# separator line in between.
print(train_dataset, "===================", test_dataset, sep="\n")

Dataset BreastMNIST of size 28 (breastmnist)
    Number of datapoints: 546
    Root location: /Users/yuqi/.medmnist
    Split: train
    Task: binary-class
    Number of channels: 1
    Meaning of labels: {'0': 'malignant', '1': 'normal, benign'}
    Number of samples: {'train': 546, 'val': 78, 'test': 156}
    Description: The BreastMNIST is based on a dataset of 780 breast ultrasound images. It is categorized into 3 classes: normal, benign, and malignant. As we use low-resolution images, we simplify the task into binary classification by combining normal and benign as positive and classifying them against malignant as negative. We split the source dataset with a ratio of 7:1:2 into training, validation and test set. The source images of 1×500×500 are resized into 1×28×28.
    License: CC BY 4.0
Dataset BreastMNIST of size 28 (breastmnist)
    Number of datapoints: 156
    Root location: /Users/yuqi/.medmnist
    Split: test
    Task: binary-class
    Number of channels: 1
    Meaning

# We first compare their singular values

## First, we use the standard SVD to check their singular values

In [10]:
from SVD import standard_svd

In [11]:
# Index the training dataset at position 0 and unpack the returned pair into
# x and y (train_dataset was built with data_transform, so the transform is
# applied on access). Their shapes and types are inspected in the next cells.
x, y = train_dataset[0]

In [13]:
# Display the shapes of the sample's two components side by side as a tuple.
(x.shape, y.shape)

(torch.Size([1, 28, 28]), (1,))

In [14]:
# Confirm the container type of x; the transform pipeline begins with
# ToTensor, and the saved output shows torch.Tensor.
type(x)

torch.Tensor

In [18]:
# Length-10 vector of zeros (numpy default dtype).
# NOTE(review): the SVD loop cell rebinds SVD_standard to a return value of
# standard_svd, so this preallocated array is replaced rather than filled.
SVD_standard = np.zeros(shape=(10,))

In [None]:
# NOTE(review): leftover scratch cell — it has no execution count in the saved
# notebook. The visible import is `from SVD import standard_svd`, which does
# not bind the name `SVD`; unless `SVD` is defined elsewhere, evaluating it
# raises NameError on Restart & Run All. Consider deleting this cell.
SVD

In [15]:
# Run standard_svd on each of the first 10 training images and keep every
# image's singular values. Previously each pass rebound `singular_values`, so
# only the last image's result survived the loop and the other nine were lost.
singular_values_per_image = []
for i in range(10):
    x, y = train_dataset[i]
    # x[0] drops the leading axis of the sample tensor (the sample inspected
    # earlier has shape 1x28x28), handing standard_svd a 2-D array.
    # The meaning of the first return value is not visible from this notebook;
    # it is kept under its original name.
    SVD_standard, singular_values = standard_svd(x[0])
    # Store an independent copy so entries cannot alias a buffer that
    # standard_svd might reuse between calls.
    singular_values_per_image.append(np.array(singular_values))
# After the loop x, y, SVD_standard and singular_values still refer to the
# last processed image (index 9), exactly as before; the full set is in
# singular_values_per_image, one entry per image in index order.

In [16]:
# Print the singular values left in `singular_values` by the loop cell, i.e.
# those of the last image it processed (index 9).
print(singular_values)

[1.1403920e+01 2.5492389e+00 1.7303849e+00 1.3565499e+00 1.2179061e+00
 8.9032990e-01 6.8286926e-01 6.1622620e-01 6.0441703e-01 4.5058477e-01
 3.5106349e-01 3.3365169e-01 2.6797858e-01 2.2811188e-01 2.1270064e-01
 1.7465991e-01 1.4750369e-01 1.3005732e-01 1.1633617e-01 9.2636526e-02
 7.1680062e-02 6.6214167e-02 4.7915462e-02 3.7108663e-02 3.1721704e-02
 2.2649294e-02 1.1360338e-02 4.9718725e-03]
