In [1]:
!pip install medmnist

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple, https://pypi.ngc.nvidia.com


In [1]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

import medmnist
from medmnist import INFO, Evaluator

In [2]:
# Report the installed MedMNIST version together with the project homepage.
banner = f"MedMNIST v{medmnist.__version__} @ {medmnist.HOMEPAGE}"
print(banner)

MedMNIST v3.0.1 @ https://github.com/MedMNIST/MedMNIST/


In [3]:
# Key into INFO that selects which MedMNIST dataset this notebook works on.
data_flag = 'breastmnist'
# Passed as `download=` to every dataset constructor below.
download = True

# Training hyper-parameters.
NUM_EPOCHS, BATCH_SIZE = 3, 128
lr = 0.001

# Metadata record of the selected dataset and the fields read from it.
info = INFO[data_flag]
n_classes = len(info['label'])
n_channels = info['n_channels']
task = info['task']

# Resolve the dataset class on the medmnist module by the name stored in the metadata.
DataClass = getattr(medmnist, info['python_class'])

In [4]:
# Preprocessing: tensor conversion followed by normalisation with mean 0.5 / std 0.5.
data_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=[.5], std=[.5])]
)

# Train and test splits with the preprocessing pipeline attached.
train_dataset = DataClass(split='train', transform=data_transform, download=download)
test_dataset = DataClass(split='test', transform=data_transform, download=download)

# Training split again, this time without any transform
# (presumably yields the raw images - TODO confirm).
pil_dataset = DataClass(split='train', download=download)

# Wrap the datasets in DataLoaders. Only the training loader shuffles; the two
# evaluation loaders run unshuffled at twice the training batch size.
eval_batch_size = 2 * BATCH_SIZE
train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
train_loader_at_eval = data.DataLoader(train_dataset, batch_size=eval_batch_size, shuffle=False)
test_loader = data.DataLoader(test_dataset, batch_size=eval_batch_size, shuffle=False)

Using downloaded and verified file: /Users/yuqi/.medmnist/breastmnist.npz
Using downloaded and verified file: /Users/yuqi/.medmnist/breastmnist.npz
Using downloaded and verified file: /Users/yuqi/.medmnist/breastmnist.npz


In [5]:
# Print the train and test dataset summaries, separated by a ruler line.
print(train_dataset, "===================", test_dataset, sep="\n")

Dataset BreastMNIST of size 28 (breastmnist)
    Number of datapoints: 546
    Root location: /Users/yuqi/.medmnist
    Split: train
    Task: binary-class
    Number of channels: 1
    Meaning of labels: {'0': 'malignant', '1': 'normal, benign'}
    Number of samples: {'train': 546, 'val': 78, 'test': 156}
    Description: The BreastMNIST is based on a dataset of 780 breast ultrasound images. It is categorized into 3 classes: normal, benign, and malignant. As we use low-resolution images, we simplify the task into binary classification by combining normal and benign as positive and classifying them against malignant as negative. We split the source dataset with a ratio of 7:1:2 into training, validation and test set. The source images of 1×500×500 are resized into 1×28×28.
    License: CC BY 4.0
Dataset BreastMNIST of size 28 (breastmnist)
    Number of datapoints: 156
    Root location: /Users/yuqi/.medmnist
    Split: test
    Task: binary-class
    Number of channels: 1
    Meaning

# We first compare their singular values

## We first use standard SVD to check their singular values

In [6]:
from SVD import standard_svd

In [8]:
# Fetch the first training sample and split it into image (x) and label (y).
first_sample = train_dataset[0]
x, y = first_sample

In [9]:
# Show the shape of the image tensor next to the shape of its label.
(x.shape, y.shape)

(torch.Size([1, 28, 28]), (1,))

In [10]:
# Check which type the transformed image has.
type(x)

torch.Tensor

In [7]:
from SVD import standard_svd
# Run standard_svd on channel 0 of the first ten training images. Its two
# return values are stored per image, keyed by the image index as a string.
svd_standard, singular_values = {}, {}
for i in range(10):
    x, y = train_dataset[i]
    key = str(i)
    svd_standard[key], singular_values[key] = standard_svd(x[0])

## Then we try our first sketching matrix

In [22]:
!pip install utils

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple, https://pypi.ngc.nvidia.com
Collecting utils
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/ad/1f/c196d21c2df061923154aecf24cab049a114394956e90c9bfbfdd398e27a/utils-1.0.2.tar.gz (13 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: utils
  Building wheel for utils (setup.py) ... [?25ldone
[?25h  Created wheel for utils: filename=utils-1.0.2-py2.py3-none-any.whl size=13905 sha256=f3128f7eba741c6c6219c021c9b77fbef7fa357cb1aa2dbe64b2c958c21ed122
  Stored in directory: /private/var/folders/y3/56xptv057xs_4rtl_q056wpc0000gn/T/pip-ephem-wheel-cache-t0pwz4x_/wheels/87/db/e8/9153d180b8efd38d97c906c56d943277874827a84c908be276
Successfully built utils
Installing collected packages: utils
Successfully installed utils-1.0.2


## Uniform Sketching Matrices

In [10]:
from sketching import uniform_sketching_matrix
from QB_decomposition_fixed_precision import randQB_FP_auto

# NOTE(review): the recorded output of this cell is
# "TypeError: 'numpy.float64' object cannot be interpreted as an integer".
# The failing line is not visible here; it is presumably inside one of the
# imported helpers - confirm before relying on the results below.

# One uniform sketching matrix per image. The second argument is the number of
# values standard_svd returned for that image plus 5.
uniform_matrices = {}
for i in range(10):
    n_singular = len(singular_values[str(i)])
    uniform_matrices[str(i)] = uniform_sketching_matrix(28, n_singular + 5)

# Get the QB decomposition of channel 0 of each image with its sketching matrix,
# keeping the returned Q, B and k per image.
rank_approximated, Q_uniform, B_uniform = {}, {}, {}
for i in range(10):
    key = str(i)
    Q, B, k = randQB_FP_auto(train_dataset[i][0][0], 0.01, 4, 3, uniform_matrices[key])
    rank_approximated[key] = k
    Q_uniform[key] = Q
    B_uniform[key] = B

TypeError: 'numpy.float64' object cannot be interpreted as an integer

In [None]:
# Then we do an SVD of the B factor that the QB decomposition produced for each image.
# Every image has to use its own B from B_uniform: the bare name `B` only holds
# the factor of the last image processed by the QB loop, so passing it here
# would give ten identical results.
rand_singular_values = {}
for i in range(10):
    key = str(i)
    _, rand_singular_values[key] = standard_svd(B_uniform[key])

In [None]:
# TODO: compare singular_values (standard SVD) with rand_singular_values (SVD of the QB factor B)

## Gaussian Sketching matrices