# PytorchModulePCA

## Install

pip install git+https://github.com/FrancescoSaverioZuppichini/PytorchModulePCA.git


## Getting started

First we need to load `PytorchModulePCA`

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
from PytorchModulePCA import PytorchModulePCA

In [3]:
%matplotlib notebook
plt.rcParams['figure.figsize'] = [9, 9]

In [4]:
TRAIN = True

Then we need some data to work with, let's use the CIFAR10 dataset

## Dataset

In [5]:
from torchvision.transforms import Compose, ToTensor, Resize, Grayscale, RandomHorizontalFlip, RandomVerticalFlip, Normalize

from torchvision.datasets import MNIST, CIFAR10
from fastai.vision import *
from torch.utils.data import DataLoader

train_tr = Compose([RandomHorizontalFlip(), RandomVerticalFlip(), ToTensor(), Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
tr = Compose([ToTensor(), Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
train_ds = CIFAR10(root='~/Documents/datasets/', download=True, transform=tr)
train_dl = DataLoader(train_ds, num_workers=14, batch_size=128, shuffle=True)

val_ds = CIFAR10(root='~/Documents/datasets/',  download=True, train=False, transform=tr)
val_dl = DataLoader(val_ds, num_workers=14, batch_size=128, shuffle=False)

data = ImageDataBunch(train_dl, val_dl)

Files already downloaded and verified
Files already downloaded and verified


After, we need a model to visualise

## Model
Let's use a simple cnn

In [6]:
from PytorchModulePCA.utils import device 
from torchvision.models import resnet18
# a random model
model = simple_cnn((3, 32, 64, 128))
model = nn.Sequential(model, torch.nn.Linear(128, 10)).to(device())
learn = Learner(data, model, path='./', loss_func=CrossEntropyFlat())
learn.metrics=[accuracy]

In [7]:
last_conv_layer = model[0][2]

## Not trained

This is how PCA in the last conv layer looks like on a untrained model.

In [8]:
# module_pca = PytorchModulePCA(model.eval(), last_conv_layer.eval(), val_dl)
# module_pca(k=2)
# module_pca.plot()
# module_pca = module_pca.reduce(to=100)
# module_pca.plot()
# plt.savefig("./images/7.png") 
# module_pca.annotate(zoom=1)
# plt.savefig("./images/8.png") 

### Train
A quick random train

In [9]:
if TRAIN:
    learn.fit(20, lr=1e-03)
    learn.fit(15, lr=1e-05)
    learn.save('learn', return_path=True)

epoch,train_loss,valid_loss,accuracy,time
0,1.604205,1.579111,0.4264,00:06
1,1.424394,1.4376,0.4789,00:05
2,1.354578,1.335559,0.5182,00:05
3,1.2781,1.264114,0.5455,00:07
4,1.195605,1.213614,0.5667,00:05
5,1.151789,1.157536,0.5916,00:06
6,1.105292,1.124535,0.605,00:06
7,1.073449,1.124529,0.6047,00:06
8,1.038872,1.105633,0.6104,00:08
9,1.016089,1.077197,0.6171,00:06


epoch,train_loss,valid_loss,accuracy,time
0,0.665262,0.940309,0.6768,00:05
1,0.674927,0.937217,0.6787,00:07
2,0.654308,0.93564,0.6797,00:07
3,0.662728,0.935862,0.6802,00:05
4,0.643745,0.935391,0.6806,00:07
5,0.651306,0.934825,0.6809,00:06
6,0.659996,0.935683,0.6805,00:06
7,0.647889,0.935457,0.6799,00:05
8,0.660807,0.935004,0.6799,00:06
9,0.653945,0.93534,0.6809,00:06


In [10]:
learn.load('./learn')

learn.validate(metrics=[accuracy])

[0.9357617, tensor(0.6810)]

## Compute PCA on the last conv layer
`PytorchModulePCA` will run PCA on each batch and it stores only the points, the labels and the indeces of the dataset in RAM


In [11]:
learn.model.eval()
last_conv_layer = learn.model.layer4

module_pca = PytorchModulePCA(learn.model, last_conv_layer, learn.data.valid_dl)
module_pca(k=2)
module_pca.plot()
plt.savefig("./images/0.png") 
module_pca.annotate(zoom=1)
plt.savefig("./images/1.png") 

AttributeError: 'Sequential' object has no attribute 'layer4'

Yeah, it is a mess! We have too many points

### Reduce
We can reduce the number of points by calling `.reduce`. By default it uses **kmeans** to properly select the new points.

In [None]:
reduced_module_pca = module_pca.reduce(to=100)
reduced_module_pca.plot()
plt.savefig("./images/2.png") 
reduced_module_pca.annotate(zoom=1)
plt.savefig("./images/3.png") 

## 3D

In [None]:
module_pca3d = PytorchModulePCA(learn.model, last_conv_layer, learn.data.valid_dl)
module_pca3d(k=3)
module_pca3d.plot()
plt.savefig("./images/4.png") 

### Reduce

In [None]:
reduced_module_pca3d = module_pca3d.reduce(to=100)
reduced_module_pca3d.plot()
plt.savefig("./images/5.png") 
reduced_module_pca3d.annotate()
plt.savefig("./images/6.png") 

## Not trained model
Let's see how a not trained model looks like