# Latent space visualization

In [None]:
import logging
from pathlib import Path
from pprint import pprint
from src.nb_imports import *


from fastai.losses import MSELossFlat
from fastai.learner import Learner


import fastai
# from fastai.tabular.all import *

from fastai.basics import *
from fastai.callback.all import *
from fastai.torch_basics import *
from fastai.data.all import *

# import fastai.callback.hook # Learner.summary

import sklearn
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

import vaep.io_images
from vaep.models import ae
from vaep.transform import VaepPipeline
from vaep.io import datasplits
from vaep.io.dataloaders import get_dls

import src
from src import config
from src.logging import setup_logger
# Fetch the 'vaep' logger and pass it through the project's setup_logger helper
# (presumably attaches handlers/formatting -- see src.logging to confirm).
logger = setup_logger(logger=logging.getLogger('vaep'))
logger.info("Experiment 03 - Analysis of latent spaces and performance comparisions")

figures = {}  # collection of ax or figures

Papermill script parameters

In [None]:
# NOTE(review): n_peptides is not referenced by any cell visible here -- confirm it is still needed.
n_peptides = 50
# Folder with the data splits; wrapped in a Path and handed to DataSplits.from_folder below.
data = 'data/msinstrument_in_QE4'
# Number of epochs passed to learn.fit_one_cycle in the training cell.
epochs_max = 30

Some argument transformations

In [None]:
# Collect the run settings on a single project Config object;
# it is displayed again under the 'Config' heading.
args = config.Config()
args.data = Path(data)  # convert the string parameter into a Path
args.epochs_max = epochs_max

## Load data

In [None]:
# Load the data splits from the folder in args.data.
# NOTE: this rebinds `data` -- until here it held the path string from the
# parameters cell, from here on it is the DataSplits object.
data = datasplits.DataSplits.from_folder(args.data)

Data is loaded in long format.

In [None]:
# Show five randomly drawn entries of the training split
# (assumes train_X is a pandas object whose .sample(n) draws n rows -- confirm).
data.train_X.sample(5)

## Initialize Comparison

- replicates idea for truly missing values: define the truth by using n=3 replicates to impute
  each sample
- real test data: Not used for predictions or early stopping.

In [None]:
# Interpolated values for the 'test_X' split, converted to a frame.
# NOTE(review): how DataSplits.interpolate fills values is defined in
# vaep.io.datasplits and not visible here -- confirm before relying on it.
test_predictions_real_na = data.interpolate('test_X').to_frame() # "gold standard"
test_predictions_real_na  # display the result

In [None]:
# Observed test values as a frame
# (assumes test_X is a pandas Series, so 'truth' becomes the column name -- confirm).
test_predictions_observed = data.test_X.to_frame('truth')
test_predictions_observed  # display the result

## Collaborative Filtering

## Data in Wide format

- Autoencoders need data in wide format

In [None]:
# Switch the splits to wide format; the return value is not used, so this
# presumably converts `data` in place (confirm in DataSplits.to_wide_format).
data.to_wide_format()
data.val_X.head()  # preview the first rows of the validation split

## Denoising Autoencoder

### DataLoaders

In [None]:
# 'import sklearn' alone does not guarantee that the 'pipeline' submodule is
# loaded on every scikit-learn version; import it explicitly instead of relying
# on some other import having pulled it in as a side effect.
import sklearn.pipeline

# Encoding steps: standardize each feature, then fill the remaining missing
# values with SimpleImputer's default strategy (no missing-indicator columns).
dae_default_pipeline = sklearn.pipeline.Pipeline(
    [
        ('normalize', StandardScaler()),
        ('impute', SimpleImputer(add_indicator=False)),
    ]
)

# Project wrapper around the pipeline: df_train is presumably the data the
# encode steps are fitted on (confirm in vaep.transform); only the 'normalize'
# step is listed for decoding.
dae_transforms = VaepPipeline(
    df_train=data.train_X,
    encode=dae_default_pipeline,
    decode=['normalize'],
)

# DataLoaders built from the training and validation splits with the transforms above.
dls = get_dls(data.train_X, data.val_X, transformer=dae_transforms)

### Model

In [None]:
# Number of input features: size of the last axis of the (wide) training data.
M = data.train_X.shape[-1]
latent_dim = 30

# n_neurons is set to half the feature count (rounded down); no activation is
# applied after the last decoder layer.
model = ae.Autoencoder(
    n_features=M,
    n_neurons=M // 2,
    last_decoder_activation=None,
    dim_latent=latent_dim,
)

### Learner

In [None]:
# fastai Learner tying together the DataLoaders, the autoencoder, a flattened
# MSE loss and the project's ModelAdapter callback.
learn = Learner(
    dls=dls,
    model=model,
    loss_func=MSELossFlat(),
    cbs=ae.ModelAdapter(),
)

In [None]:
# Print the events of fastai's training loop together with the callbacks hooked into each.
learn.show_training_loop()

In [None]:
# Show fastai's summary of the model held by the learner.
learn.summary()

In [None]:
# Run fastai's learning-rate finder; the `valley` attribute of the result is
# used as lr_max in the training cell.
suggested_lr = learn.lr_find()
suggested_lr  # display the suggestion

### Training


In [None]:
# One-cycle training for args.epochs_max epochs, with the learning rate
# suggested by lr_find (its `valley` value) as the maximum of the schedule.
learn.fit_one_cycle(args.epochs_max, lr_max=suggested_lr.valley)

### Plots

## Config

In [None]:
# Display the configuration collected for this run.
args