In [1]:
import numpy as np
import pandas as pd

import joblib
import os
import random
import shutil
import sys

from collections import defaultdict
from scipy.special import log_ndtr, logsumexp
from scipy.interpolate import Akima1DInterpolator

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchmetrics

from tensorboard.backend.event_processing import event_accumulator

import lightning as L
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

In [2]:
# Put the parent of the current working directory on sys.path so that the
# local `utilities` package (imported in the next cell) can be found.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [3]:
from utilities.data_downloader import train_val_test_downloader
from utilities.upsampling import upsampling
from utilities.extracted_features_explorer import explorer
from utilities.plots import plt, COLORMAP, visualize_latent

In [4]:
from warnings import simplefilter, filterwarnings
# Silence every RuntimeWarning.
# NOTE(review): this also hides numerical warnings (log of zero, overflow,
# invalid value) raised anywhere below — confirm this is intended.
simplefilter("ignore", category=RuntimeWarning)
# Silence specific messages matched by these regexes (presumably emitted by
# Lightning about dataloader workers, non-empty checkpoint dirs, shuffling).
filterwarnings("ignore", ".*does not have many workers.*")
filterwarnings("ignore", ".*exists and is not empty.*")
filterwarnings("ignore", ".*sampler has shuffling enabled.*")

In [5]:
def set_random_seed(seed):
    """
    Seed every random number generator used in this notebook and
    switch cuDNN to deterministic mode.

    Parameters
    ----------
    seed : int
        Seed passed to `random`, `numpy.random`, and torch (CPU and CUDA).
    """
    # pure-Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # torch generators
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # trade cuDNN autotuning for reproducible kernels
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

# Seed the generators explicitly, then let Lightning seed them as well
# (both calls use the same seed value).
set_random_seed(42)
L.seed_everything(42)

Seed set to 42


42

# Description

Here we present an **AutoEncoder** model based on 1D convolutional layers. Several architectures are trained and logged; the set of architectures to try is hard-coded in the sections below.

## Fitting
The loss function is:

$${\rm Loss} = \Bigg\langle \left[\frac{\log_{10}({\rm rate}/{\rm s}^{-1})_{\rm reco} - \log_{10}({\rm rate}/{\rm s}^{-1})_{\rm true}}{\log_{10}({\rm rate}/{\rm s}^{-1})~{\rm err.}}\right]^2\Bigg\rangle_{\rm non-empty~bins}\quad +\quad\delta \cdot\Bigg\langle\Big|{ \Delta}_t \log_{10}({\rm rate}/{\rm s}^{-1})_{\rm reco}\Big|\Bigg\rangle_{\rm all~bins}$$

The first term is a weighted MSE, and the second one is an L1 penalty on the bin-to-bin differences $\Delta_t$ of the reconstructed time series, which favours smooth reconstructions.

## Scoring procedure
We infer the anomaly score from the distribution of the weighted reconstruction error.

We fit the cumulative distribution function of the weighted reconstruction error decimal logarithm $\lg[{\rm wre}]$ using our custom Kernel Density Estimation procedure (which should be rather called Kernel Cumulative Distribution Estimation):

$${\rm CDF}(\lg[{\rm wre}]) = \Bigg\langle \int\limits_{-\infty}^{\lg[{\rm wre}]} \frac{d\xi}{b\sqrt{2\pi}} \exp\Big[-\frac{(\xi - \lg[{\rm wre}_i])^2}{2b^2}\Big]\Bigg\rangle_i = \Bigg\langle \int\limits_{-\infty}^{\frac{\lg[{\rm wre}] - \lg[{\rm wre}_i]}{b}} \frac{d\zeta}{\sqrt{2\pi}} \exp\Big[-\frac{1}{2}\zeta^2\Big]\Bigg\rangle_i$$

Bandwidth $b$ is defined by Silverman's rule of thumb, see [[reference]](https://archive.org/details/densityestimatio00silv_0/page/44/mode/2up).

We then define $p$-value as $1-{\rm CDF}$ and score samples accordingly.

# Dataset
We use the linearly interpolated, rebinned dataset. In order to enlarge the training set, we upsample it by applying Gaussian noise and random shift augmentation.

In [6]:
# Load the train / validation / test frames and the labels table.
# 'interp' presumably selects the linearly interpolated rebinned dataset
# described above — TODO confirm against utilities.data_downloader.
train, val, test, labels = train_val_test_downloader('interp')

Datasets downloaded
 - train  : 810 entries
 - val    : 174 entries
 - test   : 174 entries
 - labels : 1158 entries


In [7]:
# Augment the training frame only; val and test are left untouched.
train_upsampled = upsampling(train)
print(f'Upsampled training fragment length: {len(train_upsampled)}')

Upsampled training fragment length: 81000


In [8]:
class LightCurveDataset(Dataset):
    """
    In-memory torch dataset built from a dataframe of light curves.

    Every item is a triple ``(data, label, weight)``: ``data`` and
    ``weight`` are float32 tensors with an extra axis of size 1 inserted
    at position 0 of the item (channel axis), ``label`` is the integer
    code of the dataframe index entry of that row.

    Parameters
    ----------
    dataframe : pd.DataFrame
        One light curve per row; the index (event names) is used as labels.
    data_col : str
        Name of the column holding the values of each row.
    weight_col : str
        Name of the column holding the weights of each row.
    """

    def __init__(self, dataframe:pd.DataFrame,
                 data_col:str='lgRate',
                 weight_col:str='weight'):
        self.data = self._as_channel_tensor(dataframe, data_col)      # value
        self.weight = self._as_channel_tensor(dataframe, weight_col)  # weight

        # The index entries (event names) are integer-encoded; the fitted
        # encoder is kept so that codes can be mapped back to names later.
        self.label_enc = LabelEncoder()
        encoded_labels = self.label_enc.fit_transform(dataframe.index)
        self.labels = torch.as_tensor(encoded_labels)

    @staticmethod
    def _as_channel_tensor(dataframe, column):
        """Stack one column into a float32 tensor and add an axis at dim=1."""
        stacked = np.array(dataframe.loc[:, column].tolist(),
                           dtype=np.float32)
        return torch.from_numpy(stacked).unsqueeze(dim=1)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx], self.weight[idx]

In [9]:
# The training dataset wraps the augmented frame; validation and test
# datasets wrap the original frames.
train_dataset, val_dataset, test_dataset = (
    LightCurveDataset(train_upsampled),
    LightCurveDataset(val),
    LightCurveDataset(test)
)

In [10]:
# Only the (augmented) training loader is shuffled; all loaders read in the
# main process (num_workers=0) with batches of 256.
train_loader = DataLoader(train_dataset,
                          batch_size=256,
                          shuffle=True,
                          num_workers=0)
val_loader = DataLoader(val_dataset,
                        batch_size=256,
                        shuffle=False,
                        num_workers=0)
test_loader = DataLoader(test_dataset,
                         batch_size=256,
                         shuffle=False,
                         num_workers=0)

# for predictions on non-augmented train:
# (this loader builds its own LightCurveDataset, hence its own label encoder)
train_loader_ = DataLoader(LightCurveDataset(train),
                           batch_size=256,
                           shuffle=False,
                           num_workers=0)

# Model
An **AutoEncoder** model built from the following block: 1D convolutional layer + batch normalization + LeakyReLU activation.
The number of channels doubles with every layer, i.e. it equals (starting number of channels) × 2^(depth of the layer); the starting number of channels and the number of layers are architectural parameters.

## Torch Models

In [11]:
class Encoder(nn.Module):
    """
    1D-convolutional encoder mapping a single-channel time series to a
    latent vector.

    The network is a stack of ``architecture[1]`` blocks
    (Conv1d with stride 2 -> BatchNorm1d -> LeakyReLU) followed by
    Flatten and a Linear projection to ``latent_dim``.

    Parameters
    ----------
    latent_dim : int
        Size of the output latent vector.
    architecture : tuple
        ``(base_channels, depth)``: layer ``i`` (0-based) has
        ``base_channels * 2**i`` output channels.
    tseries_length : int
        Length of the input time series. Any positive length is supported:
        the size of the flattened feature map is derived from the actual
        output length of the strided convolutions.
    """
    def __init__(self, latent_dim:int,
                 architecture:tuple=(32, 4),
                 tseries_length:int=64):
        super().__init__()

        self.hidden_dims = [
            architecture[0] * 2**power for power in range(architecture[1])
            ]                                       # num of filters in layers
        self.tseries_length = tseries_length

        modules = []
        in_channels = 1                             # initial num of channels
        out_length = tseries_length                 # length after each layer
        for h_dim in self.hidden_dims:              # conv layers
            modules.append(
                nn.Sequential(
                    nn.Conv1d(
                        in_channels=in_channels,    # num of input channels
                        out_channels=h_dim,         # num of output channels
                        kernel_size=3,
                        stride=2,                   # convolution kernel step
                        padding=1,
                    ),
                    nn.BatchNorm1d(h_dim),
                    nn.LeakyReLU(),
                )
            )
            in_channels = h_dim                     # input channels of the
                                                    # next layer
            # Conv1d(kernel_size=3, stride=2, padding=1) maps
            # L -> floor((L - 1) / 2) + 1, i.e. it halves L rounding up.
            out_length = (out_length - 1) // 2 + 1

        modules.append(nn.Flatten())                # to vector
        # channels * remaining length; equals C * T / 2**depth whenever
        # T is divisible by 2**depth
        intermediate_dim = self.hidden_dims[-1] * out_length
        modules.append(nn.Linear(in_features=intermediate_dim,
                                 out_features=latent_dim))

        self.encoder = nn.Sequential(*modules)

    def forward(self, x):
        """Encode a batch of shape (batch, 1, tseries_length) to (batch, latent_dim)."""
        x = self.encoder(x)
        return x


class Decoder(nn.Module):
    """
    1D-convolutional decoder mapping a latent vector back to a
    single-channel time series.

    A Linear layer expands the latent vector to a feature map with
    ``hidden_dims[0]`` channels, which is then upsampled by a factor of 2
    ``architecture[1] - 1`` times (Upsample -> Conv1d [-> BatchNorm1d ->
    LeakyReLU]); the last convolution outputs one channel without
    normalization or activation.

    Parameters
    ----------
    latent_dim : int
        Size of the input latent vector.
    architecture : tuple
        ``(base_channels, depth)``; the decoder uses the channel counts
        ``base_channels * 2**i`` for ``i = depth-1, ..., 1``.
        ``depth`` must be at least 2 (otherwise there is no hidden layer).
    tseries_length : int
        Length of the reconstructed time series; must be divisible by
        ``2**(depth - 1)`` because the output length is the initial
        feature-map length times that factor.

    Raises
    ------
    ValueError
        If ``tseries_length`` is not divisible by ``2**(depth - 1)``.
    """
    def __init__(self, latent_dim:int,
                 architecture:tuple=(32, 4),
                 tseries_length:int=64):
        super().__init__()
        self.hidden_dims = [
            architecture[0] * 2**power
            for power in range(architecture[1]-1, 0, -1)
            ]                                       # num of filters in layers
        self.tseries_length = tseries_length

        upsample_factor = 2**len(self.hidden_dims)
        if tseries_length % upsample_factor != 0:
            # Otherwise the Linear output size and the reshape in forward()
            # disagree and the output length differs from tseries_length.
            raise ValueError(
                f'tseries_length={tseries_length} must be divisible by '
                f'{upsample_factor} for architecture={architecture}'
            )
        # length of the feature map fed to the first upsampling block
        self._initial_length = tseries_length // upsample_factor

        intermediate_dim = self.hidden_dims[0] * self._initial_length
        self.linear = nn.Linear(in_features=latent_dim,
                                out_features=intermediate_dim)

        modules = []
        for i in range(len(self.hidden_dims) - 1):  # define upsample layers
            modules.append(
                nn.Sequential(
                    nn.Upsample(scale_factor=2),
                    nn.Conv1d(
                        in_channels=self.hidden_dims[i],
                        out_channels=self.hidden_dims[i + 1],
                        kernel_size=3,
                        padding=1,
                    ),
                    nn.BatchNorm1d(self.hidden_dims[i + 1]),
                    nn.LeakyReLU(),
                )
            )

        # final block: back to a single channel, no normalization/activation
        modules.append(
            nn.Sequential(
                nn.Upsample(scale_factor=2),
                nn.Conv1d(in_channels=self.hidden_dims[-1],
                          out_channels=1,
                          kernel_size=3, padding=1)
            )
        )

        self.decoder = nn.Sequential(*modules)

    def forward(self, x):
        """Decode a batch of shape (batch, latent_dim) to (batch, 1, tseries_length)."""
        x = self.linear(x)        # from latent space to flat feature map
        x = x.view(
            -1, self.hidden_dims[0], self._initial_length
            )                     # reshape to (batch, channels, length)
        x = self.decoder(x)       # reconstruction
        return x

# For Variational AE, not used here
# =================================
class VAEncoder(Encoder):
    """
    Encoder for a variational autoencoder.

    The output vector of size ``latent_dim`` is later split into two
    halves (mean and log-variance), hence ``latent_dim`` must be even.

    Parameters
    ----------
    latent_dim : int
        Size of the encoder output (twice the size of the sampled latent).
    architecture : tuple
        Forwarded to `Encoder`.
    tseries_length : int
        Forwarded to `Encoder`.

    Raises
    ------
    ValueError
        If ``latent_dim`` is odd.
    """
    def __init__(self, latent_dim,
                 architecture:tuple=(32, 4),
                 tseries_length:int=64):
        if latent_dim % 2 != 0:   # check for the parity of the latent space
            raise ValueError('Latent size for VAEncoder must be even')

        super().__init__(latent_dim,
                         architecture=architecture,
                         tseries_length=tseries_length)
# =================================

## Lightning wrapper

In [12]:
class LitAE(L.LightningModule):
    """
    Lightning wrapper around an encoder/decoder pair.

    Training and validation minimise a weighted reconstruction MSE,
    averaged over non-empty bins, plus ``derivative_weight`` times the mean
    absolute bin-to-bin difference of the reconstruction. During testing
    the inputs, weights, reconstructions, latents and labels of all batches
    are accumulated in ``self.test_result``.

    Parameters
    ----------
    encoder, decoder : nn.Module
        Networks mapping data -> latent and latent -> reconstruction.
    derivative_weight : float
        Multiplier of the derivative (smoothness) penalty.
    """
    def __init__(self, encoder, decoder, derivative_weight=1.0):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.derivative_weight = derivative_weight

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-4)

    def forward_handler(self, data,
                        *args, **kwargs):
        """Pass data through the autoencoder; returns ``(latent, recon)``."""
        latent = self.encoder(data)
        recon = self.decoder(latent)
        return latent, recon

    def loss_handler(self, recon, data, weight, latent,
                     *args, **kwargs):
        """
        Compute the training loss.

        ``latent`` is unused here; it is part of the signature so that
        subclasses can add latent-space terms.
        """
        # per-bin weighted squared error
        weighted_sq_err = F.mse_loss(
            recon, data, reduction='none'
        ) * weight

        # Average over non-empty bins only, i.e. bins with a strictly
        # positive weight. Zero-weight bins contribute nothing to the sum,
        # so including them would only dilute the mean. Note that a batch
        # without a single non-empty bin yields NaN.
        recon_loss = torch.masked_select(
            input = weighted_sq_err,
            mask = weight.gt(0.0)
        ).mean()

        # derivative penalty =
        # L1-regularization of the output timeseries differences
        # (computed over all bins)
        derivative_loss = torch.abs(
            torch.diff(recon, dim=-1)
        ).mean()

        # total loss
        return recon_loss + self.derivative_weight * derivative_loss

    def training_step(self, batch, batch_idx):
        data, labels, weight = batch

        latent, recon = self.forward_handler(data, labels)
        loss = self.loss_handler(recon, data, weight, latent)

        self.log('train_loss', loss, on_step=False, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        data, labels, weight = batch

        latent, recon = self.forward_handler(data, labels)
        loss = self.loss_handler(recon, data, weight, latent)

        self.log('val_loss', loss, on_step=False, on_epoch=True)
        return loss

    def on_test_epoch_start(self):
        # dict of empty tensors, extended batch by batch in test_step
        self.test_result = defaultdict(torch.Tensor)

    def test_step(self, batch, batch_idx):
        data, labels, weight = batch

        latent, recon = self.forward_handler(data, labels)
        self.update_test_result(data, weight, recon, latent, labels)

    def update_test_result(self, data, weight, recon, latent, labels):
        """Append the CPU copies of one batch to ``self.test_result``."""
        batch_items = (
            ('real', data),
            ('weight', weight),
            ('recon', recon),
            ('latent', latent),
            ('labels', labels),
        )
        for key, value in batch_items:
            self.test_result[key] = torch.cat(
                [self.test_result[key], value.cpu()]
            )

    def on_test_epoch_end(self):
        # convert every accumulated tensor to a numpy array
        for key in self.test_result:
            self.test_result[key] = self.test_result[key].numpy()

In [13]:
# For Variational AE, not used here
# =================================
class LitVAE(LitAE):
    """
    Variational version of `LitAE`.

    The encoder output is split into mean and log-variance halves, a latent
    sample is drawn with the reparametrization trick, and a KL-divergence
    term weighted by ``kld_weight`` is added to the `LitAE` loss.

    Parameters
    ----------
    encoder, decoder : nn.Module
        The encoder output size must be twice the decoder input size.
    derivative_weight : float
        Multiplier of the derivative penalty (see `LitAE`).
    kld_weight : float
        Multiplier of the KL-divergence term.
    """
    def __init__(self, encoder, decoder,
                 derivative_weight=1.0,
                 kld_weight=0.005,
                 ):
        super().__init__(encoder, decoder, derivative_weight)
        self.kld_weight = kld_weight

    def vae_split(self, latent):
        """Split the latent representation into ``(mu, log_var)`` halves."""
        size = latent.shape[1] // 2
        mu = latent[:, :size]
        log_var = latent[:, size:]
        return mu, log_var

    def vae_reparametrize(self, mu, log_var):
        """Draw ``mu + sigma * eps`` with ``eps ~ N(0, 1)``."""
        sigma = torch.exp(0.5 * log_var)
        # randn_like keeps the noise on the device and in the dtype of `mu`,
        # whatever device the module itself currently lives on
        eps = torch.randn_like(mu)
        return eps * sigma + mu

    def kld_loss(self, mu, log_var):
        """KL divergence to the standard normal, averaged over the batch."""
        var = log_var.exp()
        kl_loss = torch.mean(
            -0.5 * torch.sum(log_var - var - mu**2 + 1, dim=1), dim=0
        )
        return kl_loss

    def forward_handler(self, data, *args, **kwargs):
        """
        Pass data through the VAE.

        Returns the raw encoder output (mean and log-variance halves) and
        the reconstruction decoded from one random latent sample.
        """
        latent = self.encoder(data)

        mu, log_var = self.vae_split(latent)
        sample = self.vae_reparametrize(mu, log_var)

        recon = self.decoder(sample)
        return latent, recon

    def loss_handler(self, recon, data, weight, latent, *args, **kwargs):
        """`LitAE` loss plus the weighted KL-divergence term."""
        mu, log_var = self.vae_split(latent)
        # reconstruction + derivative terms are delegated to the parent
        # class so that both models always share the same definition
        ae_loss = super().loss_handler(recon, data, weight, latent,
                                       *args, **kwargs)
        return ae_loss + self.kld_weight * self.kld_loss(mu, log_var)
# =================================

# Utilities

#### Training loop utilities

In [14]:
def create_models(latent_dim:int=3, architecture:tuple=(32, 4)):
    """
    Creates autoencoder model instance

    Also makes sure the experiment log directory
    ``./lightning_logs/<exp_name>`` exists; calling the function again for
    the same experiment (e.g. re-running the cell) is safe.

    Parameters
    ----------
    latent_dim : int
        Size of the latent space.
    architecture : tuple
        ``(base_channels, depth)`` passed to `Encoder` and `Decoder`.

    Returns
    -------
    encoder, decoder, autoencoder, exp_name
        The two networks, the `LitAE` wrapping them, and the experiment
        name ``AE_dim=<latent_dim>_archi=<base_channels>_<depth>``.
    """
    # encoder is created before decoder: the order fixes the sequence of
    # random weight initialisations
    encoder = Encoder(latent_dim=latent_dim, architecture=architecture)
    decoder = Decoder(latent_dim=latent_dim, architecture=architecture)
    autoencoder = LitAE(encoder, decoder)
    exp_name = f'AE_dim={latent_dim}_archi=' + '%d_%d' % architecture

    os.makedirs('./lightning_logs/'+exp_name, exist_ok=True)
    return encoder, decoder, autoencoder, exp_name

In [15]:
def save_latest_model(exp_name:str, target_dir:str):
    """
    Saves the latest version of the trained model
    together with data collected during training loop

    The contents of the most recent ``version_<N>`` directory of
    ``./lightning_logs/<exp_name>`` (highest ``N``, compared numerically)
    and the ``best.ckpt`` checkpoint are copied into ``target_dir``.

    Please note that target_dir, if exists,
    will be completely overwritten!

    Raises
    ------
    FileNotFoundError
        If the experiment directory contains no ``version*`` entry.
    """
    logdir = f'./lightning_logs/{exp_name}'

    def version_key(name):
        # 'version_10' must sort after 'version_2': compare the numeric
        # suffix; names without one sort first
        suffix = name.rsplit('_', 1)[-1]
        return (int(suffix) if suffix.isdecimal() else -1, name)

    available_versions = sorted(
        (entry for entry in os.listdir(logdir)
         if entry.startswith('version')),
        key=version_key
    )
    if not available_versions:
        raise FileNotFoundError(
            f'no "version*" directory found in {logdir}')
    latest = os.path.join(logdir, available_versions[-1])

    # start from an empty target directory (parents are created if needed)
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)

    shutil.copytree(latest, target_dir, dirs_exist_ok=True)
    shutil.copyfile(os.path.join(logdir, 'best.ckpt'),
                    os.path.join(target_dir, 'best.ckpt'))
    print(f'model logged at {target_dir}')

#### Learning curve visualization

In [16]:
def extract(event_acc_instance:event_accumulator.EventAccumulator, tag:str='train_loss'):
    """
    Reads one scalar series from an EventAccumulator instance

    Returns
    -------
    steps, values : list, list
        The ``step`` and ``value`` attributes of every event
        stored under ``tag``, in the stored order.
    """
    events = list(event_acc_instance.Scalars(tag))
    steps = [event.step for event in events]
    values = [event.value for event in events]
    return steps, values

def plot_learning_curve(target_dir:str, show:bool=True,
                        cut:int=2, freq:int=10,
                        ylim:tuple=(1, 3)):
    """
    Plots learning curve for train- and val- loss

    The scalars are read from the last (in sorted file-name order)
    TensorBoard event file found in ``target_dir``; the figure is saved as
    ``<target_dir>/Figures/learning_curve.pdf``.

    Parameters
    ----------
    target_dir : str
        Directory containing the ``events.out.tfevents*`` file(s).
    show : bool
        Show the figure if True, otherwise close it after saving.
    cut : int
        Stride applied to the logged ``epoch`` series before choosing ticks
        (presumably 2 because ``epoch`` is logged twice per epoch, once for
        train and once for val — TODO confirm).
    freq : int
        Every ``freq``-th remaining epoch gets an x-axis tick.
    ylim : tuple
        ``(bottom, top)`` limits of the loss axis.

    Returns
    -------
    False if no event file was found, otherwise None.
    """
    # os.listdir order is arbitrary: sort so that "last" is well defined
    event_files = sorted(
        file for file in os.listdir(target_dir)
        if 'events.out.tfevents' in file
    )
    num_files = len(event_files)
    print(f'Found {num_files} file(s)')
    if num_files == 0:
        return False

    event_file = event_files[-1]
    print(f'Processing {event_file}')
    event_file_path = os.path.join(target_dir, event_file)
    event_acc = event_accumulator.EventAccumulator(event_file_path)
    event_acc.Reload()

    steps_train, values_train = extract(event_acc, 'train_loss')
    steps_valid, values_valid = extract(event_acc, 'val_loss')
    steps_epoch, values_epoch = extract(event_acc, 'epoch')

    plt.ylabel('loss')
    plt.xlabel('epoch')

    # x axis is in optimizer steps; label selected steps with epoch numbers
    ticks_epoch = [float(step) for step in steps_epoch[::cut]][::freq]
    labels_epoch = [str(int(el)) for el in values_epoch[::cut]][::freq]
    plt.xticks(ticks_epoch, labels=labels_epoch)

    min_loss = np.floor(np.min(values_train + values_valid))
    plt.ylim(*ylim)

    plt.grid(True)

    plt.errorbar(steps_train, values_train, color='gray', label='train', marker='s', markersize=4)
    plt.errorbar(steps_valid, values_valid, color='xkcd:blue', label='val', marker='s', markersize=4)

    # step of the best validation loss (the latest one in case of ties)
    best_step = steps_valid[::-1][int(np.argmin(values_valid[::-1]))]
    # axvline takes ymax as a fraction of the axis height
    best_fraction = (np.min(values_valid) - ylim[0]) / (ylim[1] - ylim[0])
    plt.axvline(best_step, 0, best_fraction,
                color='xkcd:blue', linestyle='dashed')
    plt.text(best_step*1.05, min_loss + 0.1,
             'best', color='black')

    plt.legend(loc='upper right')

    os.makedirs(f'{target_dir}/Figures', exist_ok=True)

    plt.savefig(f'{target_dir}/Figures/learning_curve.pdf',
               format='pdf', bbox_inches='tight')
    if show:
        plt.gcf().set_dpi(300)
        plt.show()
    else:
        plt.close()

#### Predictions

In [17]:
# For Variational AE, not used here
# =================================
def reparametrize_latent(vae, latent):
    """
    Draws latent samples from the encoder output of a VAE

    Parameters
    ----------
    vae : object
        Model providing ``vae_split`` and ``vae_reparametrize``.
    latent : np.ndarray
        Encoder output; it is split by ``vae.vae_split`` into
        mean and log-variance parts.

    Returns
    -------
    sample : np.ndarray
        Output of ``vae.vae_reparametrize`` converted to numpy.
    """
    mu, log_var = vae.vae_split(latent)

    mu, log_var = torch.tensor(mu), torch.tensor(log_var)
    sample = vae.vae_reparametrize(mu, log_var).numpy()
    return sample
# =================================

In [18]:
def get_dict_result(trainer, model, dataloader, ckpt_path):
    """
    Runs the test loop and collects per-sample results

    Parameters
    ----------
    trainer : object
        Its ``test(model, dataloader, ckpt_path=...)`` must fill
        ``model.test_result`` with numpy arrays under the keys
        'real', 'recon', 'weight', 'latent' and 'labels'.
    model : object
        Model under test; ``model.test_result['labels']`` is replaced
        in place by the decoded labels.
    dataloader : object
        ``dataloader.dataset.label_enc`` is used to decode the labels.
    ckpt_path : str
        Forwarded to ``trainer.test``.

    Returns
    -------
    latent : pd.DataFrame
        One row per sample, indexed by label: latent features
        ``feature_<i>``, the weighted MSE averaged over non-zero-weight
        bins (``wMSE``) and the per-bin weighted errors (``pred_error``).
    real, recon, weight : np.ndarray
        Inputs, reconstructions and weights with the channel axis removed.
    """
    with torch.no_grad():
        trainer.test(model, dataloader, ckpt_path=ckpt_path)

    # integer codes -> original labels
    model.test_result['labels'] = (
        dataloader.dataset.label_enc.inverse_transform(
            model.test_result['labels'].astype(int)
        )
    )

    # Drop only the channel axis: a bare squeeze() would also drop the
    # batch axis when the loader holds a single sample.
    real = model.test_result['real'].squeeze(axis=1)
    recon = model.test_result['recon'].squeeze(axis=1)
    weight = model.test_result['weight'].squeeze(axis=1)

    weightedMSE = (real-recon)**2 * weight
    pred_errors = (weightedMSE ** 0.5).tolist()   # per-bin weighted errors

    # mean over bins, ignoring bins with zero weight
    weightedMSE = np.ma.masked_array(data=weightedMSE,
                                     mask=~(weight.astype(bool))
    )
    weightedMSE = weightedMSE.mean(axis=1, keepdims=True)

    latent = model.test_result['latent'].copy()

    # For Variational AE, not used here
    # =================================
    if hasattr(model, 'vae_reparametrize') and callable(model.vae_reparametrize):
        # for VAE, we must reparametrize latent first
        latent = reparametrize_latent(model, latent)
    # =================================

    latentdim = latent.shape[-1]

    latent = pd.DataFrame(
        data=np.concatenate((latent, weightedMSE), axis=1),
        index=model.test_result['labels'],
        columns=['feature_'+str(dim) for dim in range(latentdim)]+['wMSE'])

    latent.insert(loc=latentdim+1, column='pred_error', value=pred_errors)

    return latent, real, recon, weight

In [19]:
def get_predictions(trainer:L.Trainer, AE:LitAE):
    """
    Creates predictions of a trained AutoEncoder

    The scoring function is fitted on the augmented training set and then
    applied to the non-augmented train, val and test sets. The function
    reads the notebook-level loaders ``train_loader``, ``train_loader_``,
    ``val_loader`` and ``test_loader`` and always evaluates the 'best'
    checkpoint.

    Returns
    -------
    scoring : Akima1DInterpolator (callable)
        A function that takes decimal logarithm of the
        weighted reconstruction MSE and returns ln of p-value

    df : pd.DataFrame
        The resulting scored dataframe: the columns produced by
        `get_dict_result` plus 'sample' (train/val/test), 'score'
        (ln of p-value, clipped at 0) and 'p-value'.
    """

    # predictions for the augmented training dataset
    train_upsampled_latent, real, recon, weight = get_dict_result(
        trainer, AE, train_loader, 'best')

    # float64 keeps the kernel arguments below accurate
    log10wMSE = np.log10(
        train_upsampled_latent['wMSE'].values
    ).ravel().astype(np.float64)

    # define scoring function using log10[weightedMSE_train]
    # and custom KDE (since sklearn KernelDensity calculates
    # log-likelihood but not p-value)
    n_samples = log10wMSE.size
    SilvermanRuleBandwidth = (4/3/n_samples)**(1/5) * np.std(log10wMSE)
    log_n_samples = np.log(n_samples)

    def logpvalue(x):
        # ln(1 - CDF(x)) evaluated directly as the log survival function:
        # 1 - Phi(z) = Phi(-z), so p(x) = < Phi((x_i - x) / b) >_i.
        # This avoids the cancellation of ln(1 - CDF) for CDF close to 1,
        # i.e. exactly for the small p-values of anomalous samples.
        return logsumexp(
            log_ndtr((log10wMSE - x) / SilvermanRuleBandwidth)
        ) - log_n_samples

    # tabulate ln(p-value) once and interpolate afterwards; evaluated
    # point by point to keep memory at O(n_samples)
    log10_wMSE_grid = np.linspace(-3, 3, 10000)
    log_pvalue_grid = np.array(
        [logpvalue(x) for x in log10_wMSE_grid]
    )

    scoring = Akima1DInterpolator(log10_wMSE_grid, log_pvalue_grid)
    scoring.extrapolate=True

    # predictions for the non-augmented dataset
    train_latent, real, recon, weight = get_dict_result(
        trainer, AE, train_loader_, 'best')
    val_latent, real, recon, weight = get_dict_result(
        trainer, AE, val_loader, 'best')
    test_latent, real, recon, weight = get_dict_result(
        trainer, AE, test_loader, 'best')

    df = pd.concat(
        (train_latent, val_latent, test_latent),
        axis=0, ignore_index=False
    )
    df['sample'] = (
        ['train'] * len(train_latent) +
        ['val'] * len(val_latent) +
        ['test'] * len(test_latent)
    )
    # ln(p-value) cannot exceed 0; clip interpolation/extrapolation overshoot
    df['score'] = np.clip(
        scoring(np.log10(df['wMSE'].to_numpy())), None, 0.0
    )
    df['p-value'] = np.exp(df['score'])

    return scoring, df

# Training Models & Get Predictions

In [20]:
# WARNING: destructive — removes all previous logs and saved architectures
# in the current working directory, then recreates the empty directories.
!rm -rf lightning_logs
!rm -rf Architectures
!mkdir lightning_logs
!mkdir Architectures

## *«Standard»* Architecture (32, 4)

In [21]:
# (32, 4): 32 base channels, 4 convolutional layers in the encoder
encoder, decoder, autoencoder, exp_name = create_models(
    latent_dim=3, architecture=(32, 4))

In [22]:
# Stop once `val_loss` has not improved for 20 consecutive validation checks
early_stopping_callback = EarlyStopping(
    monitor='val_loss', mode='min', min_delta=0, patience=20)

# Keep the checkpoint with the lowest `val_loss` under the name 'best'
# in the experiment log directory
checkpoint_callback = ModelCheckpoint(
    dirpath=f'./lightning_logs/{exp_name}',
    filename='best', monitor='val_loss', mode='min')

logger = TensorBoardLogger(save_dir='./lightning_logs', name=exp_name)

# max_epochs is an upper bound; early stopping may end training sooner
trainer = L.Trainer(max_epochs=500, logger=logger,
                    callbacks=[checkpoint_callback, early_stopping_callback],
                    enable_progress_bar=False
                   )

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


#### Training loop

In [23]:
# Open TensorBoard inline on this experiment's log directory
%load_ext tensorboard
%tensorboard --logdir lightning_logs/{exp_name} --port 6011

In [None]:
# Train on the augmented training set, validate on the validation set
trainer.fit(autoencoder, train_loader, val_loader)


  | Name    | Type    | Params | Mode 
--------------------------------------------
0 | encoder | Encoder | 133 K  | train
1 | decoder | Decoder | 131 K  | train
--------------------------------------------
265 K     Trainable params
0         Non-trainable params
265 K     Total params
1.062     Total estimated model params size (MB)
36        Modules in train mode
0         Modules in eval mode


In [None]:
# Copy the latest log version and the best checkpoint to ./Architectures
# (an existing target directory is overwritten)
save_latest_model(exp_name, f'./Architectures/{exp_name}')

In [None]:
# Plot and save train/val loss curves from the copied event file
plot_learning_curve(f'./Architectures/{exp_name}')

#### Get Predictions

In [None]:
# Score all samples with the best checkpoint and persist both the scored
# table and the fitted scoring function next to the model
scoring, df = get_predictions(trainer, autoencoder)
df.to_csv(f'./Architectures/{exp_name}/predictions.csv')
joblib.dump(scoring, f'./Architectures/{exp_name}/scoring.joblib')

In [None]:
# Attach the label columns to the predictions (aligned on the index).
# NOTE(review): this rebinds `df`, so re-running the cell without re-running
# the previous one appends the label columns a second time.
df = pd.concat((df, labels), axis=1, ignore_index=False)
visualize_latent(df,
                 title='AutoEncoder Latent Space',
                 savedir=f'./Architectures/{exp_name}/Figures',
                 show=True)

In [None]:
# ROC-AUC of the anomaly ranking per subset. `score` is ln(p-value), so its
# negative is used as the ranking score (larger = more anomalous).
# 'FlaresFlag' is presumably the binary target from `labels` — TODO confirm.
print('ROC-AUC Score:'+
      '\n    train : '+
      str(round(roc_auc_score(df.loc[train.index, 'FlaresFlag'].values,
                              -df.loc[train.index, 'score'].values), 2))+
      '\n    val   : '+
      str(round(roc_auc_score(df.loc[val.index, 'FlaresFlag'].values,
                              -df.loc[val.index, 'score'].values), 2))+
      '\n    test  : '+
      str(round(roc_auc_score(df.loc[test.index, 'FlaresFlag'].values,
                              -df.loc[test.index, 'score'].values), 2))
     )

In [None]:
# Explore the extracted latent features (helper from
# utilities.extracted_features_explorer; the names suggest it returns a
# fitted scaler and a linear regression — TODO confirm)
scaler, LinReg = explorer(df)

In [None]:
# Samples with p-value <= 1%, most anomalous (lowest score) first
df[df['p-value']<=0.01].sort_values(by='score', axis=0)

## *«Purblind»* Architecture (16, 4)

Reduced number of channels

In [None]:
# (16, 4): half the base channels of the standard architecture, same depth
encoder, decoder, autoencoder, exp_name = create_models(
    latent_dim=3, architecture=(16, 4))

#### Training Loop

In [None]:
# Open TensorBoard inline on this experiment's log directory
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/{exp_name} --port 6013

In [None]:
# Fresh callbacks, logger and trainer for this experiment (same settings as
# for the standard architecture): early stopping after 20 validation checks
# without `val_loss` improvement, best checkpoint saved under the name 'best'.
early_stopping_callback = EarlyStopping(
    monitor='val_loss', mode='min', min_delta=0, patience=20)

checkpoint_callback = ModelCheckpoint(
    dirpath=f'./lightning_logs/{exp_name}',
    filename='best', monitor='val_loss', mode='min')

logger = TensorBoardLogger(save_dir='./lightning_logs', name=exp_name)

trainer = L.Trainer(max_epochs=500, logger=logger,
                    callbacks=[checkpoint_callback, early_stopping_callback],
                    enable_progress_bar=False
                   )

In [None]:
trainer.fit(autoencoder, train_loader, val_loader)

In [None]:
save_latest_model(exp_name, f'./Architectures/{exp_name}')

In [None]:
plot_learning_curve(f'./Architectures/{exp_name}')

#### Get Predictions

In [None]:
# Score all samples with the best checkpoint and persist both the scored
# table and the fitted scoring function next to the model
scoring, df = get_predictions(trainer, autoencoder)
df.to_csv(f'./Architectures/{exp_name}/predictions.csv')
joblib.dump(scoring, f'./Architectures/{exp_name}/scoring.joblib')

In [None]:
# Attach the label columns to the predictions (aligned on the index).
# NOTE(review): this rebinds `df`, so re-running the cell without re-running
# the previous one appends the label columns a second time.
df = pd.concat((df, labels), axis=1, ignore_index=False)
visualize_latent(df,
                 title='AutoEncoder Latent Space',
                 savedir=f'./Architectures/{exp_name}/Figures',
                 show=True)

In [None]:
# ROC-AUC of the anomaly ranking per subset. `score` is ln(p-value), so its
# negative is used as the ranking score (larger = more anomalous).
print('ROC-AUC Score:'+
      '\n    train : '+
      str(round(roc_auc_score(df.loc[train.index, 'FlaresFlag'].values,
                              -df.loc[train.index, 'score'].values), 2))+
      '\n    val   : '+
      str(round(roc_auc_score(df.loc[val.index, 'FlaresFlag'].values,
                              -df.loc[val.index, 'score'].values), 2))+
      '\n    test  : '+
      str(round(roc_auc_score(df.loc[test.index, 'FlaresFlag'].values,
                              -df.loc[test.index, 'score'].values), 2))
     )

In [None]:
scaler, LinReg = explorer(df)

In [None]:
df[df['p-value']<=0.01].sort_values(by='score', axis=0)

## *«Shallow»* Architecture (32, 2)

Reduced depth

In [None]:
# (32, 2): same base channels as the standard architecture, half the depth
encoder, decoder, autoencoder, exp_name = create_models(
    latent_dim=3, architecture=(32, 2))

#### Training Loop

In [None]:
# Open TensorBoard inline on this experiment's log directory
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/{exp_name} --port 6015

In [None]:
# Fresh callbacks, logger and trainer for this experiment (same settings as
# for the standard architecture): early stopping after 20 validation checks
# without `val_loss` improvement, best checkpoint saved under the name 'best'.
early_stopping_callback = EarlyStopping(
    monitor='val_loss', mode='min', min_delta=0, patience=20)

checkpoint_callback = ModelCheckpoint(
    dirpath=f'./lightning_logs/{exp_name}',
    filename='best', monitor='val_loss', mode='min')

logger = TensorBoardLogger(save_dir='./lightning_logs', name=exp_name)

trainer = L.Trainer(max_epochs=500, logger=logger,
                    callbacks=[checkpoint_callback, early_stopping_callback],
                    enable_progress_bar=False
                   )

In [None]:
trainer.fit(autoencoder, train_loader, val_loader)

In [None]:
save_latest_model(exp_name, f'./Architectures/{exp_name}')

In [None]:
plot_learning_curve(f'./Architectures/{exp_name}')

#### Get Predictions

In [None]:
# Score all samples with the best checkpoint and persist both the scored
# table and the fitted scoring function next to the model
scoring, df = get_predictions(trainer, autoencoder)
df.to_csv(f'./Architectures/{exp_name}/predictions.csv')
joblib.dump(scoring, f'./Architectures/{exp_name}/scoring.joblib')

In [None]:
# Attach the label columns to the predictions (aligned on the index).
# NOTE(review): this rebinds `df`, so re-running the cell without re-running
# the previous one appends the label columns a second time.
df = pd.concat((df, labels), axis=1, ignore_index=False)
visualize_latent(df,
                 title='AutoEncoder Latent Space',
                 savedir=f'./Architectures/{exp_name}/Figures',
                 show=True)

In [None]:
# ROC-AUC of the anomaly ranking per subset. `score` is ln(p-value), so its
# negative is used as the ranking score (larger = more anomalous).
print('ROC-AUC Score:'+
      '\n    train : '+
      str(round(roc_auc_score(df.loc[train.index, 'FlaresFlag'].values,
                              -df.loc[train.index, 'score'].values), 2))+
      '\n    val   : '+
      str(round(roc_auc_score(df.loc[val.index, 'FlaresFlag'].values,
                              -df.loc[val.index, 'score'].values), 2))+
      '\n    test  : '+
      str(round(roc_auc_score(df.loc[test.index, 'FlaresFlag'].values,
                              -df.loc[test.index, 'score'].values), 2))
     )

In [None]:
scaler, LinReg = explorer(df)

In [None]:
df[df['p-value']<=0.01].sort_values(by='score', axis=0)

## *«Tiny»* Architecture (16, 2)

Reduced both number of channels & depth

In [None]:
# (16, 2): half the base channels and half the depth of the standard one
encoder, decoder, autoencoder, exp_name = create_models(
    latent_dim=3, architecture=(16, 2))

#### Training loop

In [None]:
# Open TensorBoard inline on this experiment's log directory
%reload_ext tensorboard
%tensorboard --logdir lightning_logs/{exp_name} --port 6017

In [None]:
# Fresh callbacks, logger and trainer for this experiment (same settings as
# for the standard architecture): early stopping after 20 validation checks
# without `val_loss` improvement, best checkpoint saved under the name 'best'.
early_stopping_callback = EarlyStopping(
    monitor='val_loss', mode='min', min_delta=0, patience=20)

checkpoint_callback = ModelCheckpoint(
    dirpath=f'./lightning_logs/{exp_name}',
    filename='best', monitor='val_loss', mode='min')

logger = TensorBoardLogger(save_dir='./lightning_logs', name=exp_name)

trainer = L.Trainer(max_epochs=500, logger=logger,
                    callbacks=[checkpoint_callback, early_stopping_callback],
                    enable_progress_bar=False
                   )

In [None]:
trainer.fit(autoencoder, train_loader, val_loader)

In [None]:
save_latest_model(exp_name, f'./Architectures/{exp_name}')

In [None]:
plot_learning_curve(f'./Architectures/{exp_name}')

#### Get Predictions

In [None]:
# Score all samples with the best checkpoint and persist both the scored
# table and the fitted scoring function next to the model
scoring, df = get_predictions(trainer, autoencoder)
df.to_csv(f'./Architectures/{exp_name}/predictions.csv')

joblib.dump(scoring, f'./Architectures/{exp_name}/scoring.joblib')

In [None]:
# Attach the label columns to the predictions (aligned on the index).
# NOTE(review): this rebinds `df`, so re-running the cell without re-running
# the previous one appends the label columns a second time.
df = pd.concat((df, labels), axis=1, ignore_index=False)
visualize_latent(df,
                 title='AutoEncoder Latent Space',
                 savedir=f'./Architectures/{exp_name}/Figures',
                 show=True)

In [None]:
# ROC-AUC of the anomaly ranking per subset. `score` is ln(p-value), so its
# negative is used as the ranking score (larger = more anomalous).
print('ROC-AUC Score:'+
      '\n    train : '+
      str(round(roc_auc_score(df.loc[train.index, 'FlaresFlag'].values,
                              -df.loc[train.index, 'score'].values), 2))+
      '\n    val   : '+
      str(round(roc_auc_score(df.loc[val.index, 'FlaresFlag'].values,
                              -df.loc[val.index, 'score'].values), 2))+
      '\n    test  : '+
      str(round(roc_auc_score(df.loc[test.index, 'FlaresFlag'].values,
                              -df.loc[test.index, 'score'].values), 2))
     )

In [None]:
scaler, LinReg = explorer(df)

In [None]:
df[df['p-value']<=0.01].sort_values(by='score', axis=0)