In [1]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import scipy
import torch
from torch.utils.data import DataLoader, ConcatDataset

# Move up to the parent directory, presumably the project root, so that the
# project-local packages imported below (e.g. `data_processing`) can be found.
# Guarded so that re-running this cell does not climb one more directory
# each time: once `data_processing/` is visible we are already in place.
if not os.path.isdir('data_processing'):
    os.chdir('..')
from data_processing.datasets import PigletDataset
from neuralnet.model import SpectraMLP
from utils import beerlamb_multi, beerlamb_multi_batch
import data_processing.preprocessing as preprocessing
from config import left_cut, right_cut

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [2]:
path_to_data = "dataset/piglet_diffs/"
spectra_dataset = PigletDataset(path_to_data)

# Concatenate n copies of the same dataset to obtain one larger evaluation set.
n = 15
spectra_datasets = ConcatDataset(n * [spectra_dataset])

# Pull everything out as a single batch. The batch size is taken from the
# concatenated dataset itself rather than a hard-coded per-copy sample count,
# so it stays correct if the dataset on disk changes.
# NOTE(review): the previous value was n*7830, presumably len(spectra_dataset) — confirm.
dl = DataLoader(spectra_datasets, batch_size=len(spectra_datasets), shuffle=False)
inputs, targets = next(iter(dl))

In [3]:
# Load the .mat recording and read its wavelength axis as floats.
img = scipy.io.loadmat('dataset/LWP483_10Jan2017_SharedHyperProbe.mat')
wavelengths = img['wavelengths'].astype(float)

# Keep only the wavelengths inside the configured [left_cut, right_cut] window.
idx = np.logical_and(wavelengths >= left_cut, wavelengths <= right_cut)
wavelengths = wavelengths[idx]

# Read the molecule data for the same window, then keep the first four entries.
# NOTE(review): 4 presumably matches the first argument of SpectraMLP(4, ...) — confirm.
molecules, x = preprocessing.read_molecules(left_cut, right_cut, wavelengths)
molecules = molecules[:4]

In [4]:
def test(targets):
    """Time one forward pass of the current model and score it against ``targets``.

    Relies on notebook globals: ``model``, ``inputs``, ``device``,
    ``molecules``, ``x`` and ``left_cut``.

    Args:
        targets: Ground-truth tensor paired with the global ``inputs``.
            Axes 0 and 1 are swapped before it is passed to
            ``beerlamb_multi_batch``.

    Returns:
        A ``(duration, err)`` tuple. ``duration`` is the time in seconds
        spent moving ``inputs`` to ``device``, running the model and copying
        the predictions back to the CPU. ``err`` is the mean absolute
        difference between the log of the ``beerlamb_multi_batch`` output
        for the predictions and for the targets.
    """
    # Inference only: skip autograd bookkeeping so no graph is kept for the batch.
    with torch.no_grad():
        # perf_counter is the clock intended for measuring short intervals.
        start = time.perf_counter()
        # The .cpu() copy is inside the timed span, as in the original measurement.
        preds = model(inputs.to(device).float()).cpu()
        duration = time.perf_counter() - start

    preds = torch.swapaxes(preds, 0, 1).detach()
    targets = torch.swapaxes(targets, 0, 1).detach()

    # Compare predictions and targets on the log of the beerlamb_multi_batch output.
    spec1 = np.asarray(np.log(beerlamb_multi_batch(molecules, x, preds, left_cut)))
    spec2 = np.asarray(np.log(beerlamb_multi_batch(molecules, x, targets, left_cut)))
    err = np.mean(np.abs(spec1 - spec2))
    return duration, err

In [7]:
import pandas as pd

results_list = []

# Each sub-directory of results/ holds one trained configuration.
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# row order, and keep directories only so a stray file does not break the
# name parsing below.
folders = sorted(
    entry for entry in os.listdir("results")
    if os.path.isdir(os.path.join("results", entry))
)

for folder in folders:
    # Directory names encode the architecture as "<n_layers>_<layer_width>".
    layer, size = folder.split("_")

    # `model` is deliberately a notebook-level name: test() reads it as a global.
    model = SpectraMLP(4, n_layers=int(layer), layer_width=int(size))
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    checkpoints = torch.load('results/{}/best_model.pth'.format(folder),
                             map_location=device)
    model.load_state_dict(checkpoints)
    model.to(device)
    model.eval()

    duration, error = test(targets)

    results_dict = {
        'Folder': folder,
        'N Params': sum(p.numel() for p in model.parameters()),
        'Duration': round(duration, 4),
        'Error': round(error, 4)}
    results_list.append(results_dict)

df = pd.DataFrame(results_list)

# to_csv does not create missing parent directories, so make sure it exists.
# NOTE(review): the summary goes to `result/` (singular), not the `results/`
# directory scanned above — confirm this is intended.
os.makedirs('result', exist_ok=True)
df.to_csv('result/results_summary.csv', index=False)



In [10]:
# Show the configurations ranked from lowest to highest error.
df.sort_values(by=['Error'], ascending=True)

Unnamed: 0,Folder,N Params,Duration,Error
4,1_256,56068,0.0424,0.1273
7,3_256,187652,0.1175,0.1479
6,4_128,77572,0.0736,0.1491
12,2_1024,1273860,0.4069,0.1591
15,4_1024,3373060,1.7364,0.1771
1,4_256,253444,0.2009,0.1823
0,1_128,28036,0.0747,0.1881
2,3_128,61060,0.0597,0.1893
5,2_256,121860,0.0729,0.2039
3,2_128,44548,0.0388,0.2111
