In [1]:
import pickle
import datetime
from pathlib import Path

import torch
import torch.nn as nn
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.optim import lr_scheduler
from torch.utils.data import TensorDataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter

from libs_unet.models import peakyfinder_0001
from libs_unet.training.libs_train import train_loop, test_loop, Custom_Wgt_MSE

# Project paths are anchored at the kernel's current working directory.
top_dir = Path.cwd()
# Sub-directory under top_dir that the data files are read from.
datapath = top_dir.joinpath('data')

In [2]:
#reference data load
#create weights array for weighted mse loss function
#with open(datapath / 'training' / 'el80_log_int.pickle', 'rb') as f:
#   el_tot_int = pickle.load(f).astype('float32')


#Leverage PyTorch native Dataset and DataLoader
#NOTE(security): pickle.load can execute arbitrary code - only open files from a trusted source.
#The file holds six objects pickled back-to-back; they must be read in exactly this order.
with open(datapath / 'training' / 'el80_pairs_lg.pickle', 'rb') as f:
    elem_symb = pickle.load(f)
    el_index = pickle.load(f)
    fracs = pickle.load(f)
    wave = pickle.load(f)
    x_data = pickle.load(f)
    y_data = pickle.load(f)

#create dataset
#input needs a placeholder "channel" dimension since single channel
#learned labels already has max_z + 2 channels from spec_array
#data has to match weights which default to float() so cast data as same
#apply a scaling factor to standardize spectra across sources.
nist_mult = 1.17
x_data = nist_mult * torch.tensor(x_data[:,None,:].astype('float32'))
y_data = nist_mult * torch.tensor(y_data.astype('float32'))
#negative intensities are floored at zero, then log-transformed with an offset of 1
#so zero points remain zero on the log scale. log1p(x) == log(1 + x) but keeps
#float32 precision for small x, where (x + 1) would round the signal away.
x_data = torch.log1p(torch.clamp(x_data, min=0))
y_data = torch.log1p(torch.clamp(y_data, min=0))

spec_ds = TensorDataset(x_data, y_data)
#batch sizes
train_bs = 50
test_bs = 100
#create random split for training and validation
#the split is seeded so the same samples land in train / validation on every run;
#otherwise the held-out set differs after each kernel restart and results are not comparable.
split_seed = 42
train_len = int(0.8 * len(spec_ds))
test_len = len(spec_ds) - train_len
train_ds, test_ds = random_split(spec_ds, [train_len, test_len],
                                 generator=torch.Generator().manual_seed(split_seed))
#shuffle=True is not redundant: random_split only fixes which samples belong to each
#subset, while shuffle re-orders the training samples at the start of every epoch.
train_dl = DataLoader(train_ds, batch_size=train_bs, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=test_bs)
#

Apply the training / optimization loop pattern from:
 https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html
 https://pytorch.org/tutorials/beginner/nn_tutorial.html
Notes:
1. Run the data-loading cell first; set the batch sizes and input file there as needed.
2. Simulated data is built on the first n elements with 760-point spectra (190 nm - 949 nm).
3. Check that `max_z` and `wl_points` (set in the next cell from the loaded data) match the data so that the correct model size is built.

In [3]:
#set parameters
max_z = len(elem_symb)
wl_points = len(wave) #number of wavelength point measurements in data
#these are weights used in custom loss function to emphasize weaker signal elements
#(disabled: needs el_tot_int from the commented-out load in the data cell, and numpy as np)
#loss_wts = torch.tensor(1/np.log(1+el_tot_int))
#avg_wt = torch.mean(loss_wts)
#add on MSE weights for artifact and noise arrays at avg
#loss_wts = torch.cat((loss_wts, torch.tensor([avg_wt, avg_wt])))
#loss_wts = loss_wts / torch.sum(loss_wts)
#fix He
#loss_wts[1] = 0.02
#loss_wts = 100 * loss_wts


learning_rate = 1 #initial rate; the MultiStepLR schedule below halves it at a fixed epoch
#lr_patience = 10
#lr_cooldown = 2
#lr_thresh = 0.01
epochs = 5

#Initialize and run
model = peakyfinder_0001.LIBSUNet(max_z, wl_points)
loss_fn = nn.MSELoss(reduction='mean')
#loss_fn = Custom_Wgt_MSE(loss_wts)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
#Named `scheduler` (not `lr_scheduler`) so the imported torch.optim.lr_scheduler
#module is not overwritten by this instance.
#After 4 scheduler steps the learning rate is multiplied by 0.5 (i.e. from epoch 5 on).
scheduler = MultiStepLR(optimizer, milestones=[4], gamma=0.5)
#Plateau-based alternative; its step() needs the monitored metric: scheduler.step(test_loss)
#scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=lr_patience,
#                                           cooldown=lr_cooldown, threshold=lr_thresh)

#each run logs to its own timestamped directory under runs/
now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') #  now
writerpath = top_dir / 'runs' / now_time
writer = SummaryWriter(writerpath)

# save trained model
#torch.save(model, top_dir / 'trained_models/peakyfinder_0001_')

#add model diagram to tensorboard
#writer.add_graph(model,train_ds)

#try/finally guarantees the TensorBoard writer is flushed and closed even if an
#epoch raises or the run is interrupted, so already-logged events are not lost.
try:
    for t in range(epochs):
        print(f"Epoch {t+1}, learning rate {optimizer.param_groups[0]['lr']}\n-------------------------------")
        train_loop(train_dl, model, loss_fn, optimizer, writer, t+1, debug=True, bsize=train_bs)
        #test_loss is kept so a metric-driven scheduler can consume it (see alternative above)
        test_loss = test_loop(test_dl, model, loss_fn, writer, t+1)
        scheduler.step()
    print("Done!")
finally:
    #cleanup
    writer.flush()
    writer.close()


Epoch 1, learning rate 1
-------------------------------
Epoch 2, learning rate 1
-------------------------------
Epoch 3, learning rate 1
-------------------------------
Epoch 4, learning rate 1
-------------------------------
Epoch 5, learning rate 0.5
-------------------------------
Done!


In [4]:
#Save model parameters for use in Model_Test notebook
#https://pytorch.org/tutorials/beginner/saving_loading_models.html
param_path = top_dir / 'trained_models' / 'el80_pairs_lg_0001'
#torch.save does not create missing directories; make sure the target folder exists
#so a finished training run is not lost to a missing 'trained_models' directory.
param_path.parent.mkdir(parents=True, exist_ok=True)
#only the state_dict (parameters/buffers) is stored, not the model class itself
torch.save(model.state_dict(), param_path)

Use `Model_Test.ipynb` to load the saved parameters and review model predictions on sample / experimental data.