In [1]:
import pickle
import datetime
from pathlib import Path

import torch
import torch.nn as nn
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
#from torch.optim import lr_scheduler
from torch.utils.data import TensorDataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from libs_unet.models import peakyfinder_0006
from libs_unet.training.libs_train import train_loop, test_loop
import matplotlib.pyplot as plt

# All project paths are resolved relative to the directory the notebook runs from.
top_dir = Path.cwd()
datapath = top_dir.joinpath('data')

In [2]:
#training data import
# The file holds several pickled objects written back to back; each
# pickle.load call consumes the next one, so the unpacking order below
# must match the order in which they were written.
with (datapath / 'training' / 'el80_pairs_lg.pickle').open('rb') as f:
    elem_symb, el_index, fracs, wave, x_data = (pickle.load(f) for _ in range(5))
    # NOTE(review): a further object (y_data) appears to follow in the file
    # and is deliberately not loaded here - confirm against the writer script.

In [3]:
# Rescale the spectra by a fixed multiplier (nist_mult) to account for their
# source. The targets are simply the 80 element fractions.
# NOTE: x_data is overwritten in place of the loaded array, so re-running this
# cell without re-running the load cell applies the multiplier a second time.
nist_mult = 1.17
y_data = fracs
x_data = x_data * nist_mult

In [4]:
# Sanity check: dimensions of the target array before it is turned into a tensor
y_data.shape

(28520, 80)

In [5]:
#split x_data into mutex low_int and log(hi_int) input channels
# Channel 0 keeps the raw intensity wherever log(intensity) <  log_cut.
# Channel 1 holds log(intensity)    wherever log(intensity) >= log_cut.
# Every other entry is zero, so at most one channel is non-zero per point.
log_cut = 2
x_data = x_data[:,None,:]  # insert a singleton channel axis
x_shape = x_data.shape
new_inp = np.zeros((x_shape[0], 2, x_shape[2]))
# Take the log once and reuse it. log(0) = -inf and log(<0) = nan are expected
# inputs here: -inf falls in the low channel (value 0), nan fails both
# comparisons and so stays 0 in both channels. Silence numpy's
# divide/invalid RuntimeWarnings for exactly this computation.
with np.errstate(divide='ignore', invalid='ignore'):
    log_x = np.log(x_data[:,0,:])
    low_mask = log_x < log_cut
    high_mask = log_x >= log_cut
new_inp[:,0,:] = np.where(low_mask, x_data[:,0,:], 0)
new_inp[:,1,:] = np.where(high_mask, log_x, 0)
# Rebind rather than copy: new_inp is a fresh array nobody else references,
# so copying it would only double peak memory.
x_data = new_inp
new_inp = None
# Drop the large temporaries so they do not linger in the kernel namespace.
del log_x, low_mask, high_mask

In [6]:
#create dataset
#input has two channels
# Convert inputs and targets to float32 tensors and clip negatives to zero.
x_data = torch.tensor(x_data.astype('float32'))
y_data = torch.tensor(y_data.astype('float32'))
x_data[x_data < 0] = 0
y_data[y_data < 0] = 0
#log transform data, add small offset 1 so zero points remain ~zero on log scale
#x_data = torch.log(x_data + 1)
#y_data = torch.log(y_data + 1)

spec_ds = TensorDataset(x_data, y_data)
#batch sizes
train_bs = 50
test_bs = 100
#create random split for training and validation (80% / 20%)
train_len = int(0.8 * len(x_data))
test_len = len(x_data) - train_len
# Seed the split so the same examples are held out on every run; otherwise
# validation losses are not comparable between runs of this notebook.
split_seed = 0
split_rng = torch.Generator().manual_seed(split_seed)
train_ds, test_ds = random_split(spec_ds, [train_len, test_len], generator=split_rng)
# shuffle=True is not redundant with random_split: the split only decides which
# examples are in the training set, while shuffle re-orders them every epoch.
train_dl = DataLoader(train_ds, batch_size=train_bs, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=test_bs)

In [7]:
#set parameters
max_z = len(elem_symb)
wl_points = len(wave) #number of wavelength point measurements in data
learning_rate = 0.1 #initial rate; the scheduler below halves it at fixed epoch milestones
epochs = 10

#Initialize and run
model = peakyfinder_0006.LIBSUNet(max_z, wl_points)
loss_fn = nn.MSELoss(reduction='mean')
#loss_fn = Custom_Wgt_MSE(loss_wts)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# MultiStepLR multiplies the learning rate by 0.5 at each listed epoch milestone.
# It is driven by the epoch count only, not by the loss.
lr_scheduler = MultiStepLR(optimizer, [5, 10, 15], 0.5)

# One TensorBoard run directory per launch, named by start time.
now_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') #  now 
writerpath = top_dir / 'runs' / now_time
writer = SummaryWriter(writerpath)

try:
    for t in range(epochs):
        print(f"Epoch {t+1}, learning rate {optimizer.param_groups[0]['lr']}\n-------------------------------")
        train_loop(train_dl, model, loss_fn, optimizer, writer, t+1, debug=True, bsize=train_bs)
        test_loss = test_loop(test_dl, model, loss_fn, writer, t+1)
        lr_scheduler.step()
    print("Done!")
finally:
    #cleanup - also runs when training is interrupted (e.g. KeyboardInterrupt)
    #so buffered TensorBoard events are written out and the file is closed
    writer.flush()
    writer.close()


Epoch 1, learning rate 0.1
-------------------------------
loss: 0.05432126671075821
loss: 0.05560799315571785
loss: 0.022639010101556778
loss: 0.00972551479935646
loss: 0.00881895050406456
loss: 0.007916092872619629
loss: 0.008151727728545666
loss: 0.008168907836079597
loss: 0.00787538941949606
loss: 0.00807010568678379
loss: 0.008422584272921085
loss: 0.008090497925877571
loss: 0.007802837062627077
loss: 0.008023337461054325
loss: 0.008008704520761967
loss: 0.008140647783875465
loss: 0.007855929434299469
loss: 0.00794131401926279
loss: 0.0077138436026871204
loss: 0.008133002556860447
loss: 0.007696035783737898
loss: 0.00811681803315878
loss: 0.007635341491550207
loss: 0.007483168970793486
loss: 0.008221395313739777
loss: 0.008006230928003788
loss: 0.00819059181958437
loss: 0.00795000046491623
loss: 0.007973872125148773
loss: 0.00810001976788044
loss: 0.007474210113286972
loss: 0.00843101181089878
loss: 0.008035756647586823
loss: 0.0079599404707551
loss: 0.008015641011297703
loss: 0.0

KeyboardInterrupt: 

In [None]:
# Toy float64 tensors for a concatenation check:
# a is all ones with 3 entries on dim 1, b is all twos with 1 entry on dim 1.
a = torch.ones((4, 3, 5), dtype=torch.float64)
b = 2 * torch.ones((4, 1, 5), dtype=torch.float64)

In [29]:
# Concatenating along dim 1 adds the dim-1 sizes (3 + 1); the other dims are unchanged.
torch.cat((a, b), dim=1).shape

torch.Size([4, 4, 5])

In [8]:
#Save model parameters for use in Model_Test notebook
#https://pytorch.org/tutorials/beginner/saving_loading_models.html
param_path = top_dir / 'trained_models' / 'el80_pairs_fracs_0006_a'
# torch.save does not create missing directories; make sure the target folder
# exists so a finished training run is not lost to a missing path.
param_path.parent.mkdir(parents=True, exist_ok=True)
# Only the state_dict (learned parameters) is stored, not the model class itself.
torch.save(model.state_dict(), param_path)

Use Model_Test.ipynb to load the model and review its predictions on sample / experimental data.