In [1]:
# import save_load as sl
# import preprocess_data as ppd
import torch
from torch import nn
from torch.utils.data import DataLoader
# from model import DnCNN
# from Dataset import Img_Dataset
import numpy as np 
import pathlib

# Import utility functions for training
from PT_files.model import DnCNN, DnCNN_B
from PT_files.Dataset import Img_Dataset, Large_Img_Dataset
import PT_files.preprocess_data as ppd
import PT_files.save_load as sl

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Report the choice so the cell's recorded output is reproducible on re-run.
print(f"Using {device} device")

Using cuda device


# 0. Load raw focal plane image pairs (noisy/clean)

In [2]:
# Data naming notation is dataXXX-XXXX
# XXX - number of samples
# XXXX - size of img, i.e. 6000x6000 for the file loaded in this code block
#raw_data = sl.NERSC_load('data1500-3000.npy')
raw_data = sl.NERSC_load('data270-6000.npy')


# 1. Now create training and test sets from the raw data, still containing pairs of (noisy/clean) samples

NOTE: These training & test sets are automatically saved

In [None]:
# Split the raw (noisy/clean) pairs into a 250-sample training set and a
# 20-sample test set. The two file names are the ones reloaded further down.
ppd.create_data_sets(
    data=raw_data,
    training_set_name='training_data250-6000.npy',
    train_size=250,
    test_set_name='test_data20-6000.npy',
    test_size=20,
)

# 2. Reload the newly created sets and check them

In [None]:
# Reload both splits using the same file names that were passed to
# ppd.create_data_sets.
training_data = sl.NERSC_load('training_data250-6000.npy')
test_data = sl.NERSC_load('test_data20-6000.npy')

In [None]:
# Display the shape of the first element of the reloaded training set.
training_data[0].shape

In [None]:
# Display the container type that sl.NERSC_load returned for the training set.
type(training_data)

# 3. Put training samples into a PyTorch DataLoader object to allow easy training

In [None]:
# Alternative dataset class, kept for reference:
# train_dataset = Large_Img_Dataset(data_set=training_data, num_patchs=30,
#                                   patch_size=50, width=6000, height=6000)

# Wrap the training array in the patch-based dataset (patch_size=200, with
# width and height both given as 6000).
train_dataset = Img_Dataset(
    data_set=training_data,
    patch_size=200,
    width=6000,
    height=6000,
)

# Serve the dataset in shuffled mini-batches of 16 items.
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)

# Pull a single batch and print its shapes as a sanity check before training.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

In [None]:
# Display which dataset class train_dataset is an instance of.
type(train_dataset)

# 4. Run the training loop below!

In [None]:
# The network's parameters must live on the same device as the batches fed
# to it, so the model is moved to `device` right after it is constructed.
# (DnCNN() is the alternative architecture imported alongside DnCNN_B.)
model = DnCNN_B()
model = model.to(device)

In [None]:
# Summed (not averaged) squared error; train() rescales it by 1 / (2 * batch size).
loss_fn = nn.MSELoss(reduction="sum")
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch of ``model`` over ``dataloader``.

    Parameters
    ----------
    dataloader : iterable
        Yields ``(X, y)`` batches and exposes ``.dataset`` supporting ``len()``.
    model : torch.nn.Module
        Network to optimise; it is switched to training mode.
    loss_fn : callable
        ``loss_fn(pred, y)`` returning a scalar tensor. The result is divided
        by ``2 * len(X)``, so with a summing loss such as
        ``nn.MSELoss(reduction='sum')`` the optimised value is half the
        per-sample summed squared error.
    optimizer : torch.optim.Optimizer
        Optimiser already bound to ``model``'s parameters.

    Returns
    -------
    None
        Progress is printed every 100 batches.
    """
    size = len(dataloader.dataset)

    # Send each batch to wherever the model's parameters live instead of
    # calling ``.cuda(...)``: that call raises when the target is the CPU,
    # which made the notebook's CPU fallback unusable. ``.to`` is a no-op for
    # tensors that are already on the right device.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y) / (2 * len(X))

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # ``batch * len(X)`` is the number of samples seen before this batch.
            current = batch * len(X)
            print(f"loss: {loss.item():>7f} [{current:>5d}/{size:>5d}]")

In [None]:
# Number of full passes over the training DataLoader.
epochs = 20
for t in range(epochs):
    # Banner with the 1-based epoch number, then one pass of optimisation.
    print(f"Epoch {t+1}\n-------------------------------")
    train(
        dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
    )
print("Done!")

# 5. Save model parameters

In [None]:
# # Saving Models
# current_dir = pathlib.Path().resolve()
# model_params_path = current_dir / 'Model_params'
# assert model_params_path.exists()
# name = "6k_model_bs16_e500_ps150.pth"
# path = model_params_path / name
# torch.save(model.state_dict(), path)
# print("Saved PyTorch Model State to model.pth")

Check to see if we can load PyTorch model params via `np.load` or `sl.NERSC_load`.

In [None]:
# Experiment: try reading a saved ``.pth`` parameter file with NumPy's loader.
# NOTE(review): the (commented-out) save cell in this notebook writes parameter
# files with torch.save, so torch.load is presumably the matching loader;
# confirm whether np.load can read this file at all.
# NOTE(review): the absolute /pscratch path is machine-specific.
x = np.load("/pscratch/sd/m/mdowicz/DESI_dn/Model_params/2k_model_bs64_e200.pth")

In [None]:
# Same experiment through the project loader, passing only the bare file name.
# NOTE(review): how sl.NERSC_load handles non-.npy files is not visible here; verify.
sl.NERSC_load("2k_model_bs64_e200.pth")