In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
from tqdm.notebook import tqdm

import numpy as np 
import pathlib
import matplotlib.pyplot as plt

# Importing utility functions for training
from PT_files.model import DnCNN, DnCNN_B
from PT_files.Dataset import Img_Dataset, Large_Img_Dataset
import PT_files.preprocess_data as ppd
import PT_files.save_load as sl

# Pick the compute device once: use the GPU when CUDA is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

Using cuda device


In [2]:
%%capture
# !pip install wandb --upgrade

In [3]:
# Weights & Biases client used for experiment tracking in this notebook.
import wandb

# Authenticate this session with wandb before any run is started.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmdowicz[0m (use `wandb login --relogin` to force relogin)


True

In [4]:
# Hyperparameters and descriptive labels passed to wandb for this run.
# NOTE(review): `dataset` is labelled "6k" while `architecture` is labelled
# "2k" -- confirm which label is intended for this experiment.
config = {
    "epochs": 800,
    "num_layers": 20,
    "num_features": 64,
    "batch_size": 64,
    "learning_rate": 1e-3,
    "dataset": "6k model data",
    "architecture": "2k DnCNN",
}

In [5]:
def model_pipeline(hyperparameters):
    """Run one tracked experiment end to end and return the trained model.

    Opens a wandb run in the "DnCNN-demo" project configured with
    `hyperparameters`, builds the model, loaders, loss and optimizer with
    `make`, prints the model, and trains it with `train`.

    Parameters
    ----------
    hyperparameters : dict-like
        Values forwarded to `wandb.init(config=...)`.

    Returns
    -------
    The model produced by `make` after `train` has been run on it.
    """
    with wandb.init(project="DnCNN-demo", config=hyperparameters):
        # Read every hyperparameter back from wandb so that what gets logged
        # is exactly what gets executed.
        run_config = wandb.config

        # Build the model, the data loaders and the optimization problem.
        components = make(run_config)
        model, train_loader, test_loader, criterion, optimizer = components
        print(model)

        # Fit the model.
        train(model, train_loader, test_loader, criterion, optimizer, run_config)

        # Final test-set evaluation is currently disabled:
        # test(model, test_loader, criterion, run_config)

    return model

In [6]:
def make(config):
    """Build everything a training run needs from `config`.

    Reads `batch_size`, `num_layers`, `num_features` and `learning_rate`
    from `config`.

    Returns
    -------
    tuple
        `(model, train_loader, test_loader, criterion, optimizer)`.
    """
    # Data: the "2k" training and test datasets, each wrapped in a loader.
    train_set = get_data(model_name="2k", train=True)
    test_set = get_data(model_name="2k", train=False)
    train_loader = make_loader(train_set, batch_size=config.batch_size)
    test_loader = make_loader(test_set, batch_size=config.batch_size)

    # Model, moved to the selected device.
    network = DnCNN(num_layers=config.num_layers,
                    num_features=config.num_features)
    model = network.to(device)

    # Loss and optimizer.
    # TODO: add more to this. see 02A notebook
    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    return model, train_loader, test_loader, criterion, optimizer

In [7]:
def get_data(model_name, train=True):
    """Load the raw data for a model size and wrap it in an `Img_Dataset`.

    Parameters
    ----------
    model_name : str
        Which data family to load; one of "6k" or "2k".
    train : bool, default True
        If truthy, load the training split; otherwise load the test split.

    Returns
    -------
    Img_Dataset
        Dataset built from the array returned by `sl.NERSC_load`, using the
        patch size and image dimensions configured for `model_name`.

    Raises
    ------
    ValueError
        If `model_name` is not a supported name. (Previously an unsupported
        name fell through every branch and failed with an unbound local.)
    """
    # Per-model settings: file for each split, patch size, and the
    # width/height handed to Img_Dataset (both are the same value).
    specs = {
        "6k": {
            "train_file": 'training_data_60%_6000.npy',
            "test_file": 'test_data_40%_6000.npy',
            "patch_size": 150,
            "size": 6000,
        },
        "2k": {
            "train_file": 'training_data_60%_2000.npy',
            "test_file": 'test_data_40%_2000.npy',
            "patch_size": 50,
            "size": 2000,
        },
    }

    # Validate before touching any I/O so bad input fails fast and clearly.
    if model_name not in specs:
        supported = ", ".join(repr(name) for name in specs)
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of: {supported}")

    spec = specs[model_name]
    file_name = spec["train_file"] if train else spec["test_file"]

    # Raw data for the requested split.
    raw_data = sl.NERSC_load(file_name)

    # Processed data for DnCNN.
    return Img_Dataset(data_set=raw_data,
                       patch_size=spec["patch_size"],
                       width=spec["size"],
                       height=spec["size"])

def make_loader(dataset, batch_size):
    """Return a shuffling `DataLoader` over `dataset`.

    Parameters
    ----------
    dataset
        Dataset to draw batches from.
    batch_size : int
        Number of samples per batch.
    """
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [8]:
def train(model, train_loader, test_loader, criterion, optimizer, config):
    """Train `model` for `config.epochs` epochs, logging losses to wandb.

    Each epoch runs one pass over `train_loader` (via `train_batch`) and one
    pass over `test_loader` (via `validate_batch`), then logs the summed
    per-batch values as "train_loss" and "val_loss". After the last epoch the
    model is exported to ONNX and the exported file is saved to wandb.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimize; expected to already be on `device`.
    train_loader, test_loader : iterable of (images, labels)
        Batches used for training and validation respectively.
    criterion : callable
        Loss function forwarded to `train_batch` / `validate_batch`.
    optimizer : torch.optim.Optimizer
        Optimizer forwarded to `train_batch`.
    config
        Object exposing an `epochs` attribute.
    """
    # Tell wandb to watch what the model gets up to: gradients, weights, etc.
    wandb.watch(model, criterion, log="all", log_freq=10)

    # Most recent batch of inputs; reused as the example input for the export.
    last_images = None

    for epoch in tqdm(range(config.epochs)):
        train_tot_loss = 0.0
        val_tot_loss = 0.0

        # Re-enter training mode every epoch: the validation pass below
        # switches to eval mode, and without this the model would stay in
        # eval mode (e.g. BatchNorm using running statistics) from the
        # second epoch onwards.
        model.train()
        for images, labels in train_loader:
            loss = train_batch(images=images,
                               labels=labels,
                               model=model,
                               optimizer=optimizer,
                               criterion=criterion)
            # float() keeps the accumulator a plain number whatever scalar
            # type the batch helper returns.
            train_tot_loss += float(loss)
            last_images = images

        # Evaluate the validation loss without tracking gradients.
        model.eval()
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                val_loss = validate_batch(images=images,
                                          labels=labels,
                                          model=model,
                                          criterion=criterion)
                val_tot_loss += float(val_loss)
                last_images = images

        # Log validation and training loss
        wandb.log({"val_loss": val_tot_loss, "train_loss": train_tot_loss})

    # Export needs an example input; skip it if no batch was ever seen
    # (zero epochs or empty loaders) instead of failing on an unbound name.
    if last_images is not None:
        onnx_path = "dncnn.onnx"
        model.eval()
        torch.onnx.export(model, last_images.to(device), onnx_path)
        # Save the file that was just written (same path as the export).
        wandb.save(onnx_path)
    
                
                
def train_batch(images, labels, model, optimizer, criterion):
    """Run one optimization step on a single batch.

    Returns
    -------
    The natural log of the batch loss, where the batch loss is the criterion
    value divided by `2 * len(images)`, as a NumPy value.
    """
    inputs = images.to(device)
    targets = labels.to(device)

    # Forward pass: criterion value scaled by 1 / (2 * batch size).
    prediction = model(inputs)
    batch_loss = criterion(prediction, targets) / (2 * len(inputs))

    # Backward pass: clear stale gradients, then backpropagate.
    optimizer.zero_grad()
    batch_loss.backward()

    # Parameter update.
    optimizer.step()

    loss_value = batch_loss.detach().cpu().numpy()
    return np.log(loss_value)

def validate_batch(images, labels, model, criterion):
    """Compute the log-loss of `model` on one batch without updating it.

    The batch loss is the criterion value divided by `2 * len(images)`, the
    same scaling `train_batch` uses, and the return value has the same form
    as `train_batch`'s (NumPy log of the detached loss).

    Parameters
    ----------
    images, labels : torch.Tensor
        Inputs and targets; the caller is responsible for placing them on
        the same device as `model`.
    model : callable
        Network to evaluate.
    criterion : callable
        Loss function applied to `(output, labels)`.

    Returns
    -------
    NumPy scalar holding the natural log of the scaled batch loss.
    """
    # Disable gradient tracking here so the function is safe to call on its
    # own, not only from inside a caller's `torch.no_grad()` block.
    with torch.no_grad():
        output = model(images)
        loss = criterion(output, labels) / (2 * len(images))

    # Convert to NumPy before taking the log: calling np.log on a tensor
    # directly fails when the tensor requires grad.
    return np.log(loss.detach().cpu().numpy())

In [None]:
# Launch the full tracked run with the hyperparameters defined above and keep
# the model that `model_pipeline` returns.
model = model_pipeline(config)

DnCNN(
  (layers): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
    )
    (1): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (2): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (3): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (4): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True

  0%|          | 0/800 [00:00<?, ?it/s]


