In [1]:
# imports and setup
# Change the kernel's working directory before anything else runs; the relative
# data path used later in the notebook is resolved against this directory.
# NOTE(review): presumably this is also what makes the project-local `models`
# and `datasets` packages importable — confirm.
# NOTE(review): hard-coded absolute path; this only works on the original
# author's machine and should become a configurable location.
%cd '/home/naodell/work/hgcal/analysis'

import os
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('default')

import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from sklearn import preprocessing
import seaborn as sns
from tqdm.notebook import tqdm, trange

from models.autoencoder import AutoEncoderWafer
from datasets.hgcal_tc_dataset import HGCalTCModuleDataset

#%connect_info

/home/naodell/work/hgcal/analysis


In [2]:
# get the data
input_dir = 'local_data/econ_training_data/single_photon_data/'
# os.listdir returns entries in arbitrary (filesystem-dependent) order; sort them
# so the dataset ordering -- and therefore the train/test split below -- is the
# same on every run and every machine.
input_filenames = [os.path.join(input_dir, f) for f in sorted(os.listdir(input_dir))]
hgcal_data = HGCalTCModuleDataset(input_filenames)

# run on the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# pinned host memory only speeds up host-to-GPU copies; requesting it on a
# CPU-only machine just triggers a warning
use_pinned_memory = (device == 'cuda')

# split into training (first 80% of the items) and testing (remaining 20%) sets
# NOTE(review): the slicing below assumes HGCalTCModuleDataset.__getitem__
# accepts slice objects and returns something DataLoader can index -- confirm.
batch_size = 4
n_total = len(hgcal_data)
split = round(0.8*n_total)
train_loader = DataLoader(hgcal_data[:split], batch_size=batch_size, shuffle=True,
                          pin_memory=use_pinned_memory)

# NOTE(review): shuffling is not needed to evaluate the test loss; it is kept
# so that downstream cells drawing a batch from test_loader behave as before.
test_loader = DataLoader(hgcal_data[split:], batch_size=batch_size, shuffle=True,
                         pin_memory=use_pinned_memory)

print(f'Total number of events: {n_total}')

Total number of events: 267596


In [3]:
# define and setup the NN model
# Number of values in a single (flattened) dataset item.
# np.prod is used instead of np.product: the latter was deprecated in
# NumPy 1.25 and removed in NumPy 2.0.
n_flat_dimensions = np.prod(hgcal_data[0].shape)
# build the autoencoder and move its parameters to the selected device
model = AutoEncoderWafer(n_flat_dimensions, device).to(device)
print(device, model, sep='\n')

cuda
AutoEncoderWafer(
  (conv2d_1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (act_1): ReLU()
  (pool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bnorm_1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (encode_dense): Sequential(
    (0): Linear(in_features=512, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
  )
  (decode_dense): Sequential(
    (0): Linear(in_features=32, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
  )
)


In [None]:
# optimize NN weights
n_epochs = 10
n_events = len(train_loader)                 # number of training batches per epoch
log_interval = 1000                          # batches averaged into one training-loss point
n_logs_per_epoch = n_events // log_interval  # training-loss points written per epoch

# define loss and configure optimizer
# reduction='sum' => each loss value is the squared error summed over one batch
loss_fn = nn.MSELoss(reduction='sum')
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
tb_writer = SummaryWriter(log_dir='logs')

pbar1 = tqdm(range(n_epochs), total=n_epochs, leave=False, position=0)
pbar1.set_postfix({'avg. loss':'?'})
for iepoch in pbar1:
    # ---- training pass ----
    model.train()
    pbar2 = tqdm(train_loader, total=n_events, leave=False, position=1)
    pbar2.set_postfix({'loss':'?'})
    running_loss = 0.0
    for jevent, features in enumerate(pbar2):
        features = features.to(device)
        optimizer.zero_grad()
        # autoencoder: the input batch is also the regression target
        features_pred = model(features)
        loss = loss_fn(features_pred, features)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Log only after a *full* window of `log_interval` batches has been
        # accumulated. Testing `jevent % log_interval == 0` would fire on the
        # very first batch and report a single loss divided by the window size.
        if (jevent + 1) % log_interval == 0:
            avg_train_loss = running_loss / log_interval
            pbar2.set_postfix({'loss':f'{avg_train_loss:.3f}'})
            # integer step that increases monotonically across epochs:
            # epoch e writes steps e*n_logs_per_epoch + 1 ... (e + 1)*n_logs_per_epoch
            global_step = iepoch*n_logs_per_epoch + (jevent + 1)//log_interval
            tb_writer.add_scalar('training loss', avg_train_loss, global_step)
            running_loss = 0.0

    # ---- evaluation pass ----
    model.eval()
    with torch.no_grad():
        test_loss = 0.0
        n_test_events = len(test_loader)
        for features in tqdm(test_loader, total=n_test_events, leave=False):
            features = features.to(device)
            features_pred = model(features)
            # accumulate a Python float so an empty loader cannot leave an int
            # behind that has no .item()
            test_loss += loss_fn(features_pred, features).item()

        # mean summed-per-batch loss over the test batches (guard against an empty loader)
        avg_test_loss = test_loss / max(n_test_events, 1)
        pbar1.set_postfix({'avg loss':f'{avg_test_loss:.3f}'})
        tb_writer.add_scalar('test loss', avg_test_loss, iepoch)

        # the model graph does not change between epochs, so trace it only once
        if iepoch == 0:
            tb_writer.add_graph(model, features)

        # TODO: log input/output image grids to TensorBoard
        # TODO: write a checkpoint based on the performance of the model

# make sure everything buffered by the writer reaches the event file
tb_writer.flush()



  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/53520 [00:00<?, ?it/s]

  0%|          | 0/13380 [00:00<?, ?it/s]

  0%|          | 0/53520 [00:00<?, ?it/s]

  0%|          | 0/13380 [00:00<?, ?it/s]

  0%|          | 0/53520 [00:00<?, ?it/s]

  0%|          | 0/13380 [00:00<?, ?it/s]

  0%|          | 0/53520 [00:00<?, ?it/s]

  0%|          | 0/13380 [00:00<?, ?it/s]

  0%|          | 0/53520 [00:00<?, ?it/s]

  0%|          | 0/13380 [00:00<?, ?it/s]

  0%|          | 0/53520 [00:00<?, ?it/s]

  0%|          | 0/13380 [00:00<?, ?it/s]

  0%|          | 0/53520 [00:00<?, ?it/s]

In [None]:
# compare images
# Draw one batch from the test loader, run it through the model, and show
# input / reconstruction / absolute difference side by side (one row per item).
max_rows = 8  # upper bound on the number of items displayed

model.eval()
with torch.no_grad():
    # built-in next(): the Python-2 style `.next()` method is not available on
    # current DataLoader iterators
    images = next(iter(test_loader))
    images = images.to(device)
    images_pred = model(images)

# Size the grid to the batch actually drawn, so small batches do not leave empty
# axes and large batches cannot index past the last row.
n_rows = min(len(images), max_rows)
fig, axes = plt.subplots(n_rows, 3, facecolor='white', figsize=(8, 2.5*n_rows),
                         squeeze=False)  # squeeze=False keeps `axes` 2-D even for a single row

# the third column shows the absolute difference, so label it as such
column_titles = ('input', 'output', '|input - output|')
column_cmaps = (None, None, 'coolwarm')  # None -> matplotlib's default colormap

for ix in range(n_rows):
    image = images[ix].cpu().numpy()
    # the model output is reshaped back to the layout of the input item
    image_pred = images_pred[ix].cpu().numpy().reshape(image.shape)
    panels = (image, image_pred, abs(image - image_pred))

    for ax, panel, cmap, title in zip(axes[ix], panels, column_cmaps, column_titles):
        ax.imshow(panel, cmap=cmap)
        if ix == 0:
            ax.set_title(title)

plt.tight_layout()
plt.show()