# Testing the 2D CNN - PENGUIN

Links to important scripts:

 * [Penguin network](../../src/models/PENGUIN.py)
 * [Trainer](../../src/models/train_model.py)
 * [Dataloader](../../src/data/sliced_cube_dataset.py)

In [13]:
# Imports and paths
import numpy as np
import sys, os
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# Add the directory two levels above the working directory to the import
# path so that the `src` package imported below can be resolved.
# The membership check keeps sys.path free of duplicate entries when this
# cell is executed more than once in the same kernel.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Import custom modules
from src.data.sliced_cube_dataset import SlicedCubeDataset, make_sliced_dataset
from src.models.PENGUIN import PENGUIN
from src.models.train_model import ModelTrainer
from src.data.transforms import Normalise

# Forwarded as the `verbose` argument of ModelTrainer further down
VERBOSE = True

In [14]:
# Set device
# FORCE_CPU pins the run to the CPU even when CUDA is available; set it to
# False to let the CUDA availability check decide.
FORCE_CPU = True
GPU = (not FORCE_CPU) and torch.cuda.is_available()
device = torch.device("cuda:0" if GPU else "cpu")
print("Device:", device)

Device: cpu


## Data

In [15]:
# Keyword arguments unpacked into make_sliced_dataset
DATA_PARAMS = {
    # presumably the train / test / validation fractions, in that order
    "train_test_val_split": (0.7, 0.2, 0.1),
    "batch_size": 3 * 256,
    # NOTE(review): 96 workers looks machine-specific — confirm against the
    # CPU cores available on the target host
    "num_workers": 96,
    "stride": 1,
    "redshifts": 1.0,
    "transform": Normalise(),
    "total_seeds": np.arange(25),  # integers 0..24
    "random_seed": 42,
    "prefetch_factor": 4,
}

In [16]:
# Build the three loaders from DATA_PARAMS; the result is unpacked in the
# order train, test, val (same order as the "train_test_val_split" key name)
train_loader, test_loader, val_loader = make_sliced_dataset(**DATA_PARAMS)

In [17]:
# Number of batches per split
for caption, loader in (
    ("Number of training batches:", train_loader),
    ("Number of validation batches:", val_loader),
    ("Number of test batches:", test_loader),
):
    print(caption, len(loader))

# Number of samples per split (length of the dataset behind each loader)
for caption, loader in (
    ("Number of training samples:", train_loader),
    ("Number of validation samples:", val_loader),
    ("Number of test samples:", test_loader),
):
    print(caption, len(loader.dataset))


Number of training batches: 102
Number of validation batches: 18
Number of test batches: 30
Number of training samples: 78336
Number of validation samples: 13824
Number of test samples: 23040


## Model

In [18]:
# Mini network
# Keyword arguments unpacked into the PENGUIN constructor.
MODEL_PARAMS = {
    # Leading dimension is taken from the data stride; the other two are fixed at 256
    "input_size": (DATA_PARAMS["stride"], 256, 256),
    "layer_param": 1,  # NOTE(review): meaning not visible in this notebook — see PENGUIN.py
    "activation": nn.LeakyReLU(),
    "output_activation": nn.Sigmoid(),  # sigmoid output; the loss used further down is nn.BCELoss
    "bias": False,
    "dropout": 0.25,
}

In [19]:
# Build the network from MODEL_PARAMS.
# NOTE(review): the model is not moved to `device` in this notebook —
# presumably ModelTrainer does that; confirm.
model = PENGUIN(**MODEL_PARAMS)

In [20]:
# Print the module tree to inspect the layer layout
print(model)

PENGUIN(
  (activation): LeakyReLU(negative_slope=0.01)
  (output_activation): Sigmoid()
  (conv2d1): Sequential(
    (0): Conv2d(1, 4, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Dropout(p=0.25, inplace=False)
  )
  (conv2d2): Sequential(
    (0): Conv2d(4, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): LeakyReLU(negative_slope=0.01)
    (4): Dropout(p=0.25, inplace=False)
  )
  (conv2d3): Sequential(
    (0): Conv2d(6, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    ... [output truncated]

In [21]:
# Loss function and optimizer
# Binary cross-entropy on probabilities, paired with the Sigmoid output
# activation configured in MODEL_PARAMS.
loss_fn = nn.BCELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    weight_decay=1e-5,
)

## Train

In [22]:
# Writer object: TensorBoard SummaryWriter with "runs/2dtest_1" as its log directory
writer = SummaryWriter("runs/2dtest_1")

In [23]:
# Trainer object: receives the model, optimizer, loss function and device,
# plus the verbosity flag, the TensorBoard writer and a save path.
trainer = ModelTrainer(
    model,
    optimizer,
    loss_fn,
    device,
    verbose=VERBOSE,
    writer=writer,
    save_path="saved_models/2dtest_1",
)

In [24]:
# Train the model (call left commented out; as written it requests 2 epochs):
# trainer.train_model(train_loader, val_loader, epochs=2)