In [5]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
import sys
import os
import torch.nn.functional as F
# Project root. Defaults to the original cluster location; set the
# DOS_PREDICTION_ROOT environment variable to run the notebook elsewhere
# without editing this cell.
root = os.environ.get("DOS_PREDICTION_ROOT", "/project/wyin/jlee/ml-project/dos-prediction-sparse")
util_loc = os.path.join(root, "utils")
data_loc = os.path.join(root, "data/random")
# Make the project's helper modules importable. The membership check keeps
# the cell idempotent: re-running it does not stack duplicate sys.path entries.
if util_loc not in sys.path:
    sys.path.append(util_loc)
from utilities import DosDataModule
import pickle

In [6]:
class LitNeuralNet(pl.LightningModule):
    """Fully connected regression network trained with a mean-squared-error loss.

    The network is a stack of ``nn.Linear`` layers whose widths are given by
    ``layer_sizes``. Every linear layer except the last one is followed by
    ``nn.ReLU`` and ``nn.Dropout``; the output of the last linear layer is
    returned unchanged.

    Args:
        layer_sizes: Sequence of layer widths, from input width to output
            width. ``len(layer_sizes) - 1`` linear layers are created.
        lr: Learning rate passed to the Adam optimizer.
        factor: Multiplicative factor handed to ``ReduceLROnPlateau``.
            A value of ``0.0`` disables the scheduler entirely.
        dropout: Drop probability used by every ``nn.Dropout`` layer.
    """

    def __init__(self, layer_sizes, lr = 0.01, factor = 0.0, dropout = 0.0):
        super().__init__()

        n_linear = len(layer_sizes) - 1
        modules = []
        for i in range(n_linear):
            modules.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))

            # No activation/dropout after the final (output) layer.
            if i != n_linear - 1:
                modules.append(nn.ReLU())
                modules.append(nn.Dropout(dropout))

        self.forward_prop = nn.Sequential(*modules)
        self.learning_rate = lr
        self.factor = factor
        # Must be called from __init__ so the constructor arguments are recorded.
        self.save_hyperparameters()

    def forward(self, params):
        """Run a batch of inputs through the network and return its output."""
        return self.forward_prop(params)

    def _shared_step(self, batch, log_name):
        """Compute the MSE loss for one ``(inputs, targets)`` batch and log it under ``log_name``."""
        params, dos = batch

        # Forward pass
        predicted = self(params)
        loss = F.mse_loss(predicted, dos)

        # log to tensorboard
        self.log(log_name, loss)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch``; logged as ``train_loss``."""
        return self._shared_step(batch, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for ``batch``; logged as ``val_loss``."""
        return self._shared_step(batch, "val_loss")

    def configure_optimizers(self):
        """Build the Adam optimizer and, when ``factor`` is non-zero, a plateau scheduler.

        Returns:
            The bare optimizer when ``self.factor == 0.0``; otherwise a dict
            with the optimizer and a ``ReduceLROnPlateau`` scheduler
            configuration that monitors ``train_loss``.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        if self.factor == 0.0:
            return optimizer

        # Reuse the optimizer created above; the scheduler must wrap the same
        # optimizer instance that is returned to the trainer.
        sch = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=self.factor, min_lr=1e-7
        )
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": sch,
                # NOTE(review): "train_loss" is logged from training_step, so
                # the monitored value is presumably a per-batch loss rather
                # than an epoch average; consider monitoring "val_loss"
                # instead. Confirm before changing.
                "monitor": "train_loss",
            },
        }

In [8]:
#######Set changed hyperparameter(s) here###############
layer_sizes = [3, 512, 512, 512, 512, 512, 512, 512, 301]  # input width, hidden widths, output width
learning_rate = 0.001
batch_size = 256
schedule_factor = 0.5  # passed to LitNeuralNet as `factor`; 0.0 disables the LR scheduler
dropout = 0.05
max_time = "00:00:05:00"  # DD:HH:MM:SS -> training stops after 5 minutes
log_name = "drop-test-1"
seed = 42  # fixed so runs with different hyperparameters start from the same RNG state

# Seed Python, NumPy and torch RNGs (and dataloader workers) before any data
# or model is created, so weight init, shuffling and dropout masks are
# repeatable and the dropout comparison is not confounded by random init.
pl.seed_everything(seed, workers=True)

dos_data = DosDataModule(data_loc, batch_size)
# The run name encodes the hyperparameters so runs can be told apart in TensorBoard.
logger = TensorBoardLogger(f'logs/{log_name}', name = f'{layer_sizes},{learning_rate},{batch_size},{schedule_factor},{dropout}')
trainer = pl.Trainer(enable_checkpointing=False, max_time=max_time, logger = logger, enable_progress_bar = False)
model = LitNeuralNet(layer_sizes, lr = learning_rate, factor = schedule_factor, dropout = dropout)
trainer.fit(model, datamodule=dos_data)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: logs/drop-test-1/[3, 512, 512, 512, 512, 512, 512, 512, 301],0.001,256,0.5,0.05

  | Name         | Type       | Params
--------------------------------------------
0 | forward_prop | Sequential | 1.7 M 
--------------------------------------------
1.7 M     Trainable params
0         Non-trainable params
1.7 M     Total params
6.930     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(
Time limit reached. Elapsed time is 0:05:00. Signaling Trainer to stop.


In [10]:
#######Set changed hyperparameter(s) here###############
layer_sizes = [3, 512, 512, 512, 512, 512, 512, 512, 301]  # input width, hidden widths, output width
learning_rate = 0.001
batch_size = 256
schedule_factor = 0.5  # passed to LitNeuralNet as `factor`; 0.0 disables the LR scheduler
dropout = 0.01
max_time = "00:00:05:00"  # DD:HH:MM:SS -> training stops after 5 minutes
log_name = "drop-test-1"
seed = 42  # fixed so runs with different hyperparameters start from the same RNG state

# Seed Python, NumPy and torch RNGs (and dataloader workers) before any data
# or model is created, so weight init, shuffling and dropout masks are
# repeatable and the dropout comparison is not confounded by random init.
pl.seed_everything(seed, workers=True)

dos_data = DosDataModule(data_loc, batch_size)
# The run name encodes the hyperparameters so runs can be told apart in TensorBoard.
logger = TensorBoardLogger(f'logs/{log_name}', name = f'{layer_sizes},{learning_rate},{batch_size},{schedule_factor},{dropout}')
trainer = pl.Trainer(enable_checkpointing=False, max_time=max_time, logger = logger, enable_progress_bar = False)
model = LitNeuralNet(layer_sizes, lr = learning_rate, factor = schedule_factor, dropout = dropout)
trainer.fit(model, datamodule=dos_data)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: logs/drop-test-1/[3, 512, 512, 512, 512, 512, 512, 512, 301],0.001,256,0.5,0.01

  | Name         | Type       | Params
--------------------------------------------
0 | forward_prop | Sequential | 1.7 M 
--------------------------------------------
1.7 M     Trainable params
0         Non-trainable params
1.7 M     Total params
6.930     Total estimated model params size (MB)
Time limit reached. Elapsed time is 0:05:00. Signaling Trainer to stop.
