In [68]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [69]:
import torch
import torch.optim as optim
from tensorboardX import SummaryWriter
import numpy as np
import os
import argparse
import time
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import config   ### just need to import config for config.py - 
from checkpoints import CheckpointIO
from onet import generation
from training import training
import models
import yaml
from torch.utils import data

In [86]:
# NOTE(review): in the visible notebook `device` is only assigned in a later
# cell, so on a fresh top-to-bottom run this line raises NameError; it only
# worked here because the kernel still held `device` from an earlier execution.
print(device)

cuda


In [71]:
# Parse the experiment settings. SafeLoader restricts the YAML to plain
# Python objects (no arbitrary object construction).
path = "ocnet.yaml"
with open(path, 'r') as f:
    cfg = yaml.load(f, Loader=yaml.SafeLoader)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Shorthands
out_dir = "out/onet"        # root for the log directory and checkpoints
batch_size = 64             # batch size of the training DataLoader
backup_every = 100000       # iterations between numbered backup checkpoints
#exit_after = args.exit_after  ### can check later if we really need the exit after

# Key of the evaluation dict used to pick the best model, and its direction:
# with sign +1 a larger metric value counts as an improvement.
model_selection_metric = "iou"
model_selection_sign = 1

# Output directory: exist_ok=True makes this idempotent and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(out_dir, exist_ok=True)

In [72]:
def collate_remove_none(batch):
    ''' Collate function that discards ``None`` samples before batching.

    The surviving samples are handed to PyTorch's ``default_collate``, which
    stacks each data field along a new outer (batch) dimension.

    Args:
        batch (iterable): samples of one batch; ``None`` entries are dropped

    Returns:
        the result of ``default_collate`` on the remaining samples
    '''
    kept_samples = [sample for sample in batch if sample is not None]
    return data.dataloader.default_collate(kept_samples)

def worker_init_fn(worker_id):
    ''' Worker init function to ensure true randomness.

    Seeds NumPy's global random state with four bytes of OS entropy offset
    by the worker id, so each call gets a fresh, worker-specific seed.

    Args:
        worker_id (int): index of the worker being initialised
    '''
    random_data = os.urandom(4)
    base_seed = int.from_bytes(random_data, byteorder="big")
    # np.random.seed() only accepts seeds in [0, 2**32 - 1]. Four random bytes
    # can already equal 2**32 - 1, so adding worker_id could overflow that
    # range and raise ValueError; wrap the sum back into the valid range.
    np.random.seed((base_seed + worker_id) % 2**32)

In [73]:
# Event files for the scalar logging go into <out_dir>/logs.
log_dir = os.path.join(out_dir, 'logs')
logger = SummaryWriter(log_dir)

In [74]:
# Dataset
train_dataset = config.get_dataset('train', cfg)
val_dataset = config.get_dataset('val', cfg)

# Training batches: shuffled, None samples dropped by the collate function,
# NumPy reseeded in every worker process.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, num_workers=4, shuffle=True,
    collate_fn=collate_remove_none,
    worker_init_fn=worker_init_fn)

# Validation batches: fixed order, small batch size.
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=10, num_workers=4, shuffle=False,
    collate_fn=collate_remove_none,
    worker_init_fn=worker_init_fn)

# The training loop calls trainer.visualize(data_vis), but no cell visible in
# this notebook defines `data_vis`, so a fresh run fails with NameError at the
# first visualization step. Draw one batch from the validation set for it.
# NOTE(review): assumes trainer.visualize() expects a collated batch like the
# ones the loaders above yield; the batch size of 12 is arbitrary - confirm.
vis_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=12, shuffle=True,
    collate_fn=collate_remove_none,
    worker_init_fn=worker_init_fn)
data_vis = next(iter(vis_loader))

In [76]:
# Model: built by the project's config.get_model() from the parsed YAML
# settings; it is also given the target device and the training dataset.
model = config.get_model(cfg, device=device, dataset=train_dataset)

In [87]:
# Initialize training
npoints = 1000
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Keyword settings handed to the project's Trainer alongside the model and
# its optimizer.
trainer_options = dict(
    device=device,
    input_type="img",
    vis_dir="out/img/onet/vis",
    threshold=0.2,
    eval_sample=False,
)
trainer = training.Trainer(model, optimizer, **trainer_options)


In [88]:
# Resume support: CheckpointIO saves/loads the model and optimizer state
# under out_dir. Checkpoints are stored as 'model.pt' (not 'model.ckpt').
checkpoint_io = CheckpointIO(out_dir, model=model, optimizer=optimizer)
try:
    load_dict = checkpoint_io.load('model.pt')
except FileExistsError:
    # NOTE(review): presumably what CheckpointIO.load raises when there is no
    # checkpoint yet - confirm against the checkpoints module.
    load_dict = {}

# Counters start at -1 because the training loop increments them before use.
epoch_it = load_dict.get('epoch_it', -1)
it = load_dict.get('it', -1)

# Without a stored value, start from the worst possible metric so that the
# first validation result always counts as an improvement.
worst_metric = -model_selection_sign * np.inf
metric_val_best = load_dict.get('loss_val_best', worst_metric)

print('Current best validation metric (%s): %.8f'
      % (model_selection_metric, metric_val_best))


Current best validation metric (iou): -inf


In [89]:
# Shorthands: how often (in iterations) to print, checkpoint, validate and
# visualize, as configured in the 'training' section of the YAML file.
train_cfg = cfg['training']
print_every = train_cfg['print_every']
checkpoint_every = train_cfg['checkpoint_every']
validate_every = train_cfg['validate_every']
visualize_every = train_cfg['visualize_every']

# Model summary followed by the total number of parameter elements.
print(model)
nparameters = sum(param.numel() for param in model.parameters())
print('Total number of parameters: %d' % nparameters)

OccupancyNetwork(
  (decoder): DecoderCBatchNorm(
    (fc_p): Conv1d(3, 256, kernel_size=(1,), stride=(1,))
    (block0): CResnetBlockConv1d(
      (bn_0): CBatchNorm1d(
        (conv_gamma): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
        (conv_beta): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
        (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (bn_1): CBatchNorm1d(
        (conv_gamma): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
        (conv_beta): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
        (bn): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
      )
      (fc_0): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
      (fc_1): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
      (actvn): ReLU()
    )
    (block1): CResnetBlockConv1d(
      (bn_0): CBatchNorm1d(
        (conv_gamma): Conv1d(256, 256, kernel_size=(1,), stride=(1,))
        (conv_beta): Conv1d(256,

In [96]:
def _is_due(step, interval):
    ''' Return True when `interval` is enabled (> 0) and `step` is a multiple of it. '''
    return interval > 0 and (step % interval) == 0


# Endless training loop (stop it by interrupting the kernel); each pass of the
# outer loop is one epoch over train_loader.
while True:
    epoch_it += 1
    #scheduler.step()

    for batch in train_loader:
        it += 1
        loss = trainer.train_step(batch)
        logger.add_scalar('train/loss', loss, it)

        # Progress line
        if _is_due(it, print_every):
            print('[Epoch %02d] it=%03d, loss=%.4f'
                  % (epoch_it, it, loss))

        # Visualization of the fixed batch
        if _is_due(it, visualize_every):
            print('Visualizing')
            trainer.visualize(data_vis)

        # Rolling checkpoint (overwritten every time)
        if _is_due(it, checkpoint_every):
            print('Saving checkpoint')
            checkpoint_io.save('model.pt', epoch_it=epoch_it, it=it,
                               loss_val_best=metric_val_best)

        # Numbered backup that is never overwritten
        if _is_due(it, backup_every):
            print('Backup checkpoint')
            checkpoint_io.save('model_%d.pt' % it, epoch_it=epoch_it, it=it,
                               loss_val_best=metric_val_best)

        # Validation is the last step of an iteration; skip it when not due.
        if not _is_due(it, validate_every):
            continue

        eval_dict = trainer.evaluate(val_loader)
        metric_val = eval_dict[model_selection_metric]
        print('Validation metric (%s): %.4f'
              % (model_selection_metric, metric_val))

        for metric_name, metric_value in eval_dict.items():
            logger.add_scalar('val/%s' % metric_name, metric_value, it)

        # Keep a separate copy of the best model seen so far.
        improvement = model_selection_sign * (metric_val - metric_val_best)
        if improvement > 0:
            metric_val_best = metric_val
            print('New best model (loss %.4f)' % metric_val_best)
            checkpoint_io.save('model_best.pt', epoch_it=epoch_it, it=it,
                               loss_val_best=metric_val_best)

[Epoch 04] it=020, loss=278.8022
[Epoch 04] it=030, loss=288.2823
[Epoch 04] it=040, loss=269.2306
[Epoch 04] it=050, loss=246.6505
[Epoch 04] it=060, loss=306.3763
[Epoch 04] it=070, loss=240.6857
[Epoch 04] it=080, loss=266.3679
[Epoch 04] it=090, loss=255.9061
[Epoch 04] it=100, loss=190.1610
[Epoch 04] it=110, loss=237.9591
[Epoch 04] it=120, loss=198.2983
[Epoch 04] it=130, loss=241.2523
[Epoch 04] it=140, loss=193.7250
[Epoch 04] it=150, loss=202.4193
[Epoch 04] it=160, loss=180.4740
[Epoch 04] it=170, loss=202.3307
[Epoch 04] it=180, loss=224.6634
[Epoch 04] it=190, loss=222.3888
[Epoch 04] it=200, loss=213.4074
[Epoch 04] it=210, loss=208.7204
[Epoch 04] it=220, loss=178.0703
[Epoch 04] it=230, loss=217.4688
[Epoch 04] it=240, loss=198.6153
[Epoch 04] it=250, loss=231.1876
[Epoch 04] it=260, loss=187.0673
[Epoch 04] it=270, loss=177.7810
[Epoch 04] it=280, loss=200.2459
[Epoch 04] it=290, loss=226.7313
[Epoch 04] it=300, loss=197.4184
[Epoch 04] it=310, loss=198.7884
[Epoch 04]

KeyboardInterrupt: 

In [91]:
# Release cached GPU memory that PyTorch's allocator holds but is not
# currently using (tensors that are still referenced are unaffected).
torch.cuda.empty_cache()


In [93]:
# Inspect the model's `p0_z` attribute; as the last expression of the cell its
# repr is shown as the cell output.
model.p0_z

Normal(loc: torch.Size([0]), scale: torch.Size([0]))

In [97]:
import zipfile
import os

def zip_folder(folder_path, zip_path):
    ''' Write every file below a folder into a deflate-compressed zip archive.

    Entries are stored under their path relative to ``folder_path``. If the
    archive itself is located inside ``folder_path`` it is skipped, so the
    half-written zip file is never added to itself.

    Args:
        folder_path (str): directory whose files are archived (recursively)
        zip_path (str): destination of the zip file; overwritten if present
    '''
    # Resolve where the archive lives on disk so it can be recognised during
    # the walk. Anything that is not a path (e.g. a file object, which
    # ZipFile also accepts) has no on-disk location to skip.
    if isinstance(zip_path, (str, os.PathLike)):
        archive_real = os.path.realpath(zip_path)
    else:
        archive_real = None

    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # Opening the archive creates it before the walk starts, so
                # it shows up in the listing when it sits inside folder_path.
                if (archive_real is not None
                        and os.path.realpath(file_path) == archive_real):
                    continue
                zipf.write(file_path, os.path.relpath(file_path, folder_path))

# Source folder to archive and the zip file to produce from it
folder_path = 'occupancy_networks_second_ver'
zip_path = 'occupancy_networks_third_ver.zip'

# Archive the folder
zip_folder(folder_path=folder_path, zip_path=zip_path)
