In [1]:

from pathlib import Path
import sys
import os

# path_root = Path(__file__).parents[1]
# sys.path.append(str(path_root))
# os.chdir('..')
%cd ..
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
from os.path import join as jn
import yaml

import torch
from torch.utils.data import DataLoader
from tqdm.autonotebook import tqdm
import json

from video_module import Stack_dataset, \
    fit_epoch, eval_epoch, predict, eval_dataset

/home/amir/projects/frame_stack


In [2]:
# Load the experiment hyper-parameters and the project path mapping.
with open('params.yaml') as conf_file:
    config = yaml.safe_load(conf_file)
with open('pathes.yaml') as conf_file:
    path_config = yaml.safe_load(conf_file)

# exist_ok=True makes directory creation idempotent, so no separate
# existence check is needed before the call.
os.makedirs(path_config['reports_path'], exist_ok=True)

In [3]:
# Seed the torch and NumPy global RNGs from the single configured value.
random_seed = config['random_seed']
torch.manual_seed(random_seed)
np.random.seed(random_seed)


In [4]:

# Dataset locations, taken from the path config.
input_path = path_config['train_s_video_path']
test_input_path = path_config['test_s_video_path']
output_path = path_config['p_video_path']

# Frame-stack settings from the 'video_train' section of the config.
tr = config['video_train']
frames_number = tr["frames_number"]
frames_interval = tr["frames_interval"]

# Both datasets share the same output path and frame-stack settings;
# the test dataset is built first, then the train dataset.
stack_args = (frames_number, frames_interval)
test_dataset = Stack_dataset(output_path, test_input_path, *stack_args)
train_dataset = Stack_dataset(output_path, input_path, *stack_args)


Dataset loading:   0%|                                                                                        …

Dataset loading:   0%|                                                                                        …

In [5]:

# Query CUDA availability once; derive both the device name and the message.
cuda_available = torch.cuda.is_available()
device = 'cuda' if cuda_available else 'cpu'
if cuda_available:
    print('CUDA is available!  Training on GPU ...')
else:
    print('CUDA is NOT available.  Training on CPU ...')


CUDA is available!  Training on GPU ...


In [16]:
# Re-read params.yaml into `config`.
# NOTE(review): `tr` (bound in an earlier cell) is NOT refreshed by this
# reload, so model_name / layers / learning_rate below still come from the
# previously loaded config — confirm whether that is intended.
with open('params.yaml') as conf_file:
    config = yaml.safe_load(conf_file)

# Shapes of the two arrays in one dataset sample; their last two dimensions
# are what the model constructor receives.
signal_shape, pressure_shape = (x.shape for x in train_dataset[0])
print('input chain shape: ', signal_shape, '\noutput chain shape: ',
      pressure_shape)

model_name = tr['model_name']
import models_src

# Resolve the class by attribute lookup rather than eval(): identical result
# for a plain class name, but a config value can no longer run arbitrary code.
model_class = getattr(models_src, model_name)

# Extra positional args after the two shapes:
# [layers,] frames_number, frames_interval ("Param*" models take `layers`).
args = []
if model_name.startswith("Param"):
    layers = tr['layers']
    args.append(layers)
args.append(frames_number)
args.append(frames_interval)
model = model_class(pressure_shape[-2:], signal_shape[-2:], *args)
model = model.to(device)

optim = torch.optim.Adam(model.parameters(), lr=tr['learning_rate'])
loss_fn = torch.nn.MSELoss()


input chain shape:  (1, 4, 64) 
output chain shape:  (64, 64)


In [17]:
def iter_train(train_dataset, test_dataset, model, epochs, optimizer,
               criterion):
    """Train for `epochs` epochs, yielding the losses after each one.

    Every iteration calls `fit_epoch` on `train_dataset` and then
    `eval_epoch` on `test_dataset`. Batch sizes and the device are read from
    the notebook-level `tr`, `config` and `device`.

    Yields:
        tuple: ``(train_loss, test_loss)`` — the values returned by
        `fit_epoch` and `eval_epoch` for the epoch that just finished.
    """
    for _ in range(epochs):
        # Tuple elements are evaluated left to right: fit first, then eval.
        yield (
            fit_epoch(model, train_dataset, criterion, optimizer,
                      tr['batch_size'], device),
            eval_epoch(model, test_dataset, criterion,
                       config['test_batch_size'], device),
        )

In [18]:
# Per-epoch (train_loss, test_loss) tuples; the training loop appends to it.
history = list()

In [19]:
# Epoch count is pinned to 30 here instead of being read from
# config['video_train']['epochs'].
epochs = 30

In [20]:
# Drive the training generator, updating the progress bar and the on-disk
# reports/checkpoints after every finished epoch.
progress_options = dict(total=epochs,
                        position=0,
                        unit='epoch',
                        desc="Learning",
                        dynamic_ncols=True)
with tqdm(**progress_options) as pbar:
    for h in iter_train(train_dataset, test_dataset, model, epochs, optim,
                        loss_fn):
        history.append(h)
        train_loss, test_loss = h
        pbar.update()
        pbar.set_postfix(train_loss=train_loss, test_loss=test_loss)

        # Rewrite both loss reports from the full history: a header row
        # followed by one value per finished epoch.
        titles = ["full_train_loss", "full_test_loss"]
        res = np.array([titles] + history)
        for j, title in enumerate(titles):
            report_file = jn(path_config['reports_path'], title + '.csv')
            np.savetxt(report_file, res[:, j], delimiter=',', fmt='%s')

        # Numbered checkpoint (model + optimizer state) for this epoch.
        checkpoint_state = {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optim.state_dict(),
        }
        torch.save(checkpoint_state, f'checkpoint{len(history)}.pt')

        os.system('dvc plots show -q')

Learning:   0%|                                                                                               …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



eval_epoch:   0%|                                                                                             …

eval_epoch:   0%|                                                                                             …

KeyboardInterrupt: 

In [29]:
# Summarise the run with the losses of the epoch that has the lowest test loss.
if not history:
    # zip(*[]) would otherwise fail with an opaque unpacking error.
    raise ValueError("history is empty: run at least one training epoch "
                     "before writing the summary")

full_train_loss, full_test_loss = zip(*history)
# Locate the best epoch once and reuse the index for both entries.
best_epoch = int(np.argmin(full_test_loss))
res = {
    'train': {
        # float() keeps the value JSON-serialisable even for NumPy scalars.
        'loss': float(full_train_loss[best_epoch])
    },
    'test': {
        'loss': float(full_test_loss[best_epoch])
    }
}
with open(jn(path_config['reports_path'], "v_summary.json"), "w") as f:
    json.dump(res, f)

In [23]:
!python csv_logger.py

INFO - fiber_sensor_frame_stack - Running command 'send_csv'
INFO - fiber_sensor_frame_stack - Started run with ID "256"
INFO - fiber_sensor_frame_stack - Completed after 0:00:00


In [22]:
# Save the whole model object as <v_model_path>/<model_name>.pt.
# exist_ok=True makes directory creation idempotent, replacing the
# separate existence check.
os.makedirs(path_config['v_model_path'], exist_ok=True)
torch.save(model, jn(path_config['v_model_path'], model_name + '.pt'))


In [21]:
from video_module import Dynamic_video_dataset, predict, visual_chains

# Model used by visual_dataset for predictions.
v_model = model

# Output directory for the visualisations; exist_ok=True makes creation
# idempotent, replacing the separate existence check.
save_path = path_config['video_predict_vis_path']
os.makedirs(save_path, exist_ok=True)


def visual_dataset(dataset, step, max_items, begin=0):
    """Visualise predictions against targets for a sample of `dataset`.

    Takes every `step`-th item (at most `max_items` of them), runs the
    notebook-level `v_model` on the signal from index `begin` onwards and
    hands the target/prediction pair to `visual_chains` with an output path
    under `save_path`. For each item an `<img>` line is printed to the
    notebook-level `file` handle, preceded by a `#### id` heading whenever
    the group prefix of the file name changes.
    """
    total = min(len(dataset.files) // step, max_items)
    stop = total * step
    sampled = zip(dataset.pressure[:stop:step], dataset.signal[:stop:step],
                  dataset.files[:stop:step])

    prev_id = ''
    for pressure, signal, file_name in tqdm(sampled, total=total):
        # Drop the last 4 characters (presumably the extension — TODO confirm).
        file_name = file_name[:-4]
        # Group id: the name cut at its last '/', then cut at the last '/' again.
        group_id = file_name[:file_name.rfind('/')]
        group_id = group_id[:group_id.rfind('/')]
        if group_id != prev_id:
            print(f"\n#### id = {group_id}", file=file)
            prev_id = group_id

        prediction = predict(v_model, signal[begin:], device)
        # Keep only the trailing prediction.shape[0] entries of the target.
        pressure = pressure[-prediction.shape[0]:]

        flat_name = file_name.replace('/', '_')
        visual_chains([pressure, prediction], jn(save_path, flat_name))
        print(f"<img src={flat_name + '.gif'} width=400>", file=file)



# Write the markdown index into save_path. The context manager guarantees the
# handle is closed even if visualisation fails part-way through.
# `file` stays a notebook-level name because visual_dataset prints to it.
with open(jn(save_path, "view.md"), 'w') as file:
    print("# Visualization", file=file)

    # pred_test_dataset = Dynamic_video_dataset(output_path, test_input_path)
    pred_test_dataset = test_dataset
    print("# Test dataset", file=file)
    visual_dataset(pred_test_dataset, **config['visual']['test'])

  0%|          | 0/32 [00:00<?, ?it/s]

In [24]:
# checkpoint saving: snapshot the model and optimizer state into one file
checkpoint_state = {
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optim.state_dict(),
}
torch.save(checkpoint_state, 'checkpoint.pt')

In [27]:
# checkpoint loading
# map_location remaps the saved tensors onto the active device, so a
# checkpoint written on GPU can also be restored in a CPU-only session.
checkpoint = torch.load('checkpoint.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optim.load_state_dict(checkpoint['optimizer_state_dict'])

# Switch back to training mode; as the cell's last expression this also
# displays the model.
model.train()

Unet1(
  (step1): Sequential(
    (0): Conv2d(4, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (3): ReLU()
    (4): ReLU()
    (5): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (6): ReLU()
  )
  (down1): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): ReLU()
  )
  (down2): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (3): ReLU()
    (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): ReLU()
  )
  (down3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1