In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/VOCPN/models

Mounted at /content/drive
/content/drive/MyDrive/VOCPN/models


In [2]:
from c3d import C3D
from vcopn import VCOPN, VCOPN_attention
from torch import nn
import torch
import random
import itertools
import torch.optim as optim

from encoder import *

def make_encoder(generator, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    # Basic Components
    attn = MultiHeadAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    
    encoder = Encoder(EncoderLayer(d_model, deepcopy(attn), deepcopy(ff), dropout), generator, N)
 
    model = encoder

    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform(p)
    return model


input = torch.randn(1, 3, 3, 24, 112, 112)
print("input shape", input.shape)
base = C3D(with_classifier=False)
# result = base(input)

generator = Generator(512, 4)

encoder = make_encoder(generator, h = 4)

# vcopn = VCOPN_attention(base_network=base, feature_size=512, tuple_len=3, encoder = encoder)
# result = vcopn(input)
# del result

input shape torch.Size([1, 3, 3, 24, 112, 112])




In [None]:
def data_gen():
    total = 3 * 3 * 24 # Batch_size, Tuple_length * Channel * Time

    multiplier = np.sort(np.random.randint(0,216,(total))).reshape(total,1,1)

    inp = list((np.ones((total,112,112)) * multiplier).reshape(3,3,24, 112, 112))
    tgt = list(range(0,4))

    frame_and_order = list(zip(inp, tgt))

    random.shuffle(frame_and_order)
    tuple_frame, tuple_order = zip(*frame_and_order)
    tuple_frame = [torch.from_numpy(frame).double() for frame in tuple_frame]

    return torch.stack(tuple_frame), torch.tensor(tuple_order).double()

def batch_gen(batch_size):
    batch = []
    for i in range(batch_size):
        batch.append((data_gen()))
    tuple_frame, tuple_order = zip(*batch)
    tuple_frame = torch.stack(tuple_frame)
    tuple_order = torch.stack(tuple_order)
    return tuple_frame, tuple_order

def order_class_index(order):
    """Return the index of the order in its full permutation.
    
    Args:
        order (tensor): e.g. [0,1,2]
    """
    classes = list(itertools.permutations(list(range(len(order)))))
    return classes.index(tuple(order.tolist()))

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

vcopn = VCOPN_attention(base_network=base, feature_size=512, tuple_len=3, encoder = encoder).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(vcopn.parameters(), lr=0.000005, momentum=9e-1, weight_decay=5e-4)
torch.set_grad_enabled(True)
# model.train()

running_loss = 0.0
correct = 0
batch_size = 2

from tqdm import tqdm
for i in range(1000):
  tuple_frame, tuple_order = batch_gen(batch_size)
  tuple_frame = tuple_frame.type(torch.FloatTensor)
  tuple_order = tuple_order.type(torch.FloatTensor)

  tuple_frame = tuple_frame.to(device)
  tuple_order = tuple_order.to(device)

  outputs = vcopn(tuple_frame.float()).to(device)
  targets = [order_class_index(order) for order in tuple_order]
  targets = torch.tensor(targets).to(device)

  # zero the parameter gradients
  optimizer.zero_grad()
  # forward and backward

  loss = criterion(outputs, targets)
  loss.backward()
  optimizer.step()
  
  running_loss += loss.item()
  pts = torch.argmax(outputs, dim=1)
  correct += torch.sum(targets == pts).item()

  if i % 10 == 0:
    avg_loss = running_loss / 10
    avg_acc = correct / (10 * batch_size)
    print('[TRAIN] epoch-{}, batch-{}, loss: {:.3f}, acc: {:.3f}'.format(1, i, avg_loss, avg_acc))
    # step = (epoch-1)*len(train_dataloader) + i
    # writer.add_scalar('train/CrossEntropyLoss', avg_loss, step)
    # writer.add_scalar('train/Accuracy', avg_acc, step)
    running_loss = 0.0
    correct = 0

targets = [order_class_index(order) for order in tuple_order]
targets = torch.tensor(targets)

# print(result)
print(targets)
print(result.size())

[TRAIN] epoch-1, batch-0, loss: 13.818, acc: 0.050
[TRAIN] epoch-1, batch-10, loss: 360.044, acc: 0.250
[TRAIN] epoch-1, batch-20, loss: 913.300, acc: 0.300
[TRAIN] epoch-1, batch-30, loss: 519.089, acc: 0.300
[TRAIN] epoch-1, batch-40, loss: 599.265, acc: 0.300
[TRAIN] epoch-1, batch-50, loss: 657.280, acc: 0.150
[TRAIN] epoch-1, batch-60, loss: 382.900, acc: 0.550
[TRAIN] epoch-1, batch-70, loss: 408.975, acc: 0.200
[TRAIN] epoch-1, batch-80, loss: 164.631, acc: 0.600
[TRAIN] epoch-1, batch-90, loss: 183.525, acc: 0.500
[TRAIN] epoch-1, batch-100, loss: 160.325, acc: 0.400
[TRAIN] epoch-1, batch-110, loss: 205.270, acc: 0.200
[TRAIN] epoch-1, batch-120, loss: 135.446, acc: 0.100
[TRAIN] epoch-1, batch-130, loss: 79.033, acc: 0.400
[TRAIN] epoch-1, batch-140, loss: 72.673, acc: 0.450
[TRAIN] epoch-1, batch-150, loss: 56.958, acc: 0.200
[TRAIN] epoch-1, batch-160, loss: 44.127, acc: 0.300
[TRAIN] epoch-1, batch-170, loss: 27.521, acc: 0.300
[TRAIN] epoch-1, batch-180, loss: 29.357, acc

KeyboardInterrupt: ignored

In [None]:
vcopn = VCOPN(base_network=base, feature_size=512, tuple_len=3).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(vcopn.parameters(), lr=0.0005, momentum=9e-1, weight_decay=5e-4)
torch.set_grad_enabled(True)
# model.train()

running_loss = 0.0
correct = 0
batch_size = 2

from tqdm import tqdm
for i in range(1000):
  tuple_frame, tuple_order = batch_gen(batch_size)
  tuple_frame = tuple_frame.type(torch.FloatTensor)
  tuple_order = tuple_order.type(torch.FloatTensor)

  tuple_frame = tuple_frame.to(device)
  tuple_order = tuple_order.to(device)

  outputs = vcopn(tuple_frame.float()).to(device)
  targets = [order_class_index(order) for order in tuple_order]
  targets = torch.tensor(targets).to(device)

  # zero the parameter gradients
  optimizer.zero_grad()
  # forward and backward

  loss = criterion(outputs, targets)
  loss.backward()
  optimizer.step()
  
  running_loss += loss.item()
  pts = torch.argmax(outputs, dim=1)
  correct += torch.sum(targets == pts).item()

  if i % 10 == 0:
    avg_loss = running_loss / 10
    avg_acc = correct / (10 * batch_size)
    print('[TRAIN] epoch-{}, batch-{}, loss: {:.3f}, acc: {:.3f}'.format(1, i, avg_loss, avg_acc))
    # step = (epoch-1)*len(train_dataloader) + i
    # writer.add_scalar('train/CrossEntropyLoss', avg_loss, step)
    # writer.add_scalar('train/Accuracy', avg_acc, step)
    running_loss = 0.0
    correct = 0

[TRAIN] epoch-1, batch-0, loss: 0.178, acc: 0.000
[TRAIN] epoch-1, batch-10, loss: 1.789, acc: 0.150
[TRAIN] epoch-1, batch-20, loss: 1.786, acc: 0.300
[TRAIN] epoch-1, batch-30, loss: 1.791, acc: 0.200
[TRAIN] epoch-1, batch-40, loss: 1.796, acc: 0.100
[TRAIN] epoch-1, batch-50, loss: 1.780, acc: 0.300
[TRAIN] epoch-1, batch-60, loss: 1.798, acc: 0.000
[TRAIN] epoch-1, batch-70, loss: 1.794, acc: 0.150
[TRAIN] epoch-1, batch-80, loss: 1.798, acc: 0.150
[TRAIN] epoch-1, batch-90, loss: 1.792, acc: 0.200
[TRAIN] epoch-1, batch-100, loss: 1.798, acc: 0.150
[TRAIN] epoch-1, batch-110, loss: 1.795, acc: 0.050
[TRAIN] epoch-1, batch-120, loss: 1.788, acc: 0.150
[TRAIN] epoch-1, batch-130, loss: 1.789, acc: 0.200
[TRAIN] epoch-1, batch-140, loss: 1.792, acc: 0.200
[TRAIN] epoch-1, batch-150, loss: 1.785, acc: 0.300
[TRAIN] epoch-1, batch-160, loss: 1.792, acc: 0.150
[TRAIN] epoch-1, batch-170, loss: 1.793, acc: 0.250
[TRAIN] epoch-1, batch-180, loss: 1.800, acc: 0.050
[TRAIN] epoch-1, batch-

KeyboardInterrupt: ignored