In [None]:
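# Notebook to generate new training data: the train, validation and test clips are
# passed through the convolutional layers of a trained ConvGRU and saved as CSV files.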

In [1]:
from lipnet import ConvGRU

import os
from functions import *
import pandas as pd
from data.dataset import LipReadSet
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch
import sys
import json

In [2]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda", 0)
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [62]:
# Load the run configuration (paths, padding lengths, batch size, ...) from JSON.
# The encoding is given explicitly so the read does not depend on the
# platform's locale default.
config_path = os.path.join("configs", "lipnet_unseen_mark1.json")
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

In [63]:
# Display the loaded configuration dictionary for reference.
config

{'alias': 'lipnet_unseen_mark1',
 'epochs': 15,
 'batch_size': 64,
 'num_workers': 4,
 'learning_rate': 0.0001,
 'video_path': '../grid_vidimgs',
 'anno_path': '../grid_anno',
 'train_list': 'data/unseen/train_dirs.txt',
 'validation_list': 'data/unseen/val_dirs.txt',
 'test_list': 'data/unseen/test_dirs.txt',
 'vid_padding': 75,
 'txt_padding': 32}

In [64]:


# The configuration has no entry for the architecture, so the model class
# needs to be hard-coded here.
model = ConvGRU().to(device)  # Module.to() returns the module itself

# The run's files are kept under models/<alias>/.
model_save_dir = "models"
model_save_path = os.path.join(model_save_dir, config['alias'])

model
    

ConvGRU(
  (conv1): Conv3d(3, 32, kernel_size=(3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))
  (pool1): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv3d(32, 64, kernel_size=(3, 5, 5), stride=(1, 1, 1), padding=(1, 2, 2))
  (pool2): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv3d(64, 96, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool3): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (gru1): GRU(3072, 256, bidirectional=True)
  (gru2): GRU(512, 256, bidirectional=True)
  (FC): Linear(in_features=512, out_features=28, bias=True)
  (relu): ReLU(inplace=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (dropout3d): Dropout3d(p=0.5, inplace=False)
)

In [65]:
# Inspect the first convolution layer of the model.
model.conv1

Conv3d(3, 32, kernel_size=(3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))

In [66]:

# Restore the trained weights for this run.
# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved from a GPU can still be loaded on a CPU-only machine.
checkpoint_path = os.path.join(model_save_path, f"{config['alias']}.pt")
loaded_checkpoint = torch.load(checkpoint_path, map_location=device)

# The model is only used for inference in this notebook; eval mode disables
# its dropout layers.
model.eval()
model.load_state_dict(loaded_checkpoint['model_state_dict'])

<All keys matched successfully>

In [67]:
def encode_training_example(X, conv_gru_model):
    """Encode a batch of clips with the convolutional layers of the model.

    The input is passed through conv1/pool1, conv2/pool2 and conv3/pool3 of
    ``conv_gru_model`` with gradient tracking disabled, and the per-frame
    feature maps are flattened into one vector per frame.

    Args:
        X: input batch laid out as (B, C, T, H, W).
        conv_gru_model: model exposing ``conv1``, ``pool1``, ``conv2``,
            ``pool2``, ``conv3`` and ``pool3``.

    Returns:
        Contiguous tensor of shape (B, T, C*H*W), where C, H and W are the
        channel and spatial sizes after ``pool3``.
    """
    # NOTE(review): the model also has `relu` and `dropout3d` modules that are
    # not applied between the stages here. Confirm against ConvGRU.forward
    # that encoding without the activations is intended.
    with torch.no_grad():
        x = conv_gru_model.conv1(X)
        x = conv_gru_model.pool1(x)
        x = conv_gru_model.conv2(x)
        x = conv_gru_model.pool2(x)
        x = conv_gru_model.conv3(x)
        x = conv_gru_model.pool3(x)
        # (B, C, T, H, W) -> (B, T, C, H, W): keep the batch axis first and
        # bring time next to it, so one copy yields the final layout.
        x = x.permute(0, 2, 1, 3, 4)
        # (B, T, C, H, W) -> (B, T, C*H*W)
        return x.reshape(x.size(0), x.size(1), -1).contiguous()


In [68]:
# One dataset per split. All three share the video/annotation roots and the
# padding lengths from the config and differ only in the list file.
_dataset_roots = (config['video_path'], config['anno_path'])
_padding = (config['vid_padding'], config['txt_padding'])

train_dataset = LipReadSet(*_dataset_roots, config['train_list'], *_padding)

validation_dataset = LipReadSet(*_dataset_roots, config['validation_list'], *_padding)

# NOTE(review): only the test split passes the extra positional 'test'
# argument; the validation split does not. Confirm against LipReadSet that
# this asymmetry is intended.
test_dataset = LipReadSet(*_dataset_roots, config['test_list'], *_padding, 'test')

# The loaders below are only iterated once to export encodings. Shuffling
# would give each clip a different output file index on every run, so it is
# turned off to make the export reproducible.
_loader_kwargs = dict(batch_size=config['batch_size'],
                      num_workers=config['num_workers'],
                      shuffle=False)

train_loader = DataLoader(train_dataset, **_loader_kwargs)

validation_loader = DataLoader(validation_dataset, **_loader_kwargs)

test_loader = DataLoader(test_dataset, **_loader_kwargs)


In [70]:
# Encode every training clip and write it to disk:
#   ../encoding_data/train/imgs/train<i>.csv  - one row per frame of clip i
#   ../encoding_data/train/labels.csv         - file name plus label vector per clip
train_img_dir = '../encoding_data/train/imgs'
os.makedirs(train_img_dir, exist_ok=True)  # do not fail on a fresh checkout

n_batches = len(train_loader)  # real total instead of a hard-coded number
i = 0
label_names = []
label_rows = []
for batch_counter, batch in enumerate(train_loader, start=1):
    print(f"\rbatch {batch_counter} of {n_batches}", end = '')
    Y = batch['txt']
    # Encode on the device, then bring the whole batch back in one transfer.
    X = encode_training_example(batch['vid'].to(device), model).cpu()

    for x, y in zip(X, Y):
        name = f"train{i}.csv"
        pd.DataFrame(x.numpy()).to_csv(f'{train_img_dir}/{name}', index=False)
        label_names.append(name)
        label_rows.append(y.cpu())
        i += 1

# Build the label table once: column "data" holds the file name, the
# remaining columns (0, 1, ...) hold the label vector of that clip.
labels = pd.DataFrame(torch.stack(label_rows).numpy())
labels.insert(0, "data", label_names)
labels.to_csv('../encoding_data/train/labels.csv', index=False)

batch 360 of 364

In [71]:
# Encode the test and validation clips and write them to disk. For each split:
#   ../encoding_data/<split>/imgs/<split><i>.csv  - one row per frame of clip i
#   ../encoding_data/<split>/labels.csv           - file name plus label vector per clip
export_jobs = (
    ("test", "test: batch", test_loader),
    ("val", "val batch", validation_loader),
)

for split, progress_label, loader in export_jobs:
    img_dir = f'../encoding_data/{split}/imgs'
    os.makedirs(img_dir, exist_ok=True)  # do not fail on a fresh checkout

    n_batches = len(loader)  # real total instead of a hard-coded number
    i = 0
    label_names = []
    label_rows = []
    for batch_counter, batch in enumerate(loader, start=1):
        print(f"\r{progress_label} {batch_counter} of {n_batches}", end = '')
        Y = batch['txt']
        # Encode on the device, then bring the whole batch back in one transfer.
        X = encode_training_example(batch['vid'].to(device), model).cpu()

        for x, y in zip(X, Y):
            name = f"{split}{i}.csv"
            pd.DataFrame(x.numpy()).to_csv(f'{img_dir}/{name}', index=False)
            label_names.append(name)
            label_rows.append(y.cpu())
            i += 1

    # Build the label table once: column "data" holds the file name, the
    # remaining columns (0, 1, ...) hold the label vector of that clip.
    labels = pd.DataFrame(torch.stack(label_rows).numpy())
    labels.insert(0, "data", label_names)
    labels.to_csv(f'../encoding_data/{split}/labels.csv', index=False)
    print("\n")


test: batch 62 of 63

val batch 90 of 91

In [43]:
# Read one CSV file back and show its first row.
# NOTE(review): "tmp.csv" is not written by any cell visible in this notebook;
# presumably it is a leftover from an earlier trial export. Confirm, or point
# this at one of the generated files so the cell works on a fresh kernel.
image = pd.read_csv("tmp.csv")
image.iloc[0]

0      -15532.1260
1      -30078.5550
2      -28204.3710
3      -26381.6170
4      -23910.1040
           ...    
3067    -6887.7710
3068    -6752.0850
3069    -6901.8423
3070    -7936.4863
3071    -5247.5845
Name: 0, Length: 3072, dtype: float64

In [41]:
# First row again, this time without its first column.
image.iloc[0,1:]

1      -30078.5550
2      -28204.3710
3      -26381.6170
4      -23910.1040
5      -22451.6370
           ...    
3067    -6887.7710
3068    -6752.0850
3069    -6901.8423
3070    -7936.4863
3071    -5247.5845
Name: 0, Length: 3071, dtype: float64