In [None]:
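# Notebook to generate new training data: runs the trained ConvGRU convolution/pooling layers over each dataset split and saves the encoded features and their labels under ../encoding_data.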

In [1]:
from lipnet import ConvGRU

import os
from functions import *
import pandas as pd
from data.dataset import LipReadSet
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torch
import sys
import json

In [2]:
# Use the first CUDA GPU when one is visible to PyTorch; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [4]:
# Read the experiment settings (paths, batch size, padding lengths, ...) from JSON.
config_path = os.path.join("configs", "lipnet_unseen_mark2.json")
with open(config_path, "r") as config_file:
    config = json.load(config_file)

In [5]:
# Show the loaded configuration values.
config

{'alias': 'lipnet_unseen_mark2',
 'epochs': 15,
 'batch_size': 64,
 'num_workers': 4,
 'learning_rate': 0.0001,
 'video_path': '../grid_vidimgs',
 'anno_path': '../grid_anno',
 'train_list': 'data/unseen/train_dirs.txt',
 'validation_list': 'data/unseen/val_dirs.txt',
 'test_list': 'data/unseen/test_dirs.txt',
 'vid_padding': 75,
 'txt_padding': 32}

In [6]:


# The architecture class is fixed in code rather than selected through the config.
model = ConvGRU().to(device)

# Directory for this run's saved model files: models/<alias>.
model_save_dir = os.path.join("models")
model_save_path = os.path.join(model_save_dir, config['alias'])

# Display the module tree.
model
    

ConvGRU(
  (conv1): Conv3d(3, 32, kernel_size=(3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))
  (pool1): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv3d(32, 64, kernel_size=(3, 5, 5), stride=(1, 1, 1), padding=(1, 2, 2))
  (pool2): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv3d(64, 96, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
  (pool3): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
  (gru1): GRU(3072, 256, bidirectional=True)
  (gru2): GRU(512, 256, bidirectional=True)
  (FC): Linear(in_features=512, out_features=28, bias=True)
  (relu): ReLU(inplace=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (dropout3d): Dropout3d(p=0.5, inplace=False)
)

In [7]:
# Inspect the first convolutional layer on its own.
model.conv1

Conv3d(3, 32, kernel_size=(3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))

In [8]:

# Restore the trained weights from <model_save_path>/<alias>.pt.
# map_location remaps the stored tensors onto the device selected for this
# session, so a checkpoint written from a GPU still loads when only the CPU
# is available (and vice versa).
checkpoint_file = os.path.join(model_save_path, f"{config['alias']}.pt")
loaded_checkpoint = torch.load(checkpoint_file, map_location=device)
model.load_state_dict(loaded_checkpoint['model_state_dict'])

<All keys matched successfully>

In [9]:
def encode_training_example(X, conv_gru_model):
    """Run a batch through the model's three conv/pool stages and flatten per frame.

    Args:
        X: 5-D input batch; the code treats it as (B, C, T, H, W).
        conv_gru_model: object exposing ``conv1``/``pool1`` ... ``conv3``/``pool3``
            callables, applied in that order.

    Returns:
        A contiguous tensor of shape (B, T, C*H*W) computed without gradient
        tracking, where C, H and W are the sizes after the last pooling stage.

    NOTE(review): only the conv and pool modules are called here. The model
    also defines ``relu`` and ``dropout3d``; if its forward pass applies an
    activation between these stages, the features produced here differ from
    what the recurrent layers saw during training - confirm this is intended.
    """
    stages = (
        conv_gru_model.conv1, conv_gru_model.pool1,
        conv_gru_model.conv2, conv_gru_model.pool2,
        conv_gru_model.conv3, conv_gru_model.pool3,
    )
    with torch.no_grad():
        features = X
        for stage in stages:
            features = stage(features)
        # (B, C, T, H, W) -> (B, T, C, H, W)
        features = features.permute(0, 2, 1, 3, 4).contiguous()
        # (B, T, C, H, W) -> (B, T, C*H*W)
        return features.view(features.size(0), features.size(1), -1)


In [10]:
# Positional arguments shared by every split: data roots first, padding lengths last.
data_roots = (config['video_path'], config['anno_path'])
padding_args = (config['vid_padding'], config['txt_padding'])

train_dataset = LipReadSet(*data_roots, config['train_list'], *padding_args)
validation_dataset = LipReadSet(*data_roots, config['validation_list'], *padding_args)
# NOTE(review): only the test split passes the extra 'test' argument; the other
# two rely on whatever LipReadSet uses by default - confirm that is intended
# for the validation split.
test_dataset = LipReadSet(*data_roots, config['test_list'], *padding_args, 'test')

# All three loaders use the same batching options (including shuffling).
loader_options = dict(
    batch_size=config['batch_size'],
    num_workers=config['num_workers'],
    shuffle=True,
)

train_loader = DataLoader(train_dataset, **loader_options)
validation_loader = DataLoader(validation_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)


In [13]:
# Encode the training split with the model's conv/pool stages and write one
# feature pickle per sample, plus a labels.csv whose rows are
# "<file name>, <label ids...>".
train_img_dir = '../encoding_data/train/imgs'
os.makedirs(train_img_dir, exist_ok=True)  # to_pickle does not create missing folders

i = 0                # running sample index, used in the output file names
file_names = []      # pickle file name of every exported sample, in order
label_rows = []      # label tensor of every exported sample, in the same order
num_batches = len(train_loader)  # real batch count instead of a hard-coded total

for batch_counter, batch in enumerate(train_loader, start=1):
    print(f"\rbatch {batch_counter} of {num_batches}", end = '')

    # Encode on `device`, then bring the whole batch back to the CPU once.
    encoded = encode_training_example(batch['vid'].to(device), model).cpu()

    for x, y in zip(encoded, batch['txt']):
        file_name = f"train{i}.pkl"
        pd.DataFrame(x.numpy()).to_pickle(f'{train_img_dir}/{file_name}')
        file_names.append(file_name)
        label_rows.append(y.cpu())
        i += 1

# Build the label table once: columns are "data", 0, 1, ... (one per label position).
labels = pd.DataFrame(torch.stack(label_rows).numpy())
labels.insert(0, "data", file_names)
labels.to_csv('../encoding_data/train/labels.csv', index=False)

batch 360 of 360

In [14]:
def export_encoded_split(loader, split_name, progress_prefix):
    """Encode every batch of ``loader`` and save the results for one split.

    Each sample's encoded features are written to
    ``../encoding_data/<split_name>/imgs/<split_name><index>.pkl`` and the
    label rows ("data" file-name column followed by the label ids) to
    ``../encoding_data/<split_name>/labels.csv``.

    Args:
        loader: iterable with a length that yields dict batches holding 'vid'
            and 'txt' tensors.
        split_name: used both as the output sub-directory and as the file-name
            prefix (e.g. "test", "val").
        progress_prefix: text shown before the batch counter in the progress line.
    """
    img_dir = f'../encoding_data/{split_name}/imgs'
    os.makedirs(img_dir, exist_ok=True)  # to_pickle does not create missing folders

    file_names = []   # pickle file name of every exported sample, in order
    label_rows = []   # label tensor of every exported sample, in the same order
    num_batches = len(loader)  # real batch count instead of a hard-coded total

    for batch_counter, batch in enumerate(loader, start=1):
        print(f"\r{progress_prefix} {batch_counter} of {num_batches}", end = '')

        # Encode on `device`, then bring the whole batch back to the CPU once.
        encoded = encode_training_example(batch['vid'].to(device), model).cpu()

        for x, y in zip(encoded, batch['txt']):
            file_name = f"{split_name}{len(file_names)}.pkl"
            pd.DataFrame(x.numpy()).to_pickle(f'{img_dir}/{file_name}')
            file_names.append(file_name)
            label_rows.append(y.cpu())

    # Build the label table once: columns are "data", 0, 1, ... (one per label position).
    labels_df = pd.DataFrame(torch.stack(label_rows).numpy())
    labels_df.insert(0, "data", file_names)
    labels_df.to_csv(f'../encoding_data/{split_name}/labels.csv', index=False)


export_encoded_split(test_loader, "test", "test: batch")
print("\n")
export_encoded_split(validation_loader, "val", "val batch")


test: batch 62 of 63

val batch 90 of 91

In [15]:
# Sanity check: read the validation label file back from disk and look at its first row.
labels = pd.read_csv("../encoding_data/val/labels.csv")
labels.iloc[0]


Unnamed: 0,data,0,1,2,3,4,5,6,7,8,...,22,23,24,25,26,27,28,29,30,31
0,val0.pkl,20,6,21,1,3,13,22,6,1,...,6,2,20,6,0,0,0,0,0,0
1,val1.pkl,13,2,26,1,8,19,6,6,15,...,24,0,0,0,0,0,0,0,0,0
2,val2.pkl,13,2,26,1,8,19,6,6,15,...,0,0,0,0,0,0,0,0,0,0
3,val3.pkl,17,13,2,4,6,1,8,19,6,...,6,15,1,17,13,6,2,20,6,0
4,val4.pkl,20,6,21,1,24,9,10,21,6,...,13,6,2,20,6,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5795,val5795.pkl,3,10,15,1,8,19,6,6,15,...,6,2,20,6,0,0,0,0,0,0
5796,val5796.pkl,17,13,2,4,6,1,19,6,5,...,16,16,15,0,0,0,0,0,0,0
5797,val5797.pkl,20,6,21,1,3,13,22,6,1,...,24,0,0,0,0,0,0,0,0,0
5798,val5798.pkl,20,6,21,1,19,6,5,1,2,...,0,0,0,0,0,0,0,0,0,0


In [39]:
# NOTE(review): `np` has no explicit import in this notebook; presumably it is
# provided by `from functions import *` - confirm, or add `import numpy as np`
# to the import cell.

# Ids placed before and after the label sequence - presumably start/end-of-sequence
# markers; TODO confirm against the downstream model's vocabulary.
START_TOKEN = 28
END_TOKEN = 29
PADDED_LENGTH = 34  # fixed length of the zero-padded result

first_row = labels.iloc[0]
# Positional access through .iloc: plain `row[0]` on a string-labelled Series is
# deprecated positional indexing in recent pandas.
fname = first_row.iloc[0]

# Label ids only (every column after the file name), with the trailing zero
# padding removed. `labels` itself is left unmodified so the cell can be re-run.
token_ids = np.trim_zeros(first_row.iloc[1:].to_numpy(dtype=np.int64), 'b')

# START_TOKEN + label ids + END_TOKEN, zero-padded on the right to PADDED_LENGTH.
a = np.zeros(PADDED_LENGTH, dtype=np.int64)
a[:len(token_ids) + 2] = np.concatenate(([START_TOKEN], token_ids, [END_TOKEN]))
torch.from_numpy(a)

tensor([28, 20,  6, 21,  1,  3, 13, 22,  6,  1,  3, 26,  1,  6,  1,  6, 10,  8,
         9, 21,  1, 17, 13,  6,  2, 20,  6, 29,  0,  0,  0,  0,  0,  0])

In [45]:
# Spot check: load one exported training feature file back and view it as a tensor.
image = pd.read_pickle("../encoding_data/train/imgs/train1.pkl")
image_array = image.to_numpy()
torch.from_numpy(image_array)

tensor([[-15.9131, -17.1571, -17.2796,  ...,  29.1826,  30.1083,  25.6693],
        [ -8.3047,  -8.2258,  -8.5944,  ...,  26.6062,  28.1224,  23.1503],
        [  2.0509,   2.3354,   1.5428,  ...,  11.7610,  12.5050,   8.6639],
        ...,
        [-14.0184, -14.6593, -13.4382,  ...,  12.1173,  15.1479,  12.1776],
        [-13.0723, -13.7957, -12.6291,  ...,  14.6340,  17.7270,  17.2327],
        [ 20.0195,  21.3041,  19.3359,  ...,   1.7639,   3.2301,   2.2119]])