In [None]:
import glob
import torch

In [None]:
glob.glob("output_data/*/pyavi/tracks/*.avi")

In [None]:
print(list(range(1, 10, 3)))

In [None]:
class VideoIterableDataset(torch.utils.data.IterableDataset):
    
    def __init__(self, path):
        super(VideoIterableDataset).__init__()
        self.utts = []
        self.mp4s = []
        with open(path, 'r') as f:
            for i, line in enumerate(f):
                utt, _, _, _, _, mp4, _ = line.strip().split(None, 6)
                print(utt)
                print(mp4)
                self.utts.append(utt)
                self.mp4s.append(mp4)
    
    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        print(f"worker info: {worker_info}")
        if worker_info is None:
            offset = 0
            shift = 1
        else:
            offset = worker_info.id
            shift = worker_info.num_workers
        for i in range(offset, len(self.utts), shift):
            yield self.utts[i], self.load_frames(self.mp4s[i])
    
    def load_frames(self, videofile):
        cap = cv2.VideoCapture(videofile)
        frame_num = 1;
        frames = []
        while frame_num:
            frame_num += 1
            ret, image = cap.read()
            if ret == 0:
                break
            frames.append(cv2.resize(image, (224, 224)))
        frames = [frames[0], frames[0]] + frames + [frames[-1], frames[-1]]
        start = time.time()
        frames = np.stack(frames, axis=3)
        frames = np.transpose(frames, (2,3,0,1))
        frames = np.array([frames[:,i:i+5,:,:] for i in range(0, frames.shape[1] - 4)], dtype='float32')
        end = time.time()
        timetaken = end-start
        print(f"Time taken to reshape: {timetaken}")
        print(f"frame shape: {frames[0].shape}")
        return frames

In [None]:
dataset = VideoIterableDataset('%s/wav.scp' % directory)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=None, shuffle=False, num_workers=8)

In [None]:
glob.glob('/afs/inf.ed.ac.uk/group/cstr/datawww/asru/MGB1/scripts/*')

In [None]:
f = open( '/group/cstr/datawww/asru/MGB1/data/scoring/task2_eval.ref.ctm', 'r')
lines = f.read()
print(lines)
f.close()


In [None]:
!cp /afs/inf.ed.ac.uk/group/cstr/datawww/asru/MGB1/scripts/eval.task1~ .

In [None]:
filename = "20080505_180000_bbcfour_the_book_quiz"
evalfiles = glob.glob(f'/afs/inf.ed.ac.uk/group/project/nst/bbcdata/**/*{filename}*.ts', recursive=True)

In [None]:
evalfiles

In [None]:
glob.glob(" /afs/inf.ed.ac.uk/group/project/summa/MGB1/*")

# Dataloader stuff

In [1]:
import torch 
import numpy as np
import cv2
import time, glob, shutil, datetime
from matplotlib import pyplot as plt
import csv, json
import xml.etree.ElementTree as ET 
import pandas as pd
from collections import defaultdict
from syncnet_python.facetrack import *
from syncnet_python.syncnet import *
from itertools import cycle

In [2]:
class VideoIterableDataset(torch.utils.data.IterableDataset):
    
    def __init__(self, data_dir):
        super(VideoIterableDataset).__init__()
        self.utts = []
        self.avis = []
        for utt in glob.glob(data_dir+'*'):
            self.utts.append(utt)
            avi = utt+'/pyavi/tracks/video.avi'
            self.avis.append(avi)
    
    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            offset = 0
            shift = 1
        else:
            offset = worker_info.id
            shift = worker_info.num_workers
        for i in range(offset, len(self.utts), shift):
            yield self.utts[i], self.load_frames(self.avis[i])
    
    def load_frames(self, videofile):
        cap = cv2.VideoCapture(videofile)
        frame_num = 1;
        frames = []
        while frame_num:
            frame_num += 1
            ret, image = cap.read()
            if ret == 0:
                break            
            frames.append(image)
        return frames
    
def prepare_output_directory(location):
    ready = True
    does_not_require_processing = False
    incomplete_directory_exists = os.path.isdir(location) and not os.path.exists(f"{location}/utterance_info.csv")
    if(incomplete_directory_exists):
        shutil.rmtree(location)
    elif(os.path.isdir(location)):
        return does_not_require_processing  #  This utterance has been processed already. Continuing to next utterance..
    else:
        pass
    subprocess.run("mkdir -p " + location + "/pyavi/tracks/", stdout=subprocess.DEVNULL, shell=True)    
    return ready

def create_transcript_from_XML(location, item):
    # TODO: the transcript should only contain the words spoken in the final cropped video. 
    utterance = ""
    for child in item:
        utterance+=child.text + " "
    data = item.attrib
    data.update({"utterance": utterance})
    with open(location + '/transcript.txt', 'w') as outfile:
        outfile.write(str(data))

def cut_into_utterances(filename, output_dir, maxWMER=1000):
    
    xmldir = "/afs/inf.ed.ac.uk/group/cstr/datawww/asru/MGB1/data/xml/"
    xmlfile = xmldir + filename + ".xml"
    tree = ET.parse(xmlfile)
    root = tree.getroot()
    utterance_items = []
    paths = glob.glob(f"/afs/inf.ed.ac.uk/group/project/nst/bbcdata/ptn*/**/{filename}*.ts") + glob.glob(f"/afs/inf.ed.ac.uk/group/project/nst/bbcdata/raw/{filename}*.ts")
    inputVideo = paths[0]
    command_elems = ["ffmpeg -loglevel quiet -y -i " + inputVideo]
    for item in root.findall('./body/segments/segment'):
        if (item.attrib['id'].split('_')[-1]=='align' and float(item.attrib['WMER'])<=maxWMER):
            if (float(item.attrib['endtime']) - float(item.attrib['starttime'])<2):
                continue                        
            location = output_dir + item.attrib['id']
            reference = "tracks"
            status = prepare_output_directory(location)
            if status:
                utterance_items.append(item)
                data = item.attrib
                start = datetime.timedelta(seconds=float(data['starttime']))
                end = datetime.timedelta(seconds=float(data['endtime']))
                output = location + '/pyavi/tracks/video.avi'
                command_elems.append(" -ss " + str(start) + " -to " + str(end) + " -c copy " + output)
                create_transcript_from_XML(location, item)
    command = "".join(command_elems)
    s = time.time()
    result = subprocess.run(command, shell=True, stdout=None)
    if result.returncode != 0:
        print(f"ERROR: ffmpeg failed to trim video: {filename}")
        print(f"result: {result}")
    t = time.time() - s
    print(f"Took {t} seconds to trim {len(command_elems)-1} utterances")
    return utterance_items

def getGenre(filename):
    xmldir = "/afs/inf.ed.ac.uk/group/cstr/datawww/asru/MGB1/data/xml/"
    xmlfile = xmldir + filename + ".xml"
    tree = ET.parse(xmlfile)
    root = tree.getroot()
    head = root.find('./head/recording')
    genre = head.attrib["genre"]
    return genre

In [3]:
data_dir = '/disk/scratch/s1768177/pipeline/output_data/'
filelist = "/afs/inf.ed.ac.uk/group/cstr/datawww/asru/MGB1/scripts/train.short"
desired_genres = ["drama", "childrens", "news", "documentary"]

In [4]:
count = 1
with open(filelist, "r") as f:
    files = f.read().split()
files = files[:1]
print(f"\n{datetime.datetime.now()}. Cutting utterances from raw videos.")
total_utterances_processed = 0
for filename in files:
    start = time.time()
    genre = getGenre(filename)
    if (genre in desired_genres):
        print(f"{count}. {filename}. ({genre}) ")
        count += 1
        utterance_items = cut_into_utterances(filename, data_dir)
        total_utterances_processed += len(utterance_items)
print(f"\nFinished Cutting total {total_utterances_processed} utterances from {count-1} videos")


2021-07-27 14:22:49.429436. Cutting utterances from raw videos.
1. 20080505_000500_bbcone_weatherview. (news) 
Took 0.6894359588623047 seconds to trim 17 utterances

Finished Cutting total 17 utterances from 1 videos


In [5]:
dataset = VideoIterableDataset(data_dir)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=None, shuffle=False, num_workers=16)
facetrack = FaceTrack('cuda:1')
for i, (utt, frames) in enumerate(dataloader):
    print(i, utt, len(frames))
    facetrack.run(data_dir=utt, frames=frames)
    no_faces_found = len(os.listdir(utt + "/pycrop/tracks/")) == 0
    if(no_faces_found):
        shutil.rmtree(utt)

[S3FD] loading with cuda:1
[S3FD] finished loading (3.9826 sec)
0 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_9_align 224
1 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_8_align 643
2 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_12_align 90
3 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_4_align 281
4 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_6_align 117
5 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_15_align 522
6 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_13_align 251
7 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_20_align 83
8 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_2_align 124
9 /disk/scratch/s1768177/pipeline/output_da

In [6]:
class SyncNetIterableDataset(torch.utils.data.IterableDataset):    
    def __init__(self, path):
        super(SyncNetIterableDataset).__init__()
        self.utts = []
        self.avis = []
        for avi in glob.glob(data_dir+'*/pycrop/tracks/*.avi'):
            self.avis.append(avi)
            utt = avi.split('/pycrop/')[0]
            self.utts.append(utt)
    
    def __iter__(self):
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            offset = 0
            shift = 1
        else:
            offset = worker_info.id
            shift = worker_info.num_workers
        for i in range(offset, len(self.utts), shift):
            yield self.utts[i], self.avis[i], self.load_frames(self.avis[i])
    
    def load_frames(self, videofile):
        cap = cv2.VideoCapture(videofile)
        frame_num = 1;
        frames = []
        while frame_num:
            frame_num += 1
            ret, image = cap.read()
            if ret == 0:
                break
            frames.append(cv2.resize(image, (224, 224)))
        cap.release()
        cv2.destroyAllWindows()
        frames = [frames[0], frames[0]] + frames + [frames[-1], frames[-1]]
        frames = np.stack(frames, axis=3)
        frames = np.transpose(frames, (2,3,0,1))
        frames = np.array([frames[:,i:i+5,:,:] for i in range(0, frames.shape[1] - 4)], dtype='float32')
        return frames

In [7]:
dataset = SyncNetIterableDataset(data_dir)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=None, shuffle=False, num_workers=16)
syncnet = SyncNet()
for i, (utt, avi, frames) in enumerate(dataloader):
    print(i, utt, avi, len(frames))
    syncnet.setup(utt)
    offset, conf, dist = syncnet.evaluate(avi,frames)
    print(offset, conf)

Model syncnet_python/data/syncnet_v2.model loaded.
0 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_8_align /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_8_align/pycrop/tracks/00001.avi 104
1 9.404757
1 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_8_align /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_8_align/pycrop/tracks/00000.avi 422
1 5.99399
2 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_4_align /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_4_align/pycrop/tracks/00000.avi 175
1 7.0156937
3 /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_15_align /disk/scratch/s1768177/pipeline/output_data/ID20080505_000500_bbcone_weatherview_utt_15_align/pycrop/tracks/00001.avi 117
1 7.0394135
4 /disk/scratch/s1768177/pipeline/outp