In [3]:
"""
Select video files from the LedaSila database,
extract n frames per video and save them to disk

Sign language video files copyright Alpen Adria Universität Klagenfurt 
http://ledasila.aau.at

Code based on:
https://github.com/harvitronix/five-video-classification-methods
"""

'\nSelect video files from LedaSila database, \nextract n frames per video and save to disc\n\nSign language video files copyright Alpen Adria Universität Klagenfurt \nhttp://ledasila.aau.at\n\nCode based on:\nhttps://github.com/harvitronix/five-video-classification-methods\n'

In [93]:
import numpy as np
import pandas as pd

import os
import glob

from subprocess import call

import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
%matplotlib inline

In [117]:
# Frame count per video, fixed upfront: it is handed to ffmpeg's "-frames"
# option and becomes part of the feature file names.
nFrames_Video: int = 20

In [116]:
# Read all available videos in a dataframe

In [99]:
# Recursively list everything below the LedaSila data folder (folders and
# files alike) and store the paths in a one-column dataframe.
liPaths = glob.glob("../data/01-ledasila/**", recursive=True)
dfFiles = pd.DataFrame(
    liPaths,
    dtype=str,
    columns=["sPath"],
)

In [100]:
# peek at the first rows of the raw listing
dfFiles.head(n=5)

Unnamed: 0,sPath
0,../data/01-ledasila/
1,../data/01-ledasila/_tar
2,../data/01-ledasila/_tar/ÖGS Gebärden.tar
3,../data/01-ledasila/r
4,../data/01-ledasila/r/Ringen---k---vid-77479--...


In [101]:
# extract file names (last path component); os.path.basename honours the
# platform's path separators instead of assuming "/"
dfFiles["sFile"] = dfFiles.sPath.apply(os.path.basename)

In [102]:
# drop files/folders without extension, i.e. names that contain no "."
# A boolean filter is used instead of overwriting rows with None and calling
# dropna(), which would also discard any unrelated row holding a missing value.
bHasExtension = dfFiles.sFile.str.contains(".", regex=False, na=False)
dfFiles = dfFiles.loc[bHasExtension, :].copy()

In [103]:
# only retain mp4 files
# Test the end of the file name: looking at the second dot-separated segment
# (split(".")[1]) wrongly rejects names with additional dots such as
# "a.b.mp4" and raises IndexError for names without any dot.
bIsMp4 = dfFiles.sFile.str.endswith(".mp4", na=False)
dfFiles = dfFiles.loc[bIsMp4, :].copy()

In [104]:
# extract word
# File names look like "<word>---<tag>---vid-<id>---lsid-<id>.mp4", so the
# fields are separated by a triple dash. Splitting on a single "-" would cut
# hyphenated words short and merge distinct words into one group.
dfFiles["sWord"] = dfFiles.sFile.apply(lambda s: s.split("---")[0])
dfFiles.head()

Unnamed: 0,sPath,sFile,sWord
4,../data/01-ledasila/r/Ringen---k---vid-77479--...,Ringen---k---vid-77479---lsid-46634.mp4,Ringen
5,../data/01-ledasila/r/Rote__R_uebe---ogs---vid...,Rote__R_uebe---ogs---vid-27931---lsid-29402.mp4,Rote__R_uebe
6,../data/01-ledasila/r/reden---k---vid-76401---...,reden---k---vid-76401---lsid-46120.mp4,reden
7,../data/01-ledasila/r/rauchen---ogs---vid-2697...,rauchen---ogs---vid-26975---lsid-28269.mp4,rauchen
8,../data/01-ledasila/r/riechen---s---vid-76597-...,riechen---s---vid-76597---lsid-46215.mp4,riechen


In [105]:
# report corpus size: number of videos and number of distinct words
nVideos = len(dfFiles)
nWords = len(dfFiles.sWord.unique())
print("%d videos, with %d unique words" % (nVideos, nWords))

33308 videos, with 15684 unique words


In [106]:
# count the videos available per word, most frequent word first
seWord_count = dfFiles.groupby("sWord").size()
dfWord_freq = seWord_count.sort_values(ascending=False).reset_index(name="nCount")

# keep only the words that occur at least nMinOccur times
nMinOccur = 18
bFrequent = dfWord_freq.nCount >= nMinOccur
dfWord_top = dfWord_freq.loc[bFrequent, :]
dfWord_top

Unnamed: 0,sWord,nCount
0,nein,34
1,schw_aermen,27
2,was,25
3,wann,24
4,nicht,22
5,fertig,22
6,ja,21
7,Banane,20
8,Motorrad,20
9,Fenster,20


In [107]:
# restrict the file list to the frequent words; the right join keeps every
# row of dfWord_top and attaches nCount to each matching video
dfVideos = dfFiles.merge(dfWord_top, how="right", on="sWord")

In [108]:
# report the size of the selection and the smallest per-word video count
nVideos = len(dfVideos)
nWords = len(dfVideos.sWord.unique())
nMinCount = dfWord_top.nCount.min()
print("%d videos, %d unique words, min %d occurences" % (nVideos, nWords, nMinCount))

440 videos, 21 unique words, min 18 occurences


In [109]:
# show five randomly chosen rows of the selected videos
dfVideos.sample(n=5)

Unnamed: 0,sPath,sFile,sWord,nCount
272,../data/01-ledasila/m/Motorrad---s---vid-34801...,Motorrad---s---vid-34801---lsid-17690.mp4,Motorrad,20
23,../data/01-ledasila/n/nicht---t-ogs---vid-1512...,nicht---t-ogs---vid-1512---lsid-4755.mp4,nicht,22
429,../data/01-ledasila/w/was---v---vid-12877---ls...,was---v---vid-12877---lsid-17075.mp4,was,25
282,../data/01-ledasila/j/ja---ooe---vid-10589---l...,ja---ooe---vid-10589---lsid-14652.mp4,ja,21
439,../data/01-ledasila/w/was---w---vid-12875---ls...,was---w---vid-12875---lsid-17074.mp4,was,25


In [115]:
# test ffmpeg on one video

#seVideo = dfVideos.loc[0,:]
#print(os.getcwd())
#print(seVideo.sPath)
#call(["ffmpeg", "-i", seVideo.sPath, "-frames", "30", "../data/10-frames/Nachmittag---Oô-UK---vid-41601---lsid-34163/frame-%04d.jpg"])

/Users/Frederik/Dev/sign-language/02-leadsila
../data/01-ledasila/n/Nachmittag---Oô-UK---vid-41601---lsid-34163.mp4
0


In [127]:
# extract frames from each video
for pos, seVideo in dfVideos.iterrows():

    # For each video create a separate directory in 10-frames, named after the
    # file without its extension. splitext keeps stems that contain dots intact,
    # and makedirs also creates a missing "../data/10-frames" parent and does
    # not fail when the directory already exists.
    sDir = os.path.join("../data/10-frames", os.path.splitext(seVideo.sFile)[0])
    os.makedirs(sDir, exist_ok=True)
    dfVideos.loc[pos, "sPathFrames"] = sDir

    # Call ffmpeg to write the first nFrames_Video frames as numbered jpg files.
    # "-y" overwrites frames from an earlier run instead of stopping at
    # ffmpeg's interactive overwrite prompt.
    sFrames = os.path.join(sDir, "frame-%04d.jpg")
    nReturn = call(["ffmpeg", "-y", "-i", seVideo.sPath, "-frames", str(nFrames_Video), sFrames])

    # Report failures instead of silently continuing with missing frames.
    if nReturn != 0:
        print("ffmpeg failed (exit code %d) for %s" % (nReturn, seVideo.sPath))

In [None]:
# Extract InceptionV3 features from all frames and save to file

In [126]:
# get the InceptionV3 model
# NOTE(review): `cnninception` is presumably a project-local module that must
# be importable from the notebook's working directory -- confirm its location.
from cnninception import InceptionV3_features

# One extractor instance is created here and reused for every frame below.
cnn = InceptionV3_features()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [139]:
# feed all frames into cnn, save results in file per video

# np.save does not create folders, so make sure the target folder exists
os.makedirs("../data/20-features", exist_ok=True)

for pos, seVideo in dfVideos.iterrows():

    print("Extracting features from", seVideo.sFile)

    # Retrieve the frame files. glob returns them in arbitrary order, so sort
    # by name ("frame-0001.jpg", "frame-0002.jpg", ...) to keep the frames in
    # temporal order. At most nFrames_Video frames are used, so the content
    # matches the count in the file name even if stale frames are present.
    sFrames = sorted(glob.glob(os.path.join(seVideo.sPathFrames, "*.jpg")))[:nFrames_Video]

    # run cnn on each frame, collect the resulting features
    # (presumably one feature vector per frame -- confirm in cnninception)
    liFeatures = [cnn.extract(sFrame) for sFrame in sFrames]

    # save resulting list of features in file in "data/20-features";
    # splitext keeps file stems that contain dots intact
    sFeatures = os.path.join("../data/20-features", os.path.splitext(seVideo.sFile)[0] +
                             "-" + str(nFrames_Video) + "features")
    np.save(sFeatures, liFeatures)

Extracting features from Nachmittag---Oô-UK---vid-41601---lsid-34163.mp4
Extracting features from Nachmittag---k---vid-14232---lsid-17843.mp4
Extracting features from Nachmittag---st---vid-14227---lsid-17841.mp4
Extracting features from Nachmittag---w---vid-19534---lsid-20522.mp4
Extracting features from Nachmittag---ooe---vid-14729---lsid-18092.mp4
Extracting features from Nachmittag---v---vid-14226---lsid-17839.mp4
Extracting features from Nachmittag---k---vid-66331---lsid-42446.mp4
Extracting features from Nachmittag---w---vid-42507---lsid-31082.mp4
Extracting features from Nachmittag---w---vid-14741---lsid-18098.mp4
Extracting features from Nachmittag---ogs---vid-28973---lsid-29926.mp4
Extracting features from Nachmittag---k---vid-14233---lsid-17844.mp4
Extracting features from Nachmittag---w---vid-14223---lsid-17840.mp4
Extracting features from Nachmittag---b---vid-19530---lsid-20520.mp4
Extracting features from Nachmittag---s-k---vid-14235---lsid-17845.mp4
Extracting features fro