In [1]:
'''Go through movie->features pipeline manually'''
# First load the stimulus
from featurex.tests.utils import get_test_data_path
from os.path import join
from featurex.stimuli.video import VideoStim

# Resolve the sample clip inside the package's test-data directory, then wrap
# the resulting path in a VideoStim for the rest of the pipeline to consume.
video_dir = join(get_test_data_path(), 'video')
filename = join(video_dir, 'obama_speech.mp4')
video = VideoStim(filename)

In [2]:
# Subsample the video and extract the audio
from featurex.converters.video import VideoToAudioConverter, FrameSamplingConverter
# Audio branch: convert the video stimulus into an audio stimulus.
audio_conv = VideoToAudioConverter()
audio = audio_conv.convert(video)

# Visual branch: subsample the video with every=15; the result is what the
# later cells iterate over frame by frame.
frame_conv = FrameSamplingConverter(every=15)
derived = frame_conv.convert(video)

[MoviePy] Writing audio in featurex/tests/data/video/obama_speech.wav


100%|██████████| 193/193 [00:00<00:00, 2438.59it/s]

[MoviePy] Done.





In [3]:
# Extract text from the audio
from featurex.converters.api import IBMSpeechAPIConverter
# Run the extracted audio through the IBM speech converter; the converted
# result is iterated element by element in the text-length cell further down.
speech_conv = IBMSpeechAPIConverter()
audio_text = speech_conv.convert(audio)

In [4]:
# Extract text from the video
from featurex.converters.api import TesseractAPIConverter
# Convert every sampled frame to text with the Tesseract converter, keeping
# one result per frame in the same order as the frames in `derived`.
ocr_conv = TesseractAPIConverter()
visual_texts = [ocr_conv.convert(frame) for frame in derived]

In [5]:
# Extract a low-level image feature from each frame
from featurex.extractors import ExtractorResult
from featurex.extractors.image import VibranceExtractor
# Apply the vibrance extractor to each sampled frame; results are collected
# in frame order so they can be merged into one table later.
vibrance_ext = VibranceExtractor()
visual_features = [vibrance_ext.extract(frame) for frame in derived]

In [6]:
# Extract word length from both audio and visual text
from featurex.extractors.text import LengthExtractor
# A single LengthExtractor is shared by both text sources.
length_ext = LengthExtractor()

# One extraction result per text item recovered from the sampled frames ...
visual_length = [length_ext.extract(frame_text) for frame_text in visual_texts]
# ... and one per element of the converted audio.
audio_length = [length_ext.extract(spoken_text) for spoken_text in audio_text]

In [7]:
# Merge and display results
# Merge each list of per-stimulus results into a single table and show it.
# print(...) is written in call form with exactly one argument so the cell
# parses and prints identically under both Python 2 and Python 3 (the bare
# `print x` statement form is a SyntaxError on Python 3).
print(ExtractorResult.merge_stims(visual_features))  # vibrance per sampled frame
print(ExtractorResult.merge_stims(visual_length))    # text length per sampled frame
print(ExtractorResult.merge_stims(audio_length))     # text length from the audio branch

        onset  duration    vibrance
  stim                             
0 0       NaN       NaN  418.851528
  15      NaN       NaN  441.618626
  30      NaN       NaN  506.321013
  45      NaN       NaN  575.903070
  60      NaN       NaN  586.500651
  75      NaN       NaN  590.007483
  90      NaN       NaN  589.524823
                                                      onset  duration  \
  stim                                                                  
0                                                       NaN       NaN   
                                                        NaN       NaN   
                                                        NaN       NaN   
  mslnsu-r onu‘ﬁ. SAVEIENV nu IRAN                      NaN       NaN   
  1'"\nPIESIIIE Y' I' I ‘ s\nt 1'. I)“; 555'} HEM...    NaN       NaN   
                                                        NaN       NaN   
  , 7 V\nPnEslnEMJ'W min: an Inn:\nA ‘                  NaN       NaN   

                  

In [8]:
# Graph way of doing the same pipeline
from featurex.graph import Graph, Node
# Each node is written as a tuple of (transformer, name[, children]); that is
# the shape used throughout this cell -- see featurex.graph for the exact spec.

# Visual branch: frame sampling feeds two children -- a text converter whose
# output goes to a length extractor, and a vibrance extractor.
frame_sampler = FrameSamplingConverter(every=15)
frame_text_node = (TesseractAPIConverter(), 'visual_text',
                   [(LengthExtractor(), 'text_length')])
vibrance_node = (VibranceExtractor(), 'visual_vibrance')
visual_g = [(frame_sampler, 'framesampling', [frame_text_node, vibrance_node])]

# Audio branch: video -> audio -> speech text -> text length.
audio_extractor = VideoToAudioConverter()
speech_text_node = (IBMSpeechAPIConverter(), 'audio_text',
                    [(LengthExtractor(), 'text_length')])
audio_g = [(audio_extractor, 'audio', [speech_text_node])]

# Build one Graph per branch from the specs above.
visual_graph = Graph(visual_g)
audio_graph = Graph(audio_g)

In [9]:
# Needs fixing: this call currently fails with the TypeError shown in the
# output below. The visual graph places TesseractAPIConverter directly beneath
# FrameSamplingConverter, and the error reports that the Tesseract converter
# was handed a DerivedVideoStim while it only accepts ImageStim input.
# TODO(review): presumably the sampled frames need to be expanded into
# individual ImageStims before they reach the Tesseract node -- confirm
# against the featurex.graph implementation.
visual_graph.extract(video)

TypeError: Transformers of type TesseractAPIConverter can only be applied to stimuli of type(s) ImageStim, not type DerivedVideoStim.