In [1]:
from google.cloud import videointelligence

In [2]:
import os
# Set the credentials file location in the environment before any client is
# created; the client constructed later in this notebook is given no explicit
# credentials, so it presumably picks this variable up.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/datalab/google-api.json"

In [3]:
# Client object used to submit the annotate_video request further down.
video_client = videointelligence.VideoIntelligenceServiceClient()

In [4]:
from google.cloud.speech import enums
from google.cloud.speech import types

In [5]:
# Label detection is the only feature requested for this video.
features = [videointelligence.enums.Feature.LABEL_DETECTION]

In [6]:
# Build the request context: the label detection mode is wrapped in a
# LabelDetectionConfig, which in turn is wrapped in the VideoContext that is
# passed to annotate_video. SHOT_AND_FRAME_MODE is selected because later
# cells read both shot-level and frame-level annotations.
mode = videointelligence.enums.LabelDetectionMode.SHOT_AND_FRAME_MODE
config = videointelligence.types.LabelDetectionConfig(label_detection_mode=mode)
context = videointelligence.types.VideoContext(label_detection_config=config)

In [7]:
# Cloud Storage URI of the video to annotate.
path = "gs://kish-bucket/Hemanvi_video.mp4"

# Submit the annotation request; the returned operation object is resolved
# into the actual result in a later cell.
operation = video_client.annotate_video(
    path,
    features=features,
    video_context=context,
)

In [8]:
# Resolve the operation into its result, passing timeout=90
# (presumably seconds - confirm against the client library docs).
result = operation.result(timeout=90)

In [9]:
# Print every video-level (segment) label from the first annotation result:
# its description, any category descriptions, and for each segment the time
# range and confidence.
segment_labels = result.annotation_results[0].segment_label_annotations
for segment_label in segment_labels:
    print('Video label description: {}'.format(
        segment_label.entity.description))
    for category_entity in segment_label.category_entities:
        print('\tLabel category description: {}'.format(
            category_entity.description))

    # Only the inner loop needs a counter; it gets its own name so it cannot
    # be confused with (or clobber) an index of the enclosing loop.
    for segment_index, segment in enumerate(segment_label.segments):
        start_offset = segment.segment.start_time_offset
        end_offset = segment.segment.end_time_offset
        # Combine the seconds and nanos parts into fractional seconds.
        start_time = start_offset.seconds + start_offset.nanos / 1e9
        end_time = end_offset.seconds + end_offset.nanos / 1e9
        positions = '{}s to {}s'.format(start_time, end_time)
        print('\tSegment {}: {}'.format(segment_index, positions))
        print('\tConfidence: {}'.format(segment.confidence))
    print('\n')

Video label description: leisure
	Segment 0: 0.0s to 15.6s
	Confidence: 0.470728754997


Video label description: play
	Segment 0: 0.0s to 15.6s
	Confidence: 0.909762144089


Video label description: individual sports
	Label category description: sports
	Segment 0: 0.0s to 15.6s
	Confidence: 0.402013421059


Video label description: play
	Label category description: person
	Segment 0: 0.0s to 15.6s
	Confidence: 0.938421726227


Video label description: playground
	Label category description: city
	Segment 0: 0.0s to 15.6s
	Confidence: 1.0


Video label description: playground slide
	Segment 0: 0.0s to 15.6s
	Confidence: 0.790848553181


Video label description: public space
	Label category description: city
	Segment 0: 0.0s to 15.6s
	Confidence: 0.596177875996


Video label description: outdoor play equipment
	Segment 0: 0.0s to 15.6s
	Confidence: 0.932455003262




In [10]:
# Print every shot-level label from the first annotation result: its
# description, any category descriptions, and for each shot the time range
# and confidence.
shot_labels = result.annotation_results[0].shot_label_annotations
for shot_label in shot_labels:
    print('Shot label description: {}'.format(
        shot_label.entity.description))
    for category_entity in shot_label.category_entities:
        print('\tLabel category description: {}'.format(
            category_entity.description))

    # Only the inner loop needs a counter; it gets its own name so it cannot
    # be confused with (or clobber) an index of the enclosing loop.
    for shot_index, shot in enumerate(shot_label.segments):
        start_offset = shot.segment.start_time_offset
        end_offset = shot.segment.end_time_offset
        # Combine the seconds and nanos parts into fractional seconds.
        start_time = start_offset.seconds + start_offset.nanos / 1e9
        end_time = end_offset.seconds + end_offset.nanos / 1e9
        positions = '{}s to {}s'.format(start_time, end_time)
        print('\tSegment {}: {}'.format(shot_index, positions))
        print('\tConfidence: {}'.format(shot.confidence))
    print('\n')


Shot label description: playground
	Label category description: city
	Segment 0: 0.0s to 13.0s
	Confidence: 1.0
	Segment 1: 13.033333s to 14.0s
	Confidence: 0.959590852261
	Segment 2: 14.033333s to 15.6s
	Confidence: 0.966488361359


Shot label description: play
	Label category description: person
	Segment 0: 0.0s to 13.0s
	Confidence: 0.922978341579
	Segment 1: 13.033333s to 14.0s
	Confidence: 0.937754929066
	Segment 2: 14.033333s to 15.6s
	Confidence: 0.933511078358


Shot label description: playground slide
	Segment 0: 0.0s to 13.0s
	Confidence: 0.784813344479
	Segment 1: 13.033333s to 14.0s
	Confidence: 0.475744605064
	Segment 2: 14.033333s to 15.6s
	Confidence: 0.530465841293


Shot label description: public space
	Label category description: city
	Segment 0: 0.0s to 13.0s
	Confidence: 0.620512962341
	Segment 1: 13.033333s to 14.0s
	Confidence: 0.491739958525
	Segment 2: 14.033333s to 15.6s
	Confidence: 0.491739958525


Shot label description: fun
	Segment 0: 13.033333s to 14.0s
	

In [11]:
# Print every frame-level label from the first annotation result: its
# description, any category descriptions, and the time offset and confidence
# of its first frame.
frame_labels = result.annotation_results[0].frame_label_annotations
for frame_label in frame_labels:
    print('Frame label description: {}'.format(
        frame_label.entity.description))
    for category_entity in frame_label.category_entities:
        print('\tLabel category description: {}'.format(
            category_entity.description))

    # Each frame_label_annotation can hold many frames; only the first one is
    # reported here. Guard the lookup so a label without frames does not
    # abort the whole listing with an IndexError.
    if len(frame_label.frames) > 0:
        frame = frame_label.frames[0]
        # Combine the seconds and nanos parts into fractional seconds.
        time_offset = (frame.time_offset.seconds +
                       frame.time_offset.nanos / 1e9)
        print('\tFirst frame time offset: {}s'.format(time_offset))
        print('\tFirst frame confidence: {}'.format(frame.confidence))
    else:
        print('\tNo frames returned for this label')
    print('\n')

Frame label description: fun
	First frame time offset: 0.951819s
	First frame confidence: 0.627658963203


Frame label description: city
	Label category description: geographical feature
	First frame time offset: 7.081855s
	First frame confidence: 0.483886122704


Frame label description: playground slide
	First frame time offset: 0.951819s
	First frame confidence: 0.682778358459


Frame label description: toy
	First frame time offset: 0.951819s
	First frame confidence: 0.436657190323


Frame label description: recreation
	First frame time offset: 0.951819s
	First frame confidence: 0.697136819363


Frame label description: sand
	Label category description: material
	First frame time offset: 4.976033s
	First frame confidence: 0.563841640949


Frame label description: sandpit
	First frame time offset: 14.167364s
	First frame confidence: 0.404543936253


Frame label description: tree
	Label category description: plant
	First frame time offset: 8.962995s
	First frame confidence: 0.49536225