<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"></ul></div>

In [None]:
import urllib
import urllib.request

from query.models import Video, Shot, VideoTag

There are four parameters for this script:
* video_id: the ID of the video in the Esper database
* url: the cinemetrics URL of the raw data for (James Cutting's) human-annotated shot boundaries
* frame_offset: James Cutting doesn't start annotating shots until after the initial studio logos, so we need to offset his annotations by some frame offset
* first_cutting_shot: Sometimes it's hard to match up James Cutting's human-annotated shot boundaries with the actual movie, so we only record Cutting's shots from this (1-based) shot number onward

In [None]:
# ID used to look the video up in the database and to attach the new shots to it.
video_id = 377
# Page that is downloaded and parsed for the shot-boundary table.
url = "http://cinemetrics.lv/data.php?movie_ID=7716"
# Number of frames added to every frame position derived from the annotations.
frame_offset = 2048
# 1-based position of the first annotated shot to keep; earlier ones are dropped.
first_cutting_shot = 2

In [None]:
# Load video from database
# get() fetches exactly one row and raises Video.DoesNotExist when video_id is
# unknown, rather than the opaque IndexError produced by indexing an empty
# queryset with [0].
video = Video.objects.get(id=video_id)

In [None]:
# Read the raw data from Cinemetrics
# The context manager closes the HTTP response as soon as the body has been
# read, so the connection is not left open for the lifetime of the kernel.
with urllib.request.urlopen(url) as response:
    content = response.read().decode('utf-8')

In [None]:
# Parse the HTML
# The rows are read from a single line of the response (line index 12), where
# each `<tr>...</tr>` row follows the previous one.
# NOTE(review): the index 12 depends on the page layout served by Cinemetrics —
# confirm it if this cell starts failing.
ROW_START = '<tr><td>'
ROW_END = '</tr>'
CELL_SEP = '</td><td>'
CELL_END = '</td>'

shot_annotations = []
table = content.split('\n')[12]
# split() leaves whatever follows the last '</tr>' as its final element; that
# remainder is not a row, so it is dropped.
for raw_row in table.split(ROW_END)[:-1]:
    row = raw_row.strip()
    # The first three cells of a row are: shot number, shot length, time code.
    cells = row[len(ROW_START):].split(CELL_SEP)
    shot_num = int(cells[0])
    shot_length = int(cells[1])
    # When the third cell is the last one in the row it still carries its
    # closing '</td>', so cut at the first cell terminator.
    time_code = int(cells[2].split(CELL_END)[0])
    shot_annotations.append((shot_num, shot_length, time_code))

In [None]:
# Remove the initial (first_cutting_shot - 1) shots
# first_cutting_shot is a 1-based position. Zero or a negative value would make
# the slice start negative and silently keep only the tail of the list.
if first_cutting_shot < 1:
    raise ValueError(
        'first_cutting_shot must be >= 1, got {}'.format(first_cutting_shot)
    )
# NOTE: this rebinds shot_annotations, so re-running this cell without
# re-running the parsing cell trims the list a second time.
shot_annotations = shot_annotations[first_cutting_shot - 1:]
if not shot_annotations:
    raise ValueError(
        'no shot annotations left after skipping the first {} shot(s)'.format(
            first_cutting_shot - 1
        )
    )

# Offset that makes the first kept shot's time code equal to its own length.
first_shot_length, first_shot_time_code = shot_annotations[0][1:3]
time_offset = first_shot_length - first_shot_time_code

# Update all the timecodes
shot_annotations = [
    (shot_num, shot_length, time_code + time_offset)
    for shot_num, shot_length, time_code in shot_annotations
]

In [None]:
# Labelers for these shots
# get_or_create returns an (object, created) pair; only the object is kept and
# the created flag is discarded into `_`.
# NOTE(review): Labeler and Tag are not among the names imported in this
# notebook's import cell — confirm the kernel provides them or add the import.
LABELER, _ = Labeler.objects.get_or_create(name='shot-jamescutting')
LABELED_TAG, _ = Tag.objects.get_or_create(name='shot-jamescutting:labeled')

In [None]:
def time_to_frame(time):
    """Convert an annotation time code into a frame position in the video.

    The time code is divided by 10, multiplied by ``video.fps`` and then
    shifted by ``frame_offset``. The result is not rounded, so it may be a
    float.

    NOTE(review): the division by 10 suggests time codes are in tenths of a
    second and ``video.fps`` is frames per second — confirm.
    """
    seconds = time / 10
    return seconds * video.fps + frame_offset

# Build one unsaved Shot per annotation that has a positive length.
new_shots = []
for num, length, time_code in shot_annotations:
    # Annotations without a positive length produce no shot.
    if not length > 0:
        continue
    # time_code marks the end of the shot, so its start lies `length` earlier.
    start_frame = time_to_frame(time_code - length)
    # The last frame sits just before the end boundary, but never before the
    # first frame of the same shot.
    end_frame = max(time_to_frame(time_code) - 1, start_frame)
    new_shots.append(
        Shot(
            min_frame=start_frame,
            max_frame=end_frame,
            labeler=LABELER,
            video_id=video_id,
        )
    )

In [None]:
# Insert every shot with a single bulk_create call.
# NOTE(review): nothing here checks for shots already stored for this video and
# labeler, so running the cell a second time may duplicate them — confirm
# before re-running.
created_shots = Shot.objects.bulk_create(new_shots)
# Display only how many shots were written instead of the repr of every object.
len(created_shots)

In [None]:
# Mark the video as labeled. get_or_create keeps this cell safe to re-run: an
# existing (tag, video) row is reused instead of being saved a second time.
video_tag, video_tag_created = VideoTag.objects.get_or_create(
    tag=LABELED_TAG, video=video
)