# Action Query

In [None]:
from esper.prelude import *
from esper.rekall import intrvllists_to_result_with_objects
from tqdm import tqdm_notebook as tqdm

In [None]:
def consecutive_short_shots():
    """Find runs of consecutive short shots that are bright and contain little speech.

    Steps, in the order they are executed below:
      1. Load cinematic shots whose duration is strictly between 0 and
         MAX_SHOT_DURATION seconds, skipping videos with ``ignore_film`` set
         and videos that have the "animation" genre.
      2. Collect brightness values from ``Frame`` rows (``regularly_sampled=True``)
         into each shot's payload under the key 'brightness'.
      3. Chain short shots together NUM_SHOTS - 1 times with
         ``meets_before(epsilon=1)``, coalesce the result, and keep intervals in
         which at least one shot has a mean brightness above the threshold.
      4. Attach caption words that overlap each interval and keep intervals
         with at most MAX_NUM_WORDS_PER_SECOND words per second.

    Names that are not imported inside this function (``F``, ``Frame``,
    ``Video``, ``np``, ``tqdm``) must already be in scope in the notebook
    (presumably via ``from esper.prelude import *`` -- TODO confirm).

    Returns:
        The collection produced by ``n_shots.filter(has_few_words)``; every
        payload is a dict with the keys 'shots' and 'words'.
    """
    from query.models import Shot
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import meets_before, overlaps
    from esper.rekall import intrvllists_to_result_with_objects
    from django.db.models import ExpressionWrapper, FloatField
    from esper.captions import get_all_segments
    from rekall.merge_ops import payload_first, payload_plus, merge_named_payload

    # Number of shots chained into one sequence, and the exclusive upper bound
    # on a single shot's duration (the duration annotation divides frames by fps).
    NUM_SHOTS=5
    MAX_SHOT_DURATION=0.8

    # payload: shot_id, vid
    short_shots = VideoIntervalCollection.from_django_qs(Shot.objects.annotate(
        duration = ExpressionWrapper((F('max_frame') - F('min_frame')) / F('video__fps'), output_field=FloatField())
    ).filter(
        duration__lt=MAX_SHOT_DURATION,
        duration__gt=0.,
        cinematic=True,
        video__ignore_film=False
    ).exclude(
        video__genres__name="animation"
    ), with_payload = lambda shot: {'shot_id': shot.id, 'vid': shot.video_id})
    
    # Slow: issues one Frame query per interval. Not called anywhere in this
    # function; the batched per-video loop further down is used instead.
    def get_frame_brightness(intvl):
        vid = intvl.payload['vid']
        start = intvl.start
        end = intvl.end
        qs = Frame.objects.filter(video_id=vid, number__gte=start, number__lte=end).order_by('number')
        intvl.payload['brightness'] = [f.brightness for f in qs]
        return intvl
    
    # Wrap a single shot payload in a list so later merges can append to it.
    def payload_in_array(intvl):
        return (intvl.start, intvl.end, [intvl.payload])
    
    # Give shots that received no brightness samples an empty list, so that
    # bright_enough can rely on the key being present.
    def add_brightness_array(intvl):
        if 'brightness' not in intvl.payload:
            intvl.payload['brightness'] = []
        return intvl       
    
    # True when at least one shot in the payload list has a mean brightness
    # above the threshold; a shot without samples counts as brightness 0.
    def bright_enough(intvl):
        BRIGHTNESS_THRESHOLD = 20.0
        average_above_threshold = np.array(
            [np.mean(np.array(p['brightness'])) if len(p['brightness'])>0 else 0 for p in intvl.payload]
        )> BRIGHTNESS_THRESHOLD
        return np.any(average_above_threshold)
    
    # Every frame number covered by the intervals of one video (end inclusive).
    def get_all_frame_numbers(intervallist):
        frame = []
        for intvl in intervallist.intrvls:
            frame += list(range(intvl.start, intvl.end+1))
        return frame
    
    print("Getting brightness for {0} intervals".format(sum(
        [len(l.intrvls) for l in short_shots.get_allintervals().values()])))
    
    # Get brightnesses for each frame in payload
    # One query per video; frames come back ordered by number and are matched
    # to intervals with a single forward-moving index.
    # NOTE(review): this relies on intervals.intrvls being ordered by frame
    # position -- confirm against rekall.
    for vid, intervals in tqdm(short_shots.get_allintervals().items()):
        frames = get_all_frame_numbers(intervals)
        qs = Frame.objects.filter(video_id=vid, number__in=frames, regularly_sampled=True).order_by('number').values()
        interval_index = 0
        for frame in qs:
            # Advance past intervals that end before this frame.
            while frame['number'] > intervals.intrvls[interval_index].end:
                interval_index += 1
            if frame['number'] >= intervals.intrvls[interval_index].start:
                intvl = intervals.intrvls[interval_index]
                if 'brightness' in intvl.payload:
                    intvl.payload['brightness'].append(frame['brightness'])
                else:
                    intvl.payload['brightness'] = [frame['brightness']] 
    short_shots = short_shots.map(add_brightness_array)
    n_shots = short_shots.map(payload_in_array)
    # Each pass merges the current runs with short_shots and appends the
    # matched shot's payload to the run's payload list.
    # NOTE(review): presumably meets_before(epsilon=1) matches a shot that
    # starts within one frame of the run's end -- confirm in the rekall docs.
    for n in range(2, NUM_SHOTS + 1):
        print('Constructing {} consecutive short shots'.format(n))
        
        n_shots = n_shots.merge(
            short_shots, predicate=meets_before(epsilon=1), working_window=1,
            payload_merge_op = lambda arr, el: arr+[el]
        )

        print('There are {} videos with {} consecutive short shots'.format(
            len(n_shots.get_allintervals().keys()), n)
    )
    n_shots = n_shots.coalesce().filter(bright_enough).filter_length(min_length=1)
    
    # Filter out any intervals with too many spoken lines
    print("Getting captions")
    vids = n_shots.get_allintervals().keys()
    caption_results = get_all_segments(vids)
    # One Video lookup per video id to obtain its fps.
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
    # word[0] and word[1] are scaled by fps to obtain frame positions
    # (presumably they are times in seconds -- TODO confirm); word[2] is kept
    # as the payload.
    captioned_intervals = VideoIntervalCollection({
        video_id: [(
            word[0] * fps_map[video_id], # start frame
            word[1] * fps_map[video_id], # end frame
            {'shots': [],
             'words': [word[2]]
            }
            ) # payload is the word
            for word in words]
        for video_id, words in caption_results
    })
    
    # Re-shape the payload (a list of shot payloads) into the named form used
    # by the join and coalesce steps below.
    def organize_payload(intvl):
        p = intvl.payload
        new_payload = {
            'shots': p,
            'words': []
        }
        return (intvl.start, intvl.end, new_payload)
    
    # payload:
    # shots: list of shots
    # words: list of word intervals
    n_shots = n_shots.map(organize_payload)
    # Join each run with the caption words satisfying overlaps(), keeping the
    # run's bounds; coalescing concatenates the 'words' lists and keeps the
    # first 'shots' value. The union with the un-joined n_shots (whose 'words'
    # are empty) presumably keeps runs that overlap no caption word -- confirm.
    n_shots = n_shots.join(
        captioned_intervals,
        lambda shot_interval, word_interval:
            [(shot_interval.start, shot_interval.end, {
                'shots': shot_interval.payload['shots'],
                'words': word_interval.payload['words']
            })],
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'shots': payload_first,
        'words': payload_plus,
    })).set_union(n_shots).coalesce(
        payload_merge_op=merge_named_payload({
            'shots': payload_first,
            'words': payload_plus
    }))
    
    # Keep intervals whose word rate does not exceed the maximum. The interval
    # length in frames is converted to seconds using the fps of the video that
    # the first shot belongs to.
    def has_few_words(intvl):
        MAX_NUM_WORDS_PER_SECOND = 1.0
        
        p = intvl.payload
        n_words = len(p['words'])
        time = (intvl.end-intvl.start) / fps_map[p['shots'][0]['vid']]
        return n_words / time <= MAX_NUM_WORDS_PER_SECOND
    
    action_shots = n_shots.filter(has_few_words)

    return action_shots


In [None]:
# Run the action-sequence query (database queries plus a caption lookup) and
# keep the result for the visualisation and histogram cells below.
shots = consecutive_short_shots()

In [None]:
# Display the action sequences in the Esper widget. The lambda returns an empty
# list for every (p, v) pair it is given; stride=17 is forwarded to the result
# builder (presumably it subsamples the results -- TODO confirm).
esper_widget(intrvllists_to_result_with_objects(shots.get_allintervals(), lambda p,v:[], stride=17), 
             show_middle_frame=False, disable_caption=True)

# Occurrence of such sequences in film

In [None]:
# Divide each film into `num_buckets` equal-width buckets. On average, what
# percentage of quick action sequences falls into each bucket?
def get_normalized_frequencies(intervals, num_buckets=100):
    """Return the per-bucket share of sequences, averaged over videos.

    Each video's timeline (frame 0 up to its ``num_frames``) is divided into
    ``num_buckets`` equal-width buckets. Every interval of a video carries a
    weight of 1 / (number of intervals in that video), spread evenly over the
    buckets it spans, so each contributing video's row sums to 1. The rows
    are then averaged across videos.

    Args:
        intervals: collection exposing ``get_allintervals()`` (mapping of
            video id to interval list) and ``get_intervallist(vid)``; interval
            ``start``/``end`` are treated as frame numbers.
        num_buckets: number of equal-width buckets per film.

    Returns:
        A 1-D numpy array of length ``num_buckets``. Videos without intervals
        or without frames are left out of the average; if no video
        contributes, the array is all zeros.
    """
    vids = list(intervals.get_allintervals().keys())
    last_bucket = num_buckets - 1
    per_video_rows = []
    for vid in tqdm(vids):
        num_frames = Video.objects.get(id=vid).num_frames
        seqs = intervals.get_intervallist(vid).intrvls
        # Nothing to distribute for this video; skipping it also avoids the
        # divisions by zero below.
        if not num_frames or len(seqs) == 0:
            continue
        bucket_width = num_frames / num_buckets
        unit = 1 / len(seqs)
        row = np.zeros(num_buckets)
        for intvl in seqs:
            # Clamp to the valid range: an interval ending on (or past) the
            # final frame would otherwise map to index `num_buckets`, which
            # is one past the end of the row.
            start_bucket = min(max(int(intvl.start / bucket_width), 0), last_bucket)
            end_bucket = min(max(int(intvl.end / bucket_width), start_bucket), last_bucket)
            num_buckets_spanned = end_bucket - start_bucket + 1
            row[start_bucket:end_bucket + 1] += unit / num_buckets_spanned
        per_video_rows.append(row)
    if not per_video_rows:
        # Averaging zero rows would produce NaNs; report "no occurrences".
        return np.zeros(num_buckets)
    return np.mean(per_video_rows, axis=0)
    
def plot_buckets(normalized_percentages):
    """Draw a bar chart of per-bucket averages over a normalized film timeline.

    ``normalized_percentages`` holds one fraction per bucket and is multiplied
    by 100 for display, so it must support elementwise ``* 100`` (for example
    a numpy array). Bars are centred within their buckets on a 0..1 x-axis;
    vertical lines at 0.25, 0.5 and 0.75 and four text labels mark the
    sections of the film.
    """
    bucket_count = len(normalized_percentages)
    bar_centers = [(idx + 0.5) / bucket_count for idx in range(bucket_count)]
    bar_heights = normalized_percentages*100

    _fig, ax = plt.subplots(figsize=(6.666, 3.333))
    ax.bar(bar_centers, bar_heights, width=1/bucket_count)
    ax.set_xlabel("Proportion of movie")
    ax.set_ylabel('Average Percentages of Short Action Sequences')
    ax.set_title("Distribution of Short Action Sequences")
    ax.set_xlim(0, 1)

    # Quarter boundaries of the film.
    for boundary in (0.25, 0.5, 0.75):
        ax.axvline(x=boundary, color='k')

    # One label per quarter, all at the same height in data coordinates.
    section_labels = (
        (0.09, 'Setup'),
        (0.29, 'Complication'),
        (0.54, 'Development'),
        (0.83, 'Climax'),
    )
    for x_pos, label in section_labels:
        ax.text(x=x_pos, y=0.15, s=label)

    plt.show()
    
# Bucket the action sequences into 50 buckets per film, average across films,
# and plot the resulting distribution.
data = get_normalized_frequencies(shots, num_buckets=50)
plot_buckets(data)
        
    

# Quick Dialogues

In [None]:
def quick_dialogues():
    """Find runs of consecutive short shots that are bright and contain a lot of speech.

    Steps, in the order they are executed below:
      1. Load cinematic shots whose duration is strictly between 0 and
         MAX_SHOT_DURATION seconds, skipping videos with ``ignore_film`` set
         and videos that have the "animation" genre.
      2. Collect brightness values from ``Frame`` rows (``regularly_sampled=True``)
         into each shot's payload under the key 'brightness'.
      3. Chain short shots together NUM_SHOTS - 1 times with
         ``meets_before(epsilon=1)``, coalesce the result, and keep intervals in
         which at least one shot has a mean brightness above the threshold.
      4. Attach caption words that overlap each interval and keep intervals
         with more than MIN_NUM_WORDS_PER_SECOND words per second.

    Names that are not imported inside this function (``F``, ``Frame``,
    ``Video``, ``np``, ``tqdm``) must already be in scope in the notebook
    (presumably via ``from esper.prelude import *`` -- TODO confirm).

    Returns:
        The collection produced by ``n_shots.filter(has_many_words)``; every
        payload is a dict with the keys 'shots' and 'words'.
    """
    from query.models import Shot
    from rekall.video_interval_collection import VideoIntervalCollection
    from rekall.temporal_predicates import meets_before, overlaps
    from esper.rekall import intrvllists_to_result_with_objects
    from django.db.models import ExpressionWrapper, FloatField
    from esper.captions import get_all_segments
    from rekall.merge_ops import payload_first, payload_plus, merge_named_payload

    # Number of shots chained into one sequence, and the exclusive upper bound
    # on a single shot's duration (the duration annotation divides frames by fps).
    NUM_SHOTS=4
    MAX_SHOT_DURATION=1.5

    # payload: shot_id, vid
    short_shots = VideoIntervalCollection.from_django_qs(Shot.objects.annotate(
        duration = ExpressionWrapper((F('max_frame') - F('min_frame')) / F('video__fps'), output_field=FloatField())
    ).filter(
        duration__lt=MAX_SHOT_DURATION,
        duration__gt=0.,
        cinematic=True,
        video__ignore_film=False
    ).exclude(
        video__genres__name="animation"
    ), with_payload = lambda shot: {'shot_id': shot.id, 'vid': shot.video_id})
    
    # Slow: issues one Frame query per interval. Not called anywhere in this
    # function; the batched per-video loop further down is used instead.
    def get_frame_brightness(intvl):
        vid = intvl.payload['vid']
        start = intvl.start
        end = intvl.end
        qs = Frame.objects.filter(video_id=vid, number__gte=start, number__lte=end).order_by('number')
        intvl.payload['brightness'] = [f.brightness for f in qs]
        return intvl
    
    # Wrap a single shot payload in a list so later merges can append to it.
    def payload_in_array(intvl):
        return (intvl.start, intvl.end, [intvl.payload])
    
    # Give shots that received no brightness samples an empty list, so that
    # bright_enough can rely on the key being present.
    def add_brightness_array(intvl):
        if 'brightness' not in intvl.payload:
            intvl.payload['brightness'] = []
        return intvl       
    
    # True when at least one shot in the payload list has a mean brightness
    # above the threshold; a shot without samples counts as brightness 0.
    def bright_enough(intvl):
        BRIGHTNESS_THRESHOLD = 20.0
        average_above_threshold = np.array(
            [np.mean(np.array(p['brightness'])) if len(p['brightness'])>0 else 0 for p in intvl.payload]
        )> BRIGHTNESS_THRESHOLD
        return np.any(average_above_threshold)
    
    # Every frame number covered by the intervals of one video (end inclusive).
    def get_all_frame_numbers(intervallist):
        frame = []
        for intvl in intervallist.intrvls:
            frame += list(range(intvl.start, intvl.end+1))
        return frame
    
    print("Getting brightness for {0} intervals".format(sum(
        [len(l.intrvls) for l in short_shots.get_allintervals().values()])))
    
    # Get brightnesses for each frame in payload
    # One query per video; frames come back ordered by number and are matched
    # to intervals with a single forward-moving index.
    # NOTE(review): this relies on intervals.intrvls being ordered by frame
    # position -- confirm against rekall.
    for vid, intervals in tqdm(short_shots.get_allintervals().items()):
        frames = get_all_frame_numbers(intervals)
        qs = Frame.objects.filter(video_id=vid, number__in=frames, regularly_sampled=True).order_by('number').values()
        interval_index = 0
        for frame in qs:
            # Advance past intervals that end before this frame.
            while frame['number'] > intervals.intrvls[interval_index].end:
                interval_index += 1
            if frame['number'] >= intervals.intrvls[interval_index].start:
                intvl = intervals.intrvls[interval_index]
                if 'brightness' in intvl.payload:
                    intvl.payload['brightness'].append(frame['brightness'])
                else:
                    intvl.payload['brightness'] = [frame['brightness']] 
    short_shots = short_shots.map(add_brightness_array)
    n_shots = short_shots.map(payload_in_array)
    # Each pass merges the current runs with short_shots and appends the
    # matched shot's payload to the run's payload list.
    # NOTE(review): presumably meets_before(epsilon=1) matches a shot that
    # starts within one frame of the run's end -- confirm in the rekall docs.
    for n in range(2, NUM_SHOTS + 1):
        print('Constructing {} consecutive short shots'.format(n))
        
        n_shots = n_shots.merge(
            short_shots, predicate=meets_before(epsilon=1), working_window=1,
            payload_merge_op = lambda arr, el: arr+[el]
        )

        print('There are {} videos with {} consecutive short shots'.format(
            len(n_shots.get_allintervals().keys()), n)
    )
    n_shots = n_shots.coalesce().filter(bright_enough).filter_length(min_length=1)
    
    # Filter out any intervals with too many spoken lines
    # NOTE(review): the comment above matches the sibling action query; the
    # filter at the end of this function keeps intervals with MANY words.
    print("Getting captions")
    vids = n_shots.get_allintervals().keys()
    caption_results = get_all_segments(vids)
    # One Video lookup per video id to obtain its fps.
    fps_map = dict((i, Video.objects.get(id=i).fps) for i in vids)
    # word[0] and word[1] are scaled by fps to obtain frame positions
    # (presumably they are times in seconds -- TODO confirm); word[2] is kept
    # as the payload.
    captioned_intervals = VideoIntervalCollection({
        video_id: [(
            word[0] * fps_map[video_id], # start frame
            word[1] * fps_map[video_id], # end frame
            {'shots': [],
             'words': [word[2]]
            }
            ) # payload is the word
            for word in words]
        for video_id, words in caption_results
    })
    
    # Re-shape the payload (a list of shot payloads) into the named form used
    # by the join and coalesce steps below.
    def organize_payload(intvl):
        p = intvl.payload
        new_payload = {
            'shots': p,
            'words': []
        }
        return (intvl.start, intvl.end, new_payload)
    
    print("Captions loaded")
    # payload:
    # shots: list of shots
    # words: list of word intervals
    n_shots = n_shots.map(organize_payload)
    # Join each run with the caption words satisfying overlaps(), keeping the
    # run's bounds; coalescing concatenates the 'words' lists and keeps the
    # first 'shots' value. The union with the un-joined n_shots (whose 'words'
    # are empty) presumably keeps runs that overlap no caption word -- confirm.
    n_shots = n_shots.join(
        captioned_intervals,
        lambda shot_interval, word_interval:
            [(shot_interval.start, shot_interval.end, {
                'shots': shot_interval.payload['shots'],
                'words': word_interval.payload['words']
            })],
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'shots': payload_first,
        'words': payload_plus,
    })).set_union(n_shots).coalesce(
        payload_merge_op=merge_named_payload({
            'shots': payload_first,
            'words': payload_plus
    }))
    
    # Keep intervals whose word rate exceeds the minimum. The interval length
    # in frames is converted to seconds using the fps of the video that the
    # first shot belongs to.
    def has_many_words(intvl):
        MIN_NUM_WORDS_PER_SECOND = 1
        
        p = intvl.payload
        n_words = len(p['words'])
        time = (intvl.end-intvl.start) / fps_map[p['shots'][0]['vid']]
        return n_words / time > MIN_NUM_WORDS_PER_SECOND
    
    dialogs = n_shots.filter(has_many_words)

    return dialogs


In [None]:
# Run the quick-dialogue query (database queries plus a caption lookup) and
# keep the result for the visualisation cell below.
dialogs = quick_dialogues()

In [None]:
# Display the quick-dialogue intervals in the Esper widget. The lambda returns
# an empty list for every (p, v) pair it is given; stride=1 is forwarded to the
# result builder (presumably no subsampling -- TODO confirm).
esper_widget(intrvllists_to_result_with_objects(dialogs.get_allintervals(), lambda p,v:[], stride=1), 
             show_middle_frame=False, disable_caption=True)

# Speaking speed in shots of people talking

In shots of people talking, what is the average number of words spoken per second in a film?

In [None]:
def get_vids_with_high_alignment_score(min_score=0.9):
    alignment_scores = {
    "grumpier_old_men_1995": 0.48147120055517, 
    "police_academy_1984": 0.8998576174655909, 
    "jason_bourne_2016": 0.8051052086926527, 
    "the_heat_2013": 0.23738149462422542, 
    "ordinary_people_1980": 0.23176387912860155, 
    "live_and_let_die_1973": 0.38713826366559484, 
    "aventure_malgache_1944": 0.16605075141660508, 
    "daddys_home_2015": 0.9036019154694982, 
    "inception_2010": 0.9400017216148747, 
    "police_academy_6_city_under_siege_1989": 0.9445371916156557, 
    "scary_movie_2_2001": 0.8460346527679955, 
    "pocahontas_1995": 0.8074631545939166, 
    "braveheart_1995": 0.9458286674132139, 
    "captain_fantastic_2016": 0.8991862567811935, 
    "aguirre_the_wrath_of_god_1972": 0.767223382045929, 
    "interstellar_2014": 0.9578446909667195, 
    "the_danish_girl_2015": 0.9512663755458516, 
    "the_first_wives_club_1996": 0.9077186596724119, 
    "star_trek_vi_the_undiscovered_country_1991": 0.9579180509413068, 
    "the_lord_of_the_rings_the_two_towers_2002": 0.5964880022064908, 
    "harry_potter_and_the_deathly_hallows_part_1_2010": 0.890369625345738, 
    "ferris_buellers_day_off_1986": 0.9397544128933231, 
    "casino_royale_2006": 0.2749802683504341, 
    "phantom_2013": 0.9692092372288313, 
    "octopussy_1983": 0.3456965394853594, 
    "home_alone_1990": 0.9035191775405299, 
    "mad_max_2_the_road_warrior_1981": 0.8685166831846713, 
    "san_andreas_2015": 0.8654390934844193, 
    "the_bourne_identity_2002": 0.7361851118891765, 
    "sleepers_1996": 0.42012729220492784, 
    "rope_1948": 0.9766465973362525, 
    "the_night_of_the_hunter_1955": 0.9044892648015614, 
    "american_ultra_2015": 0.3046293744833287, 
    "terminator_genisys_2015": 0.9561227055569524, 
    "the_flight_of_the_phoenix_1965": 0.23087775874231584, 
    "25th_hour_2002": 0.8230385115260962, 
    "butterfield_8_1960": 0.9732806134675294, 
    "road_to_perdition_2002": 0.9449809117448911, 
    "fight_club_1999": 0.9451470123300664, 
    "dick_tracy_1990": 0.2327360774818402, 
    "suicide_squad_2016": 0.7048129588626034, 
    "gods_of_egypt_2016": 0.9652196315857272, 
    "the_company_you_keep_2012": 0.43783638320775026, 
    "saboteur_1942": 0.9718287442304632, 
    "american_sniper_2014": 0.8971611721611722, 
    "ghost_town_2008": 0.8761219305673158, 
    "killing_them_softly_2012": 0.5759394058626796, 
    "foreign_correspondent_1940": 0.9608408590726265, 
    "hyde_park_on_hudson_2012": 0.9380762138905961, 
    "titus_1999": 0.8547035082272586, 
    "star_trek_insurrection_1998": 0.962449872402479, 
    "district_9_2009": 0.77524557956778, 
    "contagion_2011": 0.9462575929167095, 
    "inside_out_2015": 0.8258212375859435, 
    "home_alone_2_lost_in_new_york_1992": 0.9398473282442749, 
    "die_hard_1988": 0.9046372867162148, 
    "the_equalizer_2014": 0.8976642147373854, 
    "ace_ventura_when_nature_calls_1995": 0.8471544715447155, 
    "the_tempest_1980": 0.36293007769145397, 
    "for_a_few_dollars_more_1965": 0.2749609113245477, 
    "xmen_2000": 0.976930548810102, 
    "the_witch_2015": 0.7853199754651401, 
    "nine_to_five_1980": 0.9303482587064676, 
    "the_philadelphia_story_1940": 0.39854183701281004, 
    "hello_dolly_1969": 0.24962472406181016, 
    "bessie_2015": 0.8258363471971067, 
    "detour_1945": 0.5794100636205899, 
    "a_most_violent_year_2014": 0.9357472488462903, 
    "captain_blood_1935": 0.9607950401167031, 
    "what_women_want_2000": 0.9457267163188555, 
    "oceans_thirteen_2007": 0.9629000283205891, 
    "quantum_of_solace_2008": 0.3131752943073698, 
    "rebecca_1940": 0.554445165476963, 
    "locke_2013": 0.9573227854689289, 
    "star_wars_episode_ii_attack_of_the_clones_2002": 0.9582691319979456, 
    "hot_tub_time_machine_2_2015": 0.9523623888975519, 
    "the_messenger_the_story_of_joan_of_arc_1999": 0.42946236559139783, 
    "men_in_black_3_2012": 0.9547535979066725, 
    "lethal_weapon_4_1998": 0.3065436978480457, 
    "doctor_zhivago_1965": 0.9502262443438914, 
    "rocky_v_1990": 0.18462316641375823, 
    "scary_movie_3_2003": 0.272517911975435, 
    "topaz_1969": 0.9719178082191781, 
    "popeye_1980": 0.8399744979279566, 
    "the_taming_of_the_shrew_1967": 0.8042242079610074, 
    "the_congress_2013": 0.3660202463715087, 
    "trumbo_2015": 0.9504942130607418, 
    "planet_of_the_apes_1968": 0.9773006134969325, 
    "fahrenheit_451_1966": 0.19021185000566443, 
    "mad_max_beyond_thunderdome_1985": 0.8978123200921129, 
    "xmen_apocalypse_2016": 0.366738894907909, 
    "the_revenant_2015": 0.7608471074380165, 
    "house_of_flying_daggers_2004": 0.9248488338612151, 
    "shadow_of_a_doubt_1943": 0.9453634483855575, 
    "papillon_1973": 0.908761110103392, 
    "jaws_2_1978": 0.40372759856630824, 
    "live_free_or_die_hard_2007": 0.9490425531914893, 
    "twister_1996": 0.7794051878354203, 
    "the_godfather_part_ii_1974": 0.3168627450980392, 
    "the_rocky_horror_picture_show_1975": 0.8731955380577427, 
    "suspicion_1941": 0.6222572075174653, 
    "despicable_me_2_2013": 0.8992335015890821, 
    "the_wackness_2008": 0.7514657980456027, 
    "the_good_the_bad_and_the_ugly_1966": 0.9485462018572081, 
    "pollyanna_1960": 0.9126555529467011, 
    "shampoo_1975": 0.8844128113879004, 
    "die_hard_2_1990": 0.8989497587283565, 
    "suffragette_2015": 0.8731366952109102, 
    "die_another_day_2002": 0.37839673913043476, 
    "excalibur_1981": 0.9397669456753652, 
    "how_the_grinch_stole_christmas_2000": 0.8816964285714286, 
    "rocknrolla_2008": 0.9626441881100266, 
    "maggie_2015": 0.9161166116611661, 
    "mr_mrs_smith_2005": 0.9165584415584416, 
    "from_russia_with_love_1963": 0.24301851270787575, 
    "skyfall_2012": 0.4352994282275053, 
    "home_alone_3_1997": 0.5025842464337399, 
    "a_view_to_a_kill_1985": 0.3897748592870544, 
    "stir_crazy_1980": 0.9199864191942055, 
    "beneath_the_planet_of_the_apes_1970": 0.9418545081967213, 
    "the_man_from_laramie_1955": 0.5618269106007525, 
    "unbroken_2014": 0.9070203644158628, 
    "foxcatcher_2014": 0.8597076580870671, 
    "nightcrawler_2014": 0.9249935561474354, 
    "her_2013": 0.9439872577647995, 
    "age_of_uprising_the_legend_of_michael_kohlhaas_2013": 0.4324580828851629, 
    "charly_1968": 0.3553956834532374, 
    "bridge_of_spies_2015": 0.9449198305225555, 
    "the_perfect_storm_2000": 0.8599260523321957, 
    "diablo_2015": 0.8923319327731093, 
    "the_hobbit_the_desolation_of_smaug_2013": 0.9116561181434599, 
    "his_girl_friday_1940": 0.5785857650146673, 
    "mission_impossible_rogue_nation_2015": 0.9748135468883726, 
    "big_eyes_2014": 0.9626288659793815, 
    "thunderball_1965": 0.22095096582466567, 
    "toy_story_1995": 0.838788996465345, 
    "broken_city_2013": 0.8969495673455118, 
    "the_way_way_back_2013": 0.9318931573906566, 
    "all_about_eve_1950": 0.9788163485129555, 
    "splash_1984": 0.87340785027294, 
    "the_drop_2014": 0.9371328583927009, 
    "mr_mrs_smith_1941": 0.21060171919770773, 
    "the_fall_of_the_roman_empire_1964": 0.1852327447833066, 
    "dog_day_afternoon_1975": 0.9163172288058341, 
    "cromwell_1970": 0.8863573977491078, 
    "twin_peaks_fire_walk_with_me_1992": 0.9734140563306133, 
    "the_trouble_with_harry_1955": 0.9513544448683708, 
    "hitch_2005": 0.9535923958624546, 
    "the_paperboy_2012": 0.8794958165815495, 
    "batman_begins_2005": 0.9640829694323144, 
    "psycho_1960": 0.9838536060279871, 
    "goodfellas_1990": 0.8913898047172204, 
    "the_39_steps_1935": 0.9230205278592375, 
    "indiana_jones_and_the_last_crusade_1989": 0.7940177849636216, 
    "lifeboat_1944": 0.2051948051948052, 
    "miss_congeniality_2000": 0.9096139763573595, 
    "wedding_crashers_2005": 0.9367507234394378, 
    "midnight_cowboy_1969": 0.3645635436456354, 
    "waynes_world_1992": 0.9211631663974151, 
    "you_only_live_twice_1967": 0.35072619688004303, 
    "licence_to_kill_1989": 0.2963842702560042, 
    "conquest_of_the_planet_of_the_apes_1972": 0.9532972972972973, 
    "wild_2014": 0.9301859034372074, 
    "torn_curtain_1966": 0.9753768844221106, 
    "harry_potter_and_the_chamber_of_secrets_2002": 0.964837978922486, 
    "guardians_of_the_galaxy_2014": 0.9485037406483791, 
    "rock_the_kasbah_2015": 0.8562778475951103, 
    "home_alone_4_2002": 0.9014937993235626, 
    "the_terminator_1984": 0.835237842147223, 
    "the_great_dictator_1940": 0.30871970997901166, 
    "star_wars_episode_i_the_phantom_menace_1999": 0.925638429519631, 
    "prisoners_2013": 0.8940006556660475, 
    "crimson_peak_2015": 0.36511456023651145, 
    "shaun_the_sheep_movie_2015": 0.37830446672743845, 
    "abraham_lincoln_1930": 0.8611787627861666, 
    "the_intern_2015": 0.8745198463508322, 
    "citizenfour_2014": 0.9727606461086638, 
    "harry_potter_and_the_goblet_of_fire_2005": 0.8937701396348013, 
    "spotlight_2015": 0.9641593806313822, 
    "the_haunting_1999": 0.752362381880906, 
    "nymphomaniac_vol_i_2013": 0.2878124194795156, 
    "i_origins_2014": 0.9442477876106194, 
    "dances_with_wolves_1990": 0.7563977577382404, 
    "toy_story_2_1999": 0.8257538910505836, 
    "on_her_majestys_secret_service_1969": 0.2628236443575965, 
    "battle_for_the_planet_of_the_apes_1973": 0.9533001245330013, 
    "planet_of_the_apes_2001": 0.7742011278195489, 
    "star_trek_v_the_final_frontier_1989": 0.5975971731448764, 
    "hook_1991": 0.6547358729428012, 
    "rocky_ii_1979": 0.1412037037037037, 
    "rambo_first_blood_part_ii_1985": 0.9187538179596824, 
    "oceans_eleven_1960": 0.960443915973048, 
    "the_lord_of_the_rings_the_fellowship_of_the_ring_2001": 0.7823595716701108, 
    "airplane_1980": 0.9450536062378168, 
    "strangers_on_a_train_1951": 0.9294680988740779, 
    "witness_1985": 0.6431243124312431, 
    "spartan_2004": 0.31802965088474416, 
    "apollo_13_1995": 0.9340277777777778, 
    "teenage_mutant_ninja_turtles_out_of_the_shadows_2016": 0.8909497921330348, 
    "never_say_never_again_1983": 0.9248661311914324, 
    "harry_potter_and_the_deathly_hallows_part_2_2011": 0.9286516853932584, 
    "starship_troopers_1997": 0.8511335342681169, 
    "restoration_1995": 0.9392185238784371, 
    "face_off_1997": 0.9079318212320494, 
    "jurassic_world_2015": 0.900296815974096, 
    "hamlet_2_2008": 0.22189911642992954, 
    "family_plot_1976": 0.9585614600107354, 
    "lady_and_the_tramp_1955": 0.8540145985401459, 
    "the_treasure_of_the_sierra_madre_1948": 0.9447832261298137, 
    "war_of_the_worlds_2005": 0.7912744658598536, 
    "godzilla_2014": 0.8857686815802, 
    "pitch_black_2000": 0.778094694808899, 
    "three_days_of_the_condor_1975": 0.7607486776074868, 
    "mad_max_1979": 0.7440651141758987, 
    "victor_frankenstein_2015": 0.4208837324117502, 
    "deadpool_2016": 0.935852165725047, 
    "room_2015": 0.8326666666666667, 
    "the_matrix_1999": 0.9667356797791581, 
    "batman_returns_1992": 0.9436135465016331, 
    "a_night_to_remember_1958": 0.8873225254254355, 
    "calvary_2014": 0.31356843992827255, 
    "top_hat_1935": 0.9683131407269339, 
    "mission_impossible_iii_2006": 0.8944267124969311, 
    "jumanji_1995": 0.9154707735386769, 
    "goldeneye_1995": 0.37458745874587457, 
    "secret_agent_1936": 0.3255909277223664, 
    "lover_come_back_1961": 0.27325004957366644, 
    "sense_and_sensibility_1995": 0.9134782608695652, 
    "casino_royale_1967": 0.23333333333333334, 
    "terminator_2_judgment_day_1991": 0.6584184114898771, 
    "ted_2_2015": 0.9444931987713909, 
    "the_return_of_the_pink_panther_1975": 0.3221838908054597, 
    "my_best_friends_wedding_1997": 0.8743408360128617, 
    "labor_day_2013": 0.9098680263947211, 
    "valentines_day_2010": 0.9339684866251374, 
    "mr_holmes_2015": 0.3590338427947598, 
    "two_mules_for_sister_sara_1970": 0.21697054698457224, 
    "rocky_iii_1982": 0.19113508442776736, 
    "the_comedians_1967": 0.26833035278816453, 
    "benhur_1959": 0.9628140703517588, 
    "la_femme_nikita_1990": 0.5101152925821187, 
    "1984_1984": 0.8782241675785525, 
    "donnie_darko_2001": 0.935902327355971, 
    "goin_south_1978": 0.33863597901506176, 
    "escape_from_the_planet_of_the_apes_1971": 0.9762560671651581, 
    "oceans_eleven_2001": 0.964119170984456, 
    "the_lord_of_the_rings_the_return_of_the_king_2003": 0.5542576083615125, 
    "the_world_is_not_enough_1999": 0.4554160125588697, 
    "zootopia_2016": 0.561885453623923, 
    "creed_2015": 0.7593116454502593, 
    "walk_the_line_2005": 0.8184920106524634, 
    "hardcore_henry_2015": 0.9216867469879518, 
    "catch_22_1970": 0.2489725286610426, 
    "dr_no_1962": 0.23075589459084606, 
    "blazing_saddles_1974": 0.9230416857535502, 
    "grumpy_old_men_1993": 0.9557627929220469, 
    "romancing_the_stone_1984": 0.9511059371362048, 
    "truth_2015": 0.9682746965739745, 
    "brooklyn_2015": 0.22757039947609692, 
    "serenity_2005": 0.9614090195198564, 
    "harry_potter_and_the_order_of_the_phoenix_2007": 0.89484827099506, 
    "superman_ii_1980": 0.9347916956945799, 
    "die_hard_with_a_vengeance_1995": 0.9029013883944464, 
    "the_music_man_1962": 0.916371158392435, 
    "the_time_machine_1960": 0.9751979991663193, 
    "big_hero_6_2014": 0.9243891194098663, 
    "vacation_2015": 0.8851635300566417, 
    "airport_1970": 0.934472746469579, 
    "scary_movie_4_2006": 0.8696433508432403, 
    "mr_right_2015": 0.9019955240581873, 
    "12_years_a_slave_2013": 0.9338812103100486, 
    "mission_impossible_ii_2000": 0.9195473569989149, 
    "lethal_weapon_1987": 0.48276873441853646, 
    "jaws_3_1983": 0.5280837004405287, 
    "waterworld_1995": 0.8529505076142132, 
    "the_hunt_for_red_october_1990": 0.9483598272535146, 
    "goldfinger_1964": 0.2573181419807187, 
    "brazil_1985": 0.45769748653500897, 
    "dirty_rotten_scoundrels_1988": 0.9081644605833431, 
    "raiders_of_the_lost_ark_1981": 0.6540389972144847, 
    "the_departed_2006": 0.5511687571265679, 
    "star_trek_generations_1994": 0.7257083260907353, 
    "the_6th_day_2000": 0.927143024207695, 
    "runner_runner_2013": 0.8991899189918992, 
    "prometheus_2012": 0.8300760853349246, 
    "rocky_1976": 0.20200417536534446, 
    "the_dark_knight_rises_2012": 0.9498458042525564, 
    "schindlers_list_1993": 0.22605397016984222, 
    "lethal_weapon_3_1992": 0.26548954108009226, 
    "the_hunger_games_mockingjay_part_1_2014": 0.9174300720654697, 
    "hotel_rwanda_2004": 0.7800680201999382, 
    "the_dark_knight_2008": 0.9501324606513948, 
    "the_good_german_2006": 0.9706891621389763, 
    "rambo_2008": 0.36053077357425184, 
    "the_assassination_of_jesse_james_by_the_coward_robert_ford_2007": 0.9516065053550179, 
    "the_woman_in_black_2_angel_of_death_2014": 0.8849246231155778, 
    "notorious_1946": 0.967219708396179, 
    "dangerous_beauty_1998": 0.910856134157105, 
    "harvey_1950": 0.9555170806419392, 
    "scary_movie_2000": 0.8225596014707627, 
    "diabolique_1955": 0.2532730853898737, 
    "stage_fright_1950": 0.9408097165991903, 
    "harry_potter_and_the_halfblood_prince_2009": 0.9289345477653039, 
    "home_alone_the_holiday_heist_2012": 0.9330616277633691, 
    "tron_legacy_2010": 0.9220292815798434, 
    "the_shawshank_redemption_1994": 0.808739012846518, 
    "the_life_aquatic_with_steve_zissou_2004": 0.9764275553749238, 
    "mud_2012": 0.921069883309296, 
    "charlies_angels_2000": 0.9372188139059304, 
    "harry_potter_and_the_prisoner_of_azkaban_2004": 0.9028061498763574, 
    "moonraker_1979": 0.33723747980613894, 
    "the_pink_panther_strikes_again_1976": 0.2900122441840126, 
    "ace_ventura_pet_detective_1994": 0.9476896251089799, 
    "2010_the_year_we_make_contact_1984": 0.9786383247395559, 
    "half_shot_at_sunrise_1930": 0.9178870947552885, 
    "man_of_la_mancha_1972": 0.8743834265044393, 
    "sisters_2015": 0.32317750898986597, 
    "ex_machina_2015": 0.9744660350088326, 
    "my_big_fat_greek_wedding_2002": 0.3712178232726178, 
    "harry_potter_and_the_sorcerers_stone_2001": 0.9228256925123987, 
    "fantasia_1940": 0.994991652754591, 
    "valkyrie_2008": 0.8136508376021044, 
    "the_bourne_ultimatum_2007": 0.9490379615184608, 
    "the_martian_2015": 0.9178882909630729, 
    "star_trek_first_contact_1996": 0.9567015995587425, 
    "2001_a_space_odyssey_1968": 0.9656932816009802, 
    "dial_m_for_murder_1954": 0.9622798397460125, 
    "the_godfather_part_iii_1990": 0.9046815689582455, 
    "mars_attacks_1996": 0.8887974995887481, 
    "lucy_2014": 0.9186664224216889, 
    "the_lady_vanishes_1938": 0.9254691689008043, 
    "a_good_day_to_die_hard_2013": 0.8857025809094633, 
    "snowden_2016": 0.24683879760732944, 
    "antony_and_cleopatra_1972": 0.8140004034698406, 
    "king_solomons_mines_1950": 0.9052614052614053, 
    "deep_cover_1992": 0.8826451412604781, 
    "star_trek_iv_the_voyage_home_1986": 0.939512298110419, 
    "mental_2012": 0.24042835481425323, 
    "avengers_age_of_ultron_2015": 0.8564889863640698, 
    "the_bourne_supremacy_2004": 0.9046401887534408, 
    "john_wick_2014": 0.9181657848324515, 
    "side_effects_2013": 0.9427917620137299, 
    "man_in_the_wilderness_1971": 0.814974182444062, 
    "date_night_2010": 0.9053700569318357, 
    "star_trek_2009": 0.9083938660209847, 
    "the_maltese_falcon_1941": 0.9644699617240805, 
    "the_aristocats_1970": 0.2756542699724518, 
    "kellys_heroes_1970": 0.9034974734964827, 
    "star_trek_ii_the_wrath_of_khan_1982": 0.9512792596624932, 
    "the_day_the_earth_stood_still_1951": 0.16371420135813405, 
    "pretty_woman_1990": 0.9114134303567192, 
    "trance_2013": 0.9370972691009513, 
    "five_easy_pieces_1970": 0.8191194968553459, 
    "anna_karenina_1935": 0.9461287595120184, 
    "to_catch_a_thief_1955": 0.16011145643553745, 
    "born_yesterday_1950": 0.9273921687267698, 
    "minions_2015": 0.7663192539769611, 
    "anchors_aweigh_1945": 0.6976467589839436, 
    "the_social_network_2010": 0.21977894124490982, 
    "birdman_or_the_unexpected_virtue_of_ignorance_2014": 0.9248554913294798, 
    "the_living_daylights_1987": 0.35072815533980584, 
    "pulp_fiction_1994": 0.8757408405172413, 
    "broken_arrow_1950": 0.9873843566021867, 
    "pinocchio_1940": 0.27985254047122937, 
    "the_birds_1963": 0.8862591290235325, 
    "star_trek_the_motion_picture_1979": 0.9504695546803998, 
    "pillow_talk_1959": 0.8979826353421859, 
    "waynes_world_2_1993": 0.9465891794658918, 
    "rebel_without_a_cause_1955": 0.9658215436335621, 
    "southpaw_2015": 0.78504743261557, 
    "mission_impossible_ghost_protocol_2011": 0.20515381542149422, 
    "mutiny_on_the_bounty_1935": 0.22038125779440584, 
    "last_knights_2015": 0.23824451410658307, 
    "straight_outta_compton_2015": 0.28183311817861756, 
    "superman_iii_1983": 0.9295677233429395, 
    "super_8_2011": 0.8182447808449589, 
    "kubo_and_the_two_strings_2016": 0.9758137843244227, 
    "star_wars_episode_vi_return_of_the_jedi_1983": 0.9582985821517932, 
    "police_academy_2_their_first_assignment_1985": 0.9077506318449874, 
    "one_flew_over_the_cuckoos_nest_1975": 0.884326628973955, 
    "star_wars_episode_iv_a_new_hope_1977": 0.9223886598974566, 
    "the_grand_budapest_hotel_2014": 0.9787135047109456, 
    "the_nightmare_before_christmas_1993": 0.6605898123324396, 
    "mad_max_fury_road_2015": 0.7884922749067661, 
    "vertigo_1958": 0.9564105415323029, 
    "the_princess_bride_1987": 0.9608617189262351, 
    "no_escape_2015": 0.8084923500085955, 
    "thunderheart_1992": 0.9153571900171662, 
    "taken_3_2014": 0.9552218241939928, 
    "madagascar_2005": 0.8646937193073145, 
    "the_wrong_man_1956": 0.7829998941462898, 
    "the_jewel_of_the_nile_1985": 0.9075832956429971, 
    "pride_2014": 0.8698270094619134, 
    "the_hobbit_the_battle_of_the_five_armies_2014": 0.9040697674418605, 
    "erin_brockovich_2000": 0.9059347679892401, 
    "cinderella_2015": 0.7164715274081959, 
    "superman_1978": 0.8530128874956461, 
    "love_story_1970": 0.8145408875244209, 
    "dick_1999": 0.7562767094017094, 
    "stoker_2013": 0.9142212189616253, 
    "independence_day_resurgence_2016": 0.7159160383126146, 
    "the_hateful_eight_2015": 0.9579464773347898, 
    "kill_bill_vol_2_2004": 0.4725274725274725, 
    "charlies_angels_full_throttle_2003": 0.8904634878292764, 
    "men_in_black_ii_2002": 0.6630105354307981, 
    "macbeth_1982": 0.27452574525745255, 
    "indiana_jones_and_the_temple_of_doom_1984": 0.7775005670220004, 
    "into_the_woods_2014": 0.8391691394658753, 
    "finding_dory_2016": 0.6768872320596458, 
    "back_to_the_future_part_ii_1989": 0.26684131736526945, 
    "clueless_1995": 0.9429243638898571, 
    "cymbeline_2014": 0.9578709412248253, 
    "spellbound_1945": 0.9770175903827455, 
    "the_good_son_1993": 0.9117383512544803, 
    "police_academy_4_citizens_on_patrol_1987": 0.9032097948378557, 
    "cinderella_1950": 0.7799164699473398, 
    "stardust_2007": 0.9266582365789171, 
    "mission_impossible_1996": 0.9188063844552394, 
    "1911_2011": 0.6327272727272727, 
    "spies_like_us_1985": 0.9807630650849631, 
    "star_trek_into_darkness_2013": 0.9491866203619083, 
    "nymphomaniac_vol_ii_2013": 0.2023468803663423, 
    "kind_hearts_and_coronets_1949": 0.9163126252505011, 
    "first_blood_1982": 0.5917096602028109, 
    "wyatt_earp_1994": 0.9596460176991151, 
    "the_magnificent_seven_1960": 0.9731672871998973, 
    "berserk_the_golden_age_arc_iii_the_advent_2013": 0.5287569573283859, 
    "the_man_with_the_golden_gun_1974": 0.4615827338129496, 
    "the_nice_guys_2016": 0.19939354957272812, 
    "the_mummy_returns_2001": 0.8074703430586726, 
    "men_in_black_1997": 0.9371021521673234, 
    "shanghai_noon_2000": 0.712579265595045, 
    "batman_robin_1997": 0.9659795542148484, 
    "swiss_family_robinson_1960": 0.8994191049913941, 
    "z_for_zachariah_2015": 0.8597626752966558, 
    "legend_2015": 0.30127690623859904, 
    "under_the_skin_2013": 0.7529812606473595, 
    "tale_of_two_cities_1935": 0.9509496154449851, 
    "the_twilight_saga_eclipse_2010": 0.9614102564102565, 
    "dumb_and_dumber_to_2014": 0.9583620096352374, 
    "the_prince_of_egypt_1998": 0.3815294609277058, 
    "tora_tora_tora_1970": 0.7671180863181876, 
    "the_hitchhikers_guide_to_the_galaxy_2005": 0.8322155986090413, 
    "the_second_best_exotic_marigold_hotel_2015": 0.9534883720930233, 
    "tomorrow_never_dies_1997": 0.24585477415666096, 
    "the_hunger_games_2012": 0.8834638415476739, 
    "the_godfather_1972": 0.9380325329202169, 
    "the_imitation_game_2014": 0.284442116291252, 
    "the_fifth_element_1997": 0.8072002007024586, 
    "jaws_1975": 0.867618844596035, 
    "help_1965": 0.6291948833709556, 
    "casper_1995": 0.8987320371935756, 
    "swiss_army_man_2016": 0.8833434466019418, 
    "berserk_the_golden_age_arc_ii_the_battle_for_doldrey_2012": 0.41964285714285715, 
    "trail_of_the_pink_panther_1982": 0.2217086625913225, 
    "patriot_games_1992": 0.8546058330926942, 
    "despicable_me_2010": 0.919524813182602, 
    "little_big_man_1970": 0.9021739130434783, 
    "black_orpheus_1959": 0.4710017574692443, 
    "the_frisco_kid_1979": 0.6747632770687526, 
    "galaxy_quest_1999": 0.900435380384968, 
    "berserk_the_golden_age_arc_i_the_egg_of_the_king_2012": 0.4621524910542252, 
    "the_longest_yard_2005": 0.2578564940962761, 
    "the_usual_suspects_1995": 0.9726453550740185, 
    "fifty_shades_of_grey_2015": 0.6438081603435934, 
    "indiana_jones_and_the_kingdom_of_the_crystal_skull_2008": 0.9576951130561634, 
    "diamonds_are_forever_1971": 0.27266684392448814, 
    "police_academy_mission_to_moscow_1994": 0.9441988950276243, 
    "the_walk_2015": 0.9139220044453424, 
    "the_pink_panther_1963": 0.19668597602047688, 
    "enemy_2013": 0.7584576209530739, 
    "saving_private_ryan_1998": 0.8824274139095206, 
    "the_lion_king_1994": 0.8859915100060642, 
    "marnie_1964": 0.9525086505190311, 
    "in_the_cut_2003": 0.9579033134166214, 
    "a_midsummer_nights_dream_1999": 0.20651700087183958, 
    "shenandoah_1965": 0.9796778563901851, 
    "inherit_the_wind_1960": 0.9493903238494821, 
    "rocky_iv_1985": 0.18572600492206726, 
    "the_breakfast_club_1985": 0.9118792599805258, 
    "the_grapes_of_wrath_1940": 0.2884494003787082, 
    "steve_jobs_2015": 0.9619089801793821, 
    "jauja_2014": 0.34173430158052115, 
    "peeping_tom_1960": 0.9570656809781063, 
    "pan_2015": 0.9158586688578472, 
    "the_man_who_would_be_king_1975": 0.9571274168367959, 
    "everest_2015": 0.8720264317180617, 
    "the_big_short_2015": 0.9332035053554041, 
    "the_sum_of_all_fears_2002": 0.7087390168606031, 
    "coal_miners_daughter_1980": 0.8702089855317708, 
    "the_flame_and_the_arrow_1950": 0.9738050229543613, 
    "toy_story_3_2010": 0.8540499235863475, 
    "shrek_2001": 0.8530941513639742, 
    "killer_elite_2011": 0.35278154681139756, 
    "rambo_iii_1988": 0.8029815843320667, 
    "mandela_long_walk_to_freedom_2013": 0.8481208833785354, 
    "philadelphia_1993": 0.1948647857073123, 
    "pale_rider_1985": 0.9336124401913876, 
    "entourage_2015": 0.9549062302451359, 
    "romy_and_micheles_high_school_reunion_1997": 0.8441500112283854, 
    "star_wars_episode_iii_revenge_of_the_sith_2005": 0.9697306303804498, 
    "star_trek_iii_the_search_for_spock_1984": 0.9665753424657534, 
    "the_truth_about_cats_dogs_1996": 0.8792310052152572, 
    "mongol_the_rise_of_genghis_khan_2007": 0.5947734039034072, 
    "bon_voyage_1944": 0.21599402092675635, 
    "2012_2009": 0.8422436459246275, 
    "v_for_vendetta_2005": 0.21670820240546104, 
    "hang_em_high_1968": 0.916654264027385, 
    "back_to_the_future_part_iii_1990": 0.5953978906999041, 
    "mash_1970": 0.21897588069694895, 
    "lethal_weapon_2_1989": 0.48712624584717606, 
    "dolls_1987": 0.23301561549381464, 
    "how_to_train_your_dragon_2_2014": 0.9507575757575758, 
    "poltergeist_2015": 0.8700427960057061, 
    "blackmail_1929": 0.38283723875870806, 
    "total_recall_1990": 0.912644062134625, 
    "rear_window_1954": 0.9610617640983268, 
    "star_wars_the_force_awakens_2015": 0.46145172656800565, 
    "revenge_of_the_pink_panther_1978": 0.24378183997602637, 
    "ran_1985": 0.5336083892040571, 
    "brubaker_1980": 0.9061001342796854, 
    "the_saint_1997": 0.7052845528455285, 
    "star_trek_nemesis_2002": 0.9718476811379464, 
    "cocoon_1985": 0.9252583513578466, 
    "ghost_1990": 0.9170246716582792, 
    "the_asphalt_jungle_1950": 0.9821170520231214, 
    "mystery_science_theater_3000_the_movie_1996": 0.9558325133923691, 
    "batman_1989": 0.9517550889565636, 
    "dinosaur_2000": 0.9380530973451328, 
    "oceans_twelve_2004": 0.9620264888394925, 
    "the_blue_lagoon_1980": 0.6518685023883113, 
    "the_postman_always_rings_twice_1946": 0.9816574795666223, 
    "the_elephant_man_1980": 0.3560702875399361, 
    "star_wars_episode_v_the_empire_strikes_back_1980": 0.9379151416564999, 
    "the_taming_of_the_shrew_1980": 0.9402612826603325, 
    "a_night_at_the_opera_1935": 0.9817545613596601, 
    "sixteen_candles_1984": 0.7744568977445689, 
    "police_academy_5_assignment_miami_beach_1988": 0.9041175614724284, 
    "police_academy_3_back_in_training_1986": 0.8850481004402413, 
    "speed_1994": 0.7865041751297676, 
    "a_shot_in_the_dark_1964": 0.2107466852756455, 
    "carol_2015": 0.8616116476863197, 
    "the_colony_2013": 0.4240155296727676, 
    "fading_gigolo_2013": 0.9159799882283697, 
    "kill_me_three_times_2014": 0.9242466910729372, 
    "east_of_eden_1955": 0.866221392790492, 
    "the_mummy_1999": 0.9079320113314447, 
    "north_by_northwest_1959": 0.9674315321983715, 
    "teenage_mutant_ninja_turtles_1990": 0.9251668255481411, 
    "jaws_the_revenge_1987": 0.6711573790569504, 
    "a_clockwork_orange_1971": 0.9296545105566218, 
    "hell_or_high_water_2016": 0.9256277292576419, 
    "airplane_ii_the_sequel_1982": 0.16946182728410514, 
    "princess_mononoke_1997": 0.16240116521015396, 
    "gravity_2013": 0.8988542078229949, 
    "star_trek_beyond_2016": 0.8388785046728972, 
    "for_your_eyes_only_1981": 0.2801841389412011, 
    "the_spy_who_loved_me_1977": 0.36531025593813293, 
    "beach_blanket_bingo_1965": 0.2980587121212121, 
    "sausage_party_2016": 0.8661934526962602, 
    "i_confess_1953": 0.8279631105544802, 
    "salt_2010": 0.7316645408163265, 
    "the_man_who_knew_too_much_1956": 0.9084322801368485, 
    "apocalypto_2006": 0.6732380482996551, 
    "the_lodger_1927": 0.5591133004926109, 
    "remember_2015": 0.47061873487729, 
    "the_hunger_games_catching_fire_2013": 0.9562323745064861, 
    "batman_the_killing_joke_2016": 0.9669893667671798, 
    "batman_forever_1995": 0.9404016064257028, 
    "chicken_little_2005": 0.846886218198845, 
    "the_day_of_the_jackal_1973": 0.8931572629051621, 
    "kill_bill_vol_1_2003": 0.861439842209073, 
    "back_to_the_future_1985": 0.536457216384302, 
    "alice_in_wonderland_2010": 0.912357286769644, 
    "cast_away_2000": 0.8404693289419514, 
    "spectre_2015": 0.48554913294797686, 
    "the_forest_2016": 0.30306631804135964
    }
    result = []
    for name, score in alignment_scores.items():
        if score >= min_score:
            try:
                result.append(Video.objects.filter(path="movies/"+name+".mp4")[0].id)
            except:
                print(name)
    return result
# Ids of the videos whose alignment score is at least 0.9.
vids = get_vids_with_high_alignment_score(min_score=0.9)
# Cell output: how many videos passed the threshold.
len(vids)

In [None]:
def get_average_words_speed(vid):
    """Measure speaking speed in one video, overall and per shot.

    Args:
        vid: id of the video to analyze.

    Returns:
        A tuple ``(shots_with_lines, average_words_per_sec,
        words_per_sec_for_shots)``:

        * ``shots_with_lines`` -- interval collection of the cinematic shots
          that overlap at least one caption interval. Each payload is a dict
          with the shot's ``'duration'`` and the list of overlapping caption
          payloads under ``'words'``.
        * ``average_words_per_sec`` -- summed caption payload lengths of the
          whole video divided by the summed duration of its cinematic shots.
        * ``words_per_sec_for_shots`` -- numpy array with one value per
          interval of ``shots_with_lines``, in the same order.

        ``(None, None, None)`` when the video has no cinematic shots, no shot
        overlapping a caption, or a non-positive total shot duration.
    """
    # Imported locally like the rest of this function's dependencies, so the
    # cell does not rely on another cell having put these names in scope.
    from django.db.models import ExpressionWrapper, F, FloatField
    from rekall.video_interval_collection import VideoIntervalCollection
    from query.models import Shot, Video
    from esper.captions import get_all_segments
    from rekall.temporal_predicates import overlaps
    from rekall.merge_ops import payload_first, payload_plus, merge_named_payload

    # Cinematic shots of this video; duration = frame span / fps.
    shots = VideoIntervalCollection.from_django_qs(
        Shot.objects.annotate(
            duration=ExpressionWrapper(
                (F('max_frame') - F('min_frame')) / F('video__fps'),
                output_field=FloatField())
        ).filter(
            cinematic=True,
            video_id=vid
        ),
        with_payload=lambda shot: {'duration': shot.duration})
    if vid not in shots.get_allintervals():
        return None, None, None

    # Load captions; their start/end are scaled by fps to get frame numbers.
    caption_results = get_all_segments([vid])
    fps = Video.objects.get(id=vid).fps
    captioned_intervals = VideoIntervalCollection({
        video_id: [
            (
                word[0] * fps,  # start frame
                word[1] * fps,  # end frame
                word[2],        # payload is the word
            )
            for word in words
        ]
        for video_id, words in caption_results
    })

    # Get shots with spoken lines: one joined interval per (shot, caption)
    # overlap, then coalesced so each shot carries all of its captions.
    shots_with_lines = shots.join(
        captioned_intervals,
        lambda shot_interval, word_interval:
            [(shot_interval.start, shot_interval.end, {
                'duration': shot_interval.payload['duration'],
                'words': [word_interval.payload]
            })],
        predicate=overlaps(),
        working_window=1
    ).coalesce(payload_merge_op=merge_named_payload({
        'duration': payload_first,
        'words': payload_plus
    })).filter_length(min_length=1)

    # Bail out before doing any per-video arithmetic: without a shot that has
    # lines there is nothing to report, and the totals below are not needed.
    if vid not in shots_with_lines.get_allintervals():
        return None, None, None

    def get_num_words(words_in_shot):
        # NOTE(review): counts len() of every caption payload. If a payload is
        # a single word string this is a character count -- confirm against
        # what get_all_segments returns.
        return sum(len(words) for words in words_in_shot)

    # Compute average words spoken per second over the whole video.
    total_num_words = sum(
        len(intvl.payload)
        for intvl in captioned_intervals.get_intervallist(vid).intrvls)
    total_time = sum(
        intvl.payload['duration']
        for intvl in shots.get_intervallist(vid).intrvls)
    if total_time <= 0:
        # Avoid a ZeroDivisionError; callers already skip a None result.
        return None, None, None
    average_words_per_sec = total_num_words / total_time

    # Compute word speed for each shot that has lines.
    words_per_sec_for_shots = np.array([
        get_num_words(intvl.payload['words']) / intvl.payload['duration']
        for intvl in shots_with_lines.get_intervallist(vid).intrvls])
    return shots_with_lines, average_words_per_sec, words_per_sec_for_shots

# vid -> (average words per second, per-shot words-per-second array) for
# every video that produced shots with spoken lines.
result = {}
for vid in tqdm(vids):
    intvls, average, per_shot = get_average_words_speed(vid)
    if intvls is None:
        continue
    result[vid] = (average, per_shot)
    # Report the first shot faster than 15 words per second, if there is one.
    if not np.any(per_shot > 15):
        continue
    i = np.nonzero(per_shot > 15)[0][0]
    print(vid, i)
    print(intvls.get_intervallist(vid).intrvls[i])

In [None]:
def scatter(x, y):
    """Show a scatter plot of ``y`` against ``x`` with a fitted line on top.

    The line is a degree-1 ``np.polyfit`` of the data, evaluated at the
    sorted unique values of ``x``.
    """
    import matplotlib.pyplot as plt

    axes = plt.gca()
    axes.scatter(x, y)
    unique_x = np.unique(x)
    trend = np.poly1d(np.polyfit(x, y, 1))
    axes.plot(unique_x, trend(unique_x))
    plt.show()
    
def split_data(year, data_year, data_shots):
    """Partition per-video data into two groups around a cutoff year.

    Args:
        year: cutoff year; videos from exactly this year go in the first group.
        data_year: year of each video.
        data_shots: per-video data, parallel to ``data_year`` (the two
            sequences are expected to have the same length).

    Returns:
        A tuple ``(d1, d2)`` of lists: ``d1`` holds the entries of
        ``data_shots`` whose year is ``<= year``, ``d2`` those whose year is
        ``> year``. Input order is preserved within each list.
    """
    paired = list(zip(data_year, data_shots))
    # Two explicit conditions rather than if/else, so a year that satisfies
    # neither comparison (e.g. NaN) is left out of both groups.
    d1 = [shots for y, shots in paired if y <= year]
    d2 = [shots for y, shots in paired if y > year]
    return d1, d2
    
def hist(xs):
    """Show a 50-bin histogram of ``xs`` on the current matplotlib axes."""
    import matplotlib.pyplot as plt

    plt.gca().hist(xs, bins=50)
    plt.show()
    
def plot_buckets(data_pair, label_pair, max_val=7):
    """Show two bucketed distributions side by side as a grouped bar chart.

    Args:
        data_pair: ``(old, new)`` sequences of per-bucket fractions, one entry
            per bucket. The number of buckets is taken from ``old``.
        label_pair: ``(old_label, new_label)`` legend labels.
        max_val: value covered by the full x axis; every bucket is
            ``max_val / len(old)`` wide.

    Raises:
        ZeroDivisionError: if ``old`` is empty.
    """
    # Imported locally, as scatter() and hist() do, so the function does not
    # depend on `plt` already being defined by another cell.
    import matplotlib.pyplot as plt

    old, new = data_pair
    old_label, new_label = label_pair
    num_buckets = len(old)
    width = max_val / num_buckets
    # Center of every bucket on the x axis.
    x = (np.arange(num_buckets) + 0.5) * width
    # Fractions -> percentages. np.asarray makes plain lists scale
    # element-wise instead of being repeated 100 times.
    y1 = np.asarray(old) * 100
    y2 = np.asarray(new) * 100
    fig, ax = plt.subplots(figsize=(6.666, 3.333))
    # Each bucket is split in two half-width bars: old on the left half,
    # new on the right half.
    ax.bar(x - width / 2, y1, width=width / 2, align='edge', label=old_label)
    ax.bar(x, y2, width=width / 2, align='edge', label=new_label)
    ax.set_xlabel("Words spoken per second in a shot")
    ax.set_ylabel('Average Percentages of Shots')
    ax.set_title("Distribution of Speaking Speed per shot")
    ax.legend()
    plt.show()
    
def get_normalized_frequencies(data, num_buckets=32, max_val=7):
    """Average, over videos, the fraction of each video's values per bucket.

    Every video's values are binned into ``num_buckets`` equal-width buckets
    spanning ``[0, max_val)``. Each video contributes a row of proportions
    (summing to 1 when the video has values), and the rows are averaged so
    that every video weighs the same regardless of how many values it has.

    Args:
        data: sequence with one sequence of numbers per video.
        num_buckets: number of buckets.
        max_val: upper end of the bucketed range; larger values are counted
            in the last bucket.

    Returns:
        numpy array of length ``num_buckets`` with the mean proportion per
        bucket.
    """
    num_vids = len(data)
    bucket_proportions = np.zeros((num_vids, num_buckets))
    bucket_width = max_val / num_buckets
    for i, values in enumerate(tqdm(data)):
        num_seqs = len(values)
        if num_seqs == 0:
            # Nothing to bin; the row stays all zeros instead of dividing by 0.
            continue
        unit = 1 / num_seqs
        for d in values:
            # Clamp on both sides: too-large values go to the last bucket and
            # negative ones to the first (a negative index would otherwise
            # wrap around to the end of the row).
            bucket_index = min(max(int(d / bucket_width), 0), num_buckets - 1)
            bucket_proportions[i, bucket_index] += unit
    return np.mean(bucket_proportions, axis=0)

# Unpack the per-video results into parallel lists (same order as `result`).
data_vids = list(result)
data_year = [Video.objects.get(id=video_id).year for video_id in data_vids]
data_average = [average for average, _ in result.values()]
data_average_shots = [per_shot for _, per_shot in result.values()]

# Overall speaking speed against release year.
scatter(data_year, data_average)

# Per-shot speed distributions, split at 1980 (1980 itself is in d1).
d1, d2 = split_data(1980, data_year, data_average_shots)
plot_buckets(
    (get_normalized_frequencies(d1), get_normalized_frequencies(d2)),
    ("Before 1980", "After 1980"),
)

# Scratch

In [None]:
# Scratch: frame rate stored for video 5.
Video.objects.get(id=5).fps

In [None]:
# Scratch: intervals of video 5 in `shots` whose start equals 38011.
# NOTE(review): `shots` is not assigned at top level in the visible cells --
# presumably left over from an earlier interactive session.
shots.filter(lambda intvl:intvl.start==38011).get_intervallist(5)

In [None]:
# Scratch: all genres of the first video that has a genre named 'action'.
Video.objects.filter(genres__name='action')[0].genres.all()

In [None]:
# Scratch: genre querysets (`ls`) for every video present in `shots`, and the
# subset (`lls`) that includes a genre named 'action'.
ids=list(shots.get_allintervals().keys())
ls=[(v.genres.all()) for v in Video.objects.filter(id__in=ids)]
lls=[l for l in ls if 'action' in [g.name for g in l]]

In [None]:
# Scratch: number of entries in `ls`.
len(ls)

In [None]:
# Scratch: number of entries in `lls`.
len(lls)

In [None]:
# Scratch arithmetic -- presumably len(lls) / len(ls) from an earlier run;
# TODO confirm where the two numbers come from.
155/391

In [None]:
# Scratch: look up frame number 159364 of video 525.
Frame.objects.filter(video_id=525, number=159364)

In [None]:
# Scratch: look up frame number 177768 of video 14.
Frame.objects.filter(video_id=14, number=177768)

In [None]:
# Scratch: look up frame number 163126 of video 564.
Frame.objects.filter(video_id=564, number=163126)

In [None]:
# Scratch: look up frame number 140614 of video 557.
Frame.objects.filter(video_id=557, number=140614)

In [None]:
# Scratch: how does the mean of an empty list compare against a threshold?
# np.mean([]) is nan (NumPy also emits a RuntimeWarning), so the comparison
# evaluates to False.
import numpy as np
np.mean([]) > 0

In [None]:
# Scratch: number of frames that have no brightness value stored.
Frame.objects.filter(brightness=None).count()

In [None]:
# Scratch: first interval of video 469 among the `dialogs` intervals whose
# start equals 19324; shown as the cell output.
# NOTE(review): `dialogs` must already exist from an earlier cell.
intvl = dialogs.filter(lambda intvl:intvl.start==19324).get_allintervals()[469].intrvls[0]
intvl

In [None]:
# Frame rate of the single video (id 27) inspected in this cell.
fps_map = {i: Video.objects.get(id=i).fps for i in [27]}


def has_few_words(intvl):
    """Return True when the interval has at most one word per second.

    Reads ``intvl.payload['words']`` for the word count and looks up the frame
    rate in ``fps_map`` via the ``'vid'`` of the first entry of
    ``intvl.payload['shots']`` to turn the interval's frame span into seconds.
    """
    MAX_NUM_WORDS_PER_SECOND = 1

    payload = intvl.payload
    word_count = len(payload['words'])
    frame_span = intvl.end - intvl.start
    seconds = frame_span / fps_map[payload['shots'][0]['vid']]
    return word_count / seconds <= MAX_NUM_WORDS_PER_SECOND


has_few_words(intvl)


In [None]:
# Scratch: fetch video 12.
Video.objects.get(id=12)

In [None]:
# Frame rate of every video, keyed by video id.
fps_map = {}
for v in Video.objects.all():
    fps_map[v.id] = v.fps


def has_many_words(intvl):
    """Return True when the interval has more than one word per second.

    Reads ``intvl.payload['words']`` for the word count and looks up the frame
    rate in ``fps_map`` via the ``'vid'`` of the first entry of
    ``intvl.payload['shots']`` to turn the interval's frame span into seconds.
    """
    MIN_NUM_WORDS_PER_SECOND = 1

    payload = intvl.payload
    word_count = len(payload['words'])
    frame_span = intvl.end - intvl.start
    seconds = frame_span / fps_map[payload['shots'][0]['vid']]
    return word_count / seconds > MIN_NUM_WORDS_PER_SECOND


# Show the `dialogs` intervals that pass the filter in the Esper widget.
esper_widget(
    intrvllists_to_result_with_objects(
        dialogs.filter(has_many_words).get_allintervals(),
        lambda p, v: [],
        stride=1),
    show_middle_frame=False,
    disable_caption=True)

In [None]:
# Scratch: fetch video 1.
Video.objects.get(id=1)