# scene_clustering
This notebook contains initial code for clustering frames into shots, identifying the A/B/A/B pattern, and using the image classifier model to check whether those shots are MCUs.

In [1]:
import sys
import os
sys.path.append('site-packages') # manually put all packages/libraries into this folder
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras import models
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering

Using TensorFlow backend.


## Clustering
### For POC, designating a specific scene's worth of frames

In [2]:
# Film to analyse, and the frame numbers treated as one scene for this proof of concept
film = 'hustle'
frame_choice = [*range(600, 1000)] # The Hustle, threshold 3100, +/- ~100 frames either side

In [3]:
# Folder that holds this film's extracted frames
dialogue_folder = os.path.join('dialogue_frames', film)
n_images_in_folder = len(os.listdir(dialogue_folder))

print('There are', n_images_in_folder, 'images in the folder')
print('Selected', len(frame_choice), 'of those frames')

There are 5877 images in the folder
Selected 400 of those frames


In [4]:
# VGG16 without its classifier head (include_top=False), used as a fixed feature extractor
model = VGG16(weights='imagenet', include_top=False)
model.summary()

# One flattened feature vector per selected frame, in frame_choice order
vgg16_feature_list = []

for frame_number in frame_choice:
    img_path = os.path.join(dialogue_folder, film + '_frame' + str(frame_number) + '.jpg')
    img = image.load_img(img_path, target_size=(256, 256))
    img_data = image.img_to_array(img)
    img_data = np.expand_dims(img_data, axis=0)  # add a leading batch axis of size 1
    img_data = preprocess_input(img_data)

    vgg16_feature = model.predict(img_data)
    # flatten() already returns a fresh 1-D copy, so no extra np.array() copy is needed.
    # (The former trailing `x += 1` was removed: the for loop rebinds its variable on
    # every iteration, so the increment never had any effect.)
    vgg16_feature_list.append(np.asarray(vgg16_feature).flatten())

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [5]:
# Stack the per-frame feature vectors into a single array and check its shape
vgg16_feature_list_np = np.asarray(vgg16_feature_list)
vgg16_feature_list_np.shape

(400, 32768)

In [24]:
# With n_clusters=None the number of clusters is decided by distance_threshold alone
hac = AgglomerativeClustering(n_clusters=None, distance_threshold=2900)
hac.fit(vgg16_feature_list_np)
hac_labels = hac.labels_
print('Number of clusters:', hac.n_clusters_)
print(hac_labels)

Number of clusters: 39
[ 5  5 29 29 29 23 23 23 23 23 23 23 23 23 23  5  5  5  5  5  5  5  0  0
  0  0 20 20 20 20  8  8  8  8  8  8  8  8  8 11 11 11 28 28 28 11 11 11
  2  2  2  2  2 25 25 17 17 31 31 31 31 31 31 26 26 26 14 14 14 14 26 26
 26 26 26 35 31 31 31 31 14 14  2  2  2 17 17 17 17 10 10 10  4  4 30 30
 30 30 10 10  4  4 17 17 17 25 25 25 25  2  2  2 35 17 17 17 17 17 17 17
 35 35 35 35 30 30  4  4  4  4 30 30 30 30  4  4 26 26 26 26 26 14 14 14
 14 14 30 30 30 35 35 35 35 30 30 30 35 30 10 10 10 12 12 12 27 27 27 27
 27 22 22 22 22 22 22 22 22  8  8  8  8  8 27 27 27 27 27 27 27 27  0  0
  0  0 27  0  0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  9  9
  9  9  9  9 33 33 33  9  9 33  9  9  9  9  9  9  9  9 33 33 33 33 33  9
  9  9  9  9  9  9  9  9 21 21 21 21 21 21 21 21 21 21 21 33 33 33 33 33
 16 16 18 33 33  9  9 33 33 33  9  9  9  9  9 18 18 18 18 18  0  0  0 33
 33 33 16 16 16 16 16 16  9  9  9  9 33 33  9  9  9  9 33 33 33 33  9  9
  9  9  9  9  9  9 19 19 19 

## Load Saved Model and Identify MCUs

In [25]:
# Load the previously saved classifier; it is used below to flag which frames are MCUs
tuned_model = models.load_model('saved_models/tuned_model')

In [26]:
# Load every selected frame as a 128x128 grayscale array for the classifier
image_list = [
    img_to_array(
        load_img(
            dialogue_folder + '/' + film + '_frame' + str(frame_number) + '.jpg',
            target_size=(128, 128),
            color_mode='grayscale',
        )
    )
    for frame_number in frame_choice
]

In [27]:
image_array = np.array(image_list)
# Sequential.predict_classes() has been removed from newer Keras releases. For a model
# that emits a single probability per image (which the prediction[0] indexing in the
# following cell relies on), predict_classes() was defined as thresholding predict()
# at 0.5 and casting to int32, so this produces the same array on old and new versions.
y_pred = (tuned_model.predict(image_array) > 0.5).astype('int32')

In [28]:
# y_pred holds one single-element array per frame; unpack each into a flat list of 0/1 values
y_pred_values = [prediction[0] for prediction in y_pred]

In [11]:
def get_shot_ids(frame_choice, hac_labels):
    """Number consecutive runs of identical cluster labels as shots.

    Walks the frames in order and starts a new shot whenever a frame's cluster
    label differs from the label of the frame before it. Shot ids start at 0.

    Parameters
    ----------
    frame_choice : sequence
        Frame identifiers in order. Only used to pair up with ``hac_labels``;
        iteration stops at the shorter of the two sequences.
    hac_labels : sequence
        Cluster label of each frame, aligned with ``frame_choice``.

    Returns
    -------
    list of int
        One shot id per frame (empty list for empty input).
    """
    shot_id = 0
    shot_id_list = []
    # None marks "no previous frame yet". The earlier numeric sentinel (1000) was
    # indistinguishable from a real cluster label of 1000, which made the function
    # miss the shot boundary that follows any frame carrying that label.
    prev_cluster = None

    for _, cluster in zip(frame_choice, hac_labels):
        if prev_cluster is not None and cluster != prev_cluster:
            shot_id += 1
        shot_id_list.append(shot_id)
        prev_cluster = cluster

    return shot_id_list

In [29]:
# Give every selected frame the id of the shot (run of same-cluster frames) it belongs to
shot_id_list = get_shot_ids(frame_choice, hac_labels)

In [30]:
# One row per selected frame: frame number, cluster label, shot id and MCU prediction
scene_columns = ['frame_file', 'cluster', 'shot_id', 'mcu']
scene_rows = zip(frame_choice, hac_labels, shot_id_list, y_pred_values)
scene_df = pd.DataFrame(scene_rows, columns=scene_columns)
scene_df.head(7)

Unnamed: 0,frame_file,cluster,shot_id,mcu
0,600,5,0,0
1,601,5,0,0
2,602,29,1,0
3,603,29,1,0
4,604,29,1,0
5,605,23,2,0
6,606,23,2,0


# Scene Pattern Algorithm

1. Check all clusters for an A/B/A/B pattern of two clusters
2. Check if the two clusters in each pattern are MCUs, and discard non-MCU patterns
3. Get the earliest and latest frames with either speaker cluster, to determine the scene's anchor frames
4. Get all clusters that lie in between the anchors, to determine the cutaway clusters
5. Expand the scene in either direction by checking for adjacent cutaway clusters before the starting anchor and after the ending anchor.

### Checking for A/B/A/B cluster pairs

In [74]:
# Cluster labels of the three most recent shots; prev_clust_1 is the newest.
# None means "no such shot yet" and, unlike a numeric placeholder, can never be
# mistaken for a real cluster label.
prev_clust_1 = None
prev_clust_2 = None
prev_clust_3 = None
prev_shot_id = -1
alternating_pairs = []

for _, cluster, _, shot_id in zip(frame_choice, hac_labels, y_pred_values, shot_id_list):
    # On the first frame of a new shot the three trackers still describe the three
    # preceding shots, so this asks whether the last four shots ran B, A, B, A.
    if prev_clust_3 is not None and cluster == prev_clust_2 and prev_clust_1 == prev_clust_3:
        candidate = [int(min(cluster, prev_clust_1)), int(max(cluster, prev_clust_1))]
        # keep each pair once, in order of first appearance
        if candidate not in alternating_pairs:
            alternating_pairs.append(candidate)

    # Shift the shot history only when a new shot begins
    if shot_id != prev_shot_id:
        prev_shot_id = shot_id
        prev_clust_3 = prev_clust_2
        prev_clust_2 = prev_clust_1
        prev_clust_1 = cluster

alternating_pairs

[[4, 30], [30, 35], [0, 27], [9, 33]]

### Checking both clusters if they're MCUs

In [87]:
speaker_pairs = []
print('cluster\t', 'count\t', 'mcu probability')
for pair in alternating_pairs:
    # The mean of the 0/1 mcu flag is the share of a cluster's frames flagged as MCU
    pair_means = []
    for cluster_id in (pair[0], pair[1]):
        cluster_mcu = scene_df.loc[scene_df['cluster'] == cluster_id]['mcu']
        cluster_mean = cluster_mcu.mean()
        pair_means.append(cluster_mean)
        print(cluster_id, '\t', cluster_mcu.count(), '\t', '{0:.2f}%'.format(cluster_mean * 100))

    # Both clusters of the pair must be mostly MCU frames
    if all(cluster_mean > .5 for cluster_mean in pair_means):
        print('Passes MCU check')
        speaker_pairs.append(pair)
    else:
        print('Fails MCU check')
    print()
speaker_pairs

cluster	 count	 mcu probability
4 	 10 	 0.00%
30 	 17 	 88.24%
Fails MCU check

30 	 17 	 88.24%
35 	 11 	 81.82%
Passes MCU check

0 	 30 	 33.33%
27 	 14 	 100.00%
Fails MCU check

9 	 48 	 97.92%
33 	 28 	 100.00%
Passes MCU check



[[30, 35], [9, 33]]

### Establishing first and last frames of anchor clusters

In [89]:
# Work through the first pair that passed the MCU check as an example
pair = speaker_pairs[0]
pair

[30, 35]

In [77]:
# First three frames that belong to either cluster of the pair
speaker_mask = (scene_df['cluster'] == pair[0]) | (scene_df['cluster'] == pair[1])
scene_df.loc[speaker_mask].head(3)

Unnamed: 0,frame_file,cluster,shot_id,mcu
75,675,35,17,0
94,694,30,24,0
95,695,30,24,1


In [58]:
# Last three frames that belong to either cluster of the pair
speaker_mask = (scene_df['cluster'] == pair[0]) | (scene_df['cluster'] == pair[1])
scene_df.loc[speaker_mask].tail(3)

Unnamed: 0,frame_file,cluster,shot_id,mcu
155,755,30,41,1
156,756,35,42,1
157,757,30,43,1


In [59]:
# Frame numbers of every frame in either cluster of the pair; their extremes are the anchors
speaker_frames = scene_df.loc[(scene_df['cluster'] == pair[0]) | (scene_df['cluster'] == pair[1])].frame_file
anchor_start, anchor_end = speaker_frames.min(), speaker_frames.max()
print(anchor_start, anchor_end)

675 757


### Finding cutaways and expanding the scene's beginning and end

In [56]:
# Clusters seen strictly between the two anchor frames, minus the two speaker clusters
between_anchors = (scene_df['frame_file'] > anchor_start) & (scene_df['frame_file'] < anchor_end)
cutaways = scene_df.loc[between_anchors].cluster.unique()
cutaways = cutaways[(cutaways != pair[0]) & (cutaways != pair[1])]
cutaways

array([31, 14,  2, 17, 10,  4, 25, 26])

In [66]:
# Walk backwards from the first speaker frame for as long as the preceding frame
# belongs to a cutaway cluster.
# A plain frame -> cluster lookup replaces int(<one-row Series>): that conversion is
# deprecated in recent pandas, and it made a caught TypeError the normal way out of the loop.
# Assumes each frame_file value occurs once in scene_df.
frame_to_cluster = dict(zip(scene_df['frame_file'].tolist(), scene_df['cluster'].tolist()))
cutaway_clusters = {int(cluster) for cluster in cutaways}

scene_start = anchor_start
# .get() returns None once we step before the first available frame, which ends the walk
while frame_to_cluster.get(scene_start - 1) in cutaway_clusters:
    scene_start -= 1
scene_start

648

In [68]:
# Walk forwards from the last speaker frame for as long as the following frame
# belongs to a cutaway cluster.
# A plain frame -> cluster lookup replaces int(<one-row Series>): that conversion is
# deprecated in recent pandas, and it made a caught TypeError the normal way out of the loop.
# Assumes each frame_file value occurs once in scene_df.
frame_to_cluster = dict(zip(scene_df['frame_file'].tolist(), scene_df['cluster'].tolist()))
cutaway_clusters = {int(cluster) for cluster in cutaways}

scene_end = anchor_end
# .get() returns None once we step past the last available frame, which ends the walk
while frame_to_cluster.get(scene_end + 1) in cutaway_clusters:
    scene_end += 1
scene_end

760

# Functions

In [102]:
# NOTE(review): get_alternating_pairs is defined in a later cell of this notebook,
# so on a fresh kernel that definition cell has to be run before this one.
alternating_pairs = get_alternating_pairs(frame_choice, hac_labels, y_pred_values, shot_id_list)
alternating_pairs

[[4, 30], [30, 35], [0, 27], [9, 33]]

In [105]:
# NOTE(review): mcu_check is defined in a later cell of this notebook,
# so on a fresh kernel that definition cell has to be run before this one.
speaker_pairs = mcu_check(alternating_pairs)
speaker_pairs

cluster	 count	 mcu probability
4 	 10 	 0.00%
30 	 17 	 88.24%
Fails MCU check

30 	 17 	 88.24%
35 	 11 	 81.82%
Passes MCU check

0 	 30 	 33.33%
27 	 14 	 100.00%
Fails MCU check

9 	 48 	 97.92%
33 	 28 	 100.00%
Passes MCU check



[[30, 35], [9, 33]]

In [111]:
# NOTE(review): expand_scenes is defined in a later cell of this notebook,
# so on a fresh kernel that definition cell has to be run before this one.
scenes = expand_scenes(speaker_pairs, scene_df)
scenes

Speaker A and B clusters: [30, 35]
First, last frames of speakers A and B: 675 757
Cutaway clusters: [31 14  2 17 10  4 25 26]
Speaker A and B clusters: [9, 33]
First, last frames of speakers A and B: 814 917
Cutaway clusters: [21 16 18  0]


[(648, 760), (814, 917)]

In [101]:
def get_alternating_pairs(frame_choice, hac_labels, y_pred_values, shot_id_list):
    """Find pairs of clusters whose shots alternate in an A/B/A/B pattern.

    The frames are walked in order while the cluster labels of the three most
    recent shots are tracked. Whenever the current frame's cluster together with
    those three forms the sequence B, A, B, A, the pair (A, B) is recorded.

    Parameters
    ----------
    frame_choice : sequence
        Frame identifiers. Not inspected; only takes part in the zip, so iteration
        stops at the shortest of the four sequences.
    hac_labels : sequence
        Cluster label of each frame; values must be orderable and convertible with int().
    y_pred_values : sequence
        Per-frame MCU flags. Not inspected; only takes part in the zip.
    shot_id_list : sequence
        Shot id of each frame; a change of value marks the start of a new shot.

    Returns
    -------
    list of [int, int]
        Unique pairs as ``[smaller_label, larger_label]``, in order of first appearance.
    """
    # Cluster labels of the three most recent shots; prev_clust_1 is the newest.
    # None means "no such shot yet". The earlier numeric placeholders (1001-1003)
    # could be mistaken for real labels and produce a bogus pair.
    prev_clust_1 = None
    prev_clust_2 = None
    prev_clust_3 = None
    prev_shot_id = -1
    alternating_pairs = []

    for _, cluster, _, shot_id in zip(frame_choice, hac_labels, y_pred_values, shot_id_list):
        # On the first frame of a new shot the trackers still describe the three
        # preceding shots, so this asks whether the last four shots ran B, A, B, A.
        if prev_clust_3 is not None and cluster == prev_clust_2 and prev_clust_1 == prev_clust_3:
            candidate = [int(min(cluster, prev_clust_1)), int(max(cluster, prev_clust_1))]
            # keep each pair once, in order of first appearance
            if candidate not in alternating_pairs:
                alternating_pairs.append(candidate)

        # Shift the shot history only when a new shot begins
        if shot_id != prev_shot_id:
            prev_shot_id = shot_id
            prev_clust_3 = prev_clust_2
            prev_clust_2 = prev_clust_1
            prev_clust_1 = cluster

    return alternating_pairs

In [104]:
def mcu_check(alternating_pairs, frame_df=None, threshold=.5):
    """Keep only the cluster pairs in which both clusters are mostly MCU frames.

    For each pair, the mean of the ``mcu`` column is computed over the rows of each
    cluster. The pair is kept when both means are strictly greater than ``threshold``.
    A table with the per-cluster row count and mean is printed along the way.

    Parameters
    ----------
    alternating_pairs : iterable of [a, b]
        Pairs of cluster labels to test.
    frame_df : pandas.DataFrame, optional
        Frame table with ``cluster`` and ``mcu`` columns. When omitted, the
        notebook-level ``scene_df`` is used, which is what this function always
        read before the parameter existed.
    threshold : float, optional
        Minimum mean ``mcu`` value (exclusive) a cluster needs. Defaults to 0.5.

    Returns
    -------
    list
        The pairs from ``alternating_pairs`` that pass, in their original order.
        A cluster with no rows has a NaN mean and therefore fails the check.
    """
    if frame_df is None:
        # Backward-compatible fallback to the notebook's global frame table
        frame_df = scene_df

    speaker_pairs = []
    print('cluster\t', 'count\t', 'mcu probability')

    for pair in alternating_pairs:
        pair_means = []
        for cluster_id in (pair[0], pair[1]):
            # Select this cluster's mcu flags once and reuse them for mean and count
            cluster_mcu = frame_df.loc[frame_df['cluster'] == cluster_id, 'mcu']
            cluster_mean = cluster_mcu.mean()
            pair_means.append(cluster_mean)
            print(cluster_id, '\t', cluster_mcu.count(), '\t', '{0:.2f}%'.format(cluster_mean * 100))

        if all(cluster_mean > threshold for cluster_mean in pair_means):
            print('Passes MCU check')
            speaker_pairs.append(pair)
        else:
            print('Fails MCU check')
        print()

    return speaker_pairs

In [110]:
def expand_scenes(speaker_pairs, scene_df):
    """Turn each speaker pair into a (first_frame, last_frame) scene range.

    For every pair the function
    1. takes the smallest and largest ``frame_file`` among rows in either cluster
       of the pair (the anchors),
    2. collects the other clusters that occur strictly between the anchors
       (the cutaway clusters), and
    3. extends the range one frame at a time before the first anchor and after the
       last anchor while the neighbouring frame belongs to a cutaway cluster.

    Progress information is printed for every pair.

    Parameters
    ----------
    speaker_pairs : iterable of [a, b]
        Pairs of cluster labels.
    scene_df : pandas.DataFrame
        Frame table with ``frame_file`` and ``cluster`` columns. Neighbouring frames
        are looked up as ``frame_file - 1`` / ``frame_file + 1``; each ``frame_file``
        value is assumed to occur once.

    Returns
    -------
    list of tuple
        One ``(scene_start, scene_end)`` tuple per pair, in input order.
    """
    # Built once: frame number -> cluster label. This replaces int(<one-row Series>)
    # lookups, which are deprecated in recent pandas and which made a caught TypeError
    # the normal way of leaving the expansion loops.
    frame_to_cluster = dict(zip(scene_df['frame_file'].tolist(), scene_df['cluster'].tolist()))
    scenes = []

    for pair in speaker_pairs:
        speaker_frames = scene_df.loc[(scene_df['cluster'] == pair[0]) | (scene_df['cluster'] == pair[1])].frame_file
        anchor_start = speaker_frames.min()
        anchor_end = speaker_frames.max()
        cutaways = scene_df.loc[(scene_df['frame_file'] > anchor_start) & (scene_df['frame_file'] < anchor_end)].cluster.unique()
        cutaways = cutaways[cutaways != pair[0]]
        cutaways = cutaways[cutaways != pair[1]]
        print('Speaker A and B clusters:', pair)
        print('First, last frames of speakers A and B:', anchor_start, anchor_end)
        print('Cutaway clusters:', cutaways)

        cutaway_clusters = set(cutaways.tolist())

        # .get() returns None for a frame number that is not in the table (i.e. past
        # either end of the frame list); None is never a cutaway, so the walk stops.
        scene_start = anchor_start
        while frame_to_cluster.get(scene_start - 1) in cutaway_clusters:
            scene_start -= 1

        scene_end = anchor_end
        while frame_to_cluster.get(scene_end + 1) in cutaway_clusters:
            scene_end += 1

        scenes.append((scene_start, scene_end))

    return scenes