# scene_clustering
This notebook contains initial code for clustering frames into shots, identifying the A/B/A/B shot pattern, and using the image classifier model to check whether the frames are MCUs.

In [63]:
import sys
import os
sys.path.append('site-packages') # manually put all packages/libraries into this folder
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras import models
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering

## Clustering
### For POC, designating a specific scene's worth of frames

In [13]:
# Scene selection for the proof of concept: which film, and which frame numbers
# of that film make up the scene to cluster.
film = 'booksmart'
# Frames 1001-1162 are a good Booksmart example: 6 clusters at distance_threshold=2500.
frame_choice = [*range(1001, 1163)]
# Other frame ranges that were tried:
#   1619-1640  -> list(range(1619, 1641))
#   1710-1900  -> list(range(1710, 1901))

In [18]:
# Folder holding the extracted dialogue frames for the chosen film.
dialogue_folder = os.path.join('dialogue_frames', film)

# Report how many files the folder holds versus how many frames were selected.
folder_image_count = len(os.listdir(dialogue_folder))
selected_frame_count = len(frame_choice)
print('There are', folder_image_count, 'images in the folder')
print('Selected', selected_frame_count, 'of those frames')

There are 6395 images in the folder
Selected 162 of those frames


In [19]:
# VGG16 with ImageNet weights and no classifier head, used purely as a
# feature extractor for the selected frames.
model = VGG16(weights='imagenet', include_top=False)
model.summary()

# One flattened feature vector per selected frame, in frame_choice order.
vgg16_feature_list = []

for frame_number in frame_choice:
    # Frame files are named <film>_frame<number>.jpg inside dialogue_folder.
    img_path = os.path.join(dialogue_folder, film + '_frame' + str(frame_number) + '.jpg')
    img = image.load_img(img_path, target_size=(224, 224))
    # Add a leading batch axis, then apply VGG16's own input preprocessing.
    img_data = np.expand_dims(image.img_to_array(img), axis=0)
    img_data = preprocess_input(img_data)

    # predict() already returns an ndarray, so it is flattened directly.
    # The per-frame shape print was dropped (it printed one line per frame);
    # the shape of the stacked features is inspected in a later cell.
    vgg16_feature = model.predict(img_data)
    vgg16_feature_list.append(vgg16_feature.flatten())

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0     

In [5]:
# Stack the per-frame feature vectors into a single 2-D array (one row per frame).
vgg16_feature_list_np = np.asarray(vgg16_feature_list)

In [6]:
# Sanity check: expect one row per selected frame and one column per flattened VGG16 feature.
vgg16_feature_list_np.shape

(162, 25088)

In [6]:
# K-means baseline, kept for reference only: it is not used downstream because
# the number of clusters (k) has to be chosen in advance.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(vgg16_feature_list_np)

print(kmeans.labels_)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 1 1 0 0 0 0
 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [7]:
# Agglomerative clustering with no preset cluster count: with n_clusters=None the
# number of clusters is determined by distance_threshold instead.
hac = AgglomerativeClustering(n_clusters=None, distance_threshold=2500)
hac.fit(vgg16_feature_list_np)
print('Number of clusters:', hac.n_clusters_)
print(hac.labels_)

Number of clusters: 6
[3 3 3 1 1 1 1 2 2 2 2 2 2 1 1 2 0 0 0 0 0 2 2 2 2 1 1 1 1 1 2 1 1 1 1 1 1
 2 2 2 2 2 2 2 3 3 3 3 1 1 2 1 1 2 2 1 1 1 2 2 1 1 2 2 1 2 2 2 2 2 1 1 1 1
 2 2 2 1 2 2 2 1 1 2 0 0 0 0 0 0 0 2 2 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1
 2 1 2 2 1 5 5 5 4 5 4 4 4 4 4 5 5 5 5 5 4 4 4 4 4 4 5 5 4 4 5 5 4 5 5 5 5
 5 5 4 4 4 4 4 4 5 5 0 0 0 0]


In [9]:
# Print each selected frame number next to its HAC cluster label.
# No longer strictly needed, since the scene_df dataframe built below holds the same pairing.
# Uses frame_choice: the name `frames` is never assigned in this notebook, so the
# previous version raised NameError on a fresh kernel.
for frame, label in zip(frame_choice, hac.labels_):
    print(frame, label)

1001 3
1002 3
1003 3
1004 1
1005 1
1006 1
1007 1
1008 2
1009 2
1010 2
1011 2
1012 2
1013 2
1014 1
1015 1
1016 2
1017 0
1018 0
1019 0
1020 0
1021 0
1022 2
1023 2
1024 2
1025 2
1026 1
1027 1
1028 1
1029 1
1030 1
1031 2
1032 1
1033 1
1034 1
1035 1
1036 1
1037 1
1038 2
1039 2
1040 2
1041 2
1042 2
1043 2
1044 2
1045 3
1046 3
1047 3
1048 3
1049 1
1050 1
1051 2
1052 1
1053 1
1054 2
1055 2
1056 1
1057 1
1058 1
1059 2
1060 2
1061 1
1062 1
1063 2
1064 2
1065 1
1066 2
1067 2
1068 2
1069 2
1070 2
1071 1
1072 1
1073 1
1074 1
1075 2
1076 2
1077 2
1078 1
1079 2
1080 2
1081 2
1082 1
1083 1
1084 2
1085 0
1086 0
1087 0
1088 0
1089 0
1090 0
1091 0
1092 2
1093 2
1094 1
1095 1
1096 1
1097 1
1098 1
1099 1
1100 1
1101 1
1102 2
1103 2
1104 1
1105 1
1106 1
1107 1
1108 1
1109 1
1110 1
1111 1
1112 2
1113 1
1114 2
1115 2
1116 1
1117 5
1118 5
1119 5
1120 4
1121 5
1122 4
1123 4
1124 4
1125 4
1126 4
1127 5
1128 5
1129 5
1130 5
1131 5
1132 4
1133 4
1134 4
1135 4
1136 4
1137 4
1138 5
1139 5
1140 4
1141 4
1142 5
1143 5

## Load Saved Model and Identify MCUs

In [12]:
# Load the previously saved classifier from saved_models/tuned_model; it is used
# below to predict a 0/1 class for each frame.
tuned_model = models.load_model('saved_models/tuned_model')

In [None]:
# Generator yielding rescaled (1/255) 128x128 grayscale images from dialogue_folder,
# in batches of up to 1000.
# NOTE(review): this reads the whole film folder, not just frame_choice, and
# flow_from_directory normally expects one sub-folder per class - confirm the
# folder layout before relying on this cell.
test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
    dialogue_folder,
    target_size=(128, 128),
    color_mode='grayscale',
    batch_size=1000,
)

In [None]:
# Pull one batch of images/labels from the generator.
class_labels = ['Non-MCU', 'MCU']
test_images, test_labels = next(test_generator)
# Column vector built from the first label column. -1 lets NumPy size it to the
# batch actually returned; the previous hard-coded 2285 rows could never match a
# batch capped at batch_size=1000 and raised ValueError.
test_y = np.reshape(test_labels[:, 0], (-1, 1))
print("test_images shape: " + str(test_images.shape))
print("test_labels shape: " + str(test_labels.shape))

In [31]:
# Load a single frame as a 128x128 grayscale array to check the classifier's input format.
x = 1023
sample_path = dialogue_folder + '/' + film + '_frame' + str(x) + '.jpg'
sample_image = img_to_array(
    load_img(sample_path, target_size=(128, 128), color_mode='grayscale')
)

In [32]:
# Confirm the loaded frame is a 128x128 array with a single (grayscale) channel.
sample_image.shape

(128, 128, 1)

In [40]:
# Trial run on two frames: load each as a 128x128 grayscale array.
x_list = [1023, 1024]
sample_list = []
for x in x_list:
    frame_path = dialogue_folder + '/' + film + '_frame' + str(x) + '.jpg'
    frame_img = load_img(frame_path, target_size=(128, 128), color_mode='grayscale')
    sample_list.append(img_to_array(frame_img))

In [48]:
# Load every selected frame as a 128x128 grayscale array, in frame_choice order.
sample_list = []
for x in frame_choice:
    frame_path = dialogue_folder + '/' + film + '_frame' + str(x) + '.jpg'
    frame_img = load_img(frame_path, target_size=(128, 128), color_mode='grayscale')
    sample_list.append(img_to_array(frame_img))

In [49]:
# Stack the loaded frames into one batch array for the classifier.
# NOTE(review): these pixels are not rescaled, whereas the test generator above
# uses rescale=1./255 - confirm which input range tuned_model was trained on.
sample_array = np.asarray(sample_list)

In [50]:
# Predict a class per frame. Sequential.predict_classes() has been removed from
# newer Keras/TensorFlow releases, so reproduce its behaviour on top of predict():
# argmax over the last axis for multi-unit outputs, 0.5 threshold for a single unit.
y_prob = tuned_model.predict(sample_array)
if y_prob.shape[-1] > 1:
    y_pred = y_prob.argmax(axis=-1)
else:
    y_pred = (y_prob > 0.5).astype('int32')

In [61]:
# Print frame number, HAC cluster label and predicted class side by side.
# y_pred is iterated row by row; each row's first element is the predicted class.
for frame, cluster, prediction in zip(frame_choice, hac.labels_, y_pred):
    print(frame, cluster, prediction[0])


1001 3 0
1002 3 0
1003 3 0
1004 1 1
1005 1 1
1006 1 1
1007 1 1
1008 2 1
1009 2 1
1010 2 1
1011 2 1
1012 2 1
1013 2 1
1014 1 1
1015 1 1
1016 2 1
1017 0 1
1018 0 1
1019 0 1
1020 0 1
1021 0 0
1022 2 1
1023 2 1
1024 2 1
1025 2 1
1026 1 1
1027 1 1
1028 1 1
1029 1 1
1030 1 1
1031 2 1
1032 1 1
1033 1 1
1034 1 1
1035 1 1
1036 1 1
1037 1 1
1038 2 1
1039 2 1
1040 2 1
1041 2 1
1042 2 1
1043 2 1
1044 2 1
1045 3 0
1046 3 0
1047 3 0
1048 3 0
1049 1 1
1050 1 1
1051 2 1
1052 1 0
1053 1 1
1054 2 1
1055 2 1
1056 1 1
1057 1 1
1058 1 1
1059 2 1
1060 2 1
1061 1 1
1062 1 1
1063 2 1
1064 2 1
1065 1 1
1066 2 1
1067 2 1
1068 2 1
1069 2 1
1070 2 1
1071 1 1
1072 1 1
1073 1 1
1074 1 1
1075 2 1
1076 2 1
1077 2 1
1078 1 1
1079 2 1
1080 2 1
1081 2 1
1082 1 1
1083 1 1
1084 2 1
1085 0 1
1086 0 1
1087 0 1
1088 0 1
1089 0 1
1090 0 1
1091 0 1
1092 2 1
1093 2 1
1094 1 1
1095 1 1
1096 1 1
1097 1 0
1098 1 1
1099 1 1
1100 1 1
1101 1 1
1102 2 1
1103 2 1
1104 1 0
1105 1 0
1106 1 0
1107 1 0
1108 1 0
1109 1 0
1110 1 0
1111 1 0
1

In [72]:
# Flatten the per-frame prediction rows into a plain list of class values.
y_pred_values = [prediction[0] for prediction in y_pred]

In [75]:
# One row per selected frame: frame number, HAC cluster label, predicted class.
scene_records = list(zip(frame_choice, hac.labels_, y_pred_values))
scene_df = pd.DataFrame(scene_records, columns=['frame_file', 'cluster', 'mcu'])

In [93]:
# Display the per-frame table (frame number, cluster label, predicted class).
scene_df

Unnamed: 0,frame_file,cluster,mcu
0,1001,3,0
1,1002,3,0
2,1003,3,0
3,1004,1,1
4,1005,1,1
5,1006,1,1
6,1007,1,1
7,1008,2,1
8,1009,2,1
9,1010,2,1


In [80]:
# Mean of the 0/1 'mcu' column over the frames in cluster 3, i.e. the share predicted as class 1.
# NOTE(review): the saved output (0.619...) equals what the per-cluster loop below
# reports for cluster 4, not cluster 3 - it looks stale; re-run to confirm.
scene_df.loc[scene_df['cluster'] == 3, 'mcu'].mean()

0.6190476190476191

In [83]:
# Share of frames predicted as class 1 in each cluster.
# The cluster ids come from the data rather than a hard-coded range(0, 6), so this
# keeps working when a different scene or distance_threshold changes the cluster count.
# groupby sorts by cluster id, so the printed order matches the previous loop.
for x, mcu_share in scene_df.groupby('cluster')['mcu'].mean().items():
    print(x, mcu_share)

0 0.75
1 0.8076923076923077
2 1.0
3 0.0
4 0.6190476190476191
5 0.8571428571428571


In [86]:
# Show the rows (frames) assigned to cluster 3.
scene_df.loc[scene_df['cluster'] == 3]

Unnamed: 0,frame_file,cluster,mcu
0,1001,3,0
1,1002,3,0
2,1003,3,0
44,1045,3,0
45,1046,3,0
46,1047,3,0
47,1048,3,0


In [92]:
# Let pandas display up to 200 rows so the whole scene table is shown untruncated.
pd.set_option('display.max_rows', 200)

In [90]:
# Show only the frames whose 'mcu' prediction is 1.
# NOTE(review): presumably 1 means MCU, going by class_labels = ['Non-MCU', 'MCU'] - confirm against the model's training labels.
scene_df.loc[scene_df['mcu'] == 1]

Unnamed: 0,frame_file,cluster,mcu
3,1004,1,1
4,1005,1,1
5,1006,1,1
6,1007,1,1
7,1008,2,1
8,1009,2,1
9,1010,2,1
10,1011,2,1
11,1012,2,1
12,1013,2,1
