In [None]:
import tensorflow as tf
import pandas as pd
import glob
import cv2
import numpy as np
import pymongo

In [None]:
from keras.applications.inception_v3 import InceptionV3, preprocess_input

### Open the database and select the collection that stores the features and labels

In [None]:
# No URI is passed, so pymongo connects to its default host and port (localhost:27017).
mc = pymongo.MongoClient()
# Select the 'model' database on that server.
db = mc['model']
# Collection that store_deep_features writes to: one document per frame, combining the
# movie's label columns with that frame's deep features.
deep_features_db = db['labels_deep_features']

## Load the InceptionV3 feature extractor and import the labels CSV

In [None]:
# ImageNet-pretrained InceptionV3 used as a fixed feature extractor: include_top=False drops
# the classification layers, and pooling='avg' global-average-pools the last convolutional
# output into a single feature vector per image. Inputs must be 299x299 with 3 channels.
model = InceptionV3(include_top=False, weights='imagenet', input_shape=(299,299,3), pooling='avg')

In [None]:
# Labels table, read relative to the notebook's working directory. store_deep_features uses its
# 'movie_title' column to locate each movie's frames and copies every column of a movie's row
# into the documents it stores.
df = pd.read_csv('movie_genre_matrix.csv')

In [None]:
def png_to_numpy(movie_name, frames_dir='trailer_test'):
    """Load the middle third of a movie's PNG frames.

    Collects every file matching ``{frames_dir}/{movie_name}*.png``, sorts the paths by
    name, drops the first and the last third of them, and reads the remaining files with
    ``cv2.imread``.

    Args:
        movie_name: Title used as the file-name prefix of the movie's frames. It is
            matched literally, so titles containing glob metacharacters (``[``, ``]``,
            ``*``, ``?``) are handled correctly.
        frames_dir: Directory that holds the frame PNGs. Defaults to ``'trailer_test'``.

    Returns:
        A list of images as returned by ``cv2.imread`` (one array per readable frame), in
        file-name order. The list is empty when no file matches.
    """
    import warnings

    # Escape both parts so only the trailing '*' acts as a wildcard.
    pattern = f'{glob.escape(str(frames_dir))}/{glob.escape(str(movie_name))}*.png'
    # NOTE(review): the sort is lexicographic, so frame order is only chronological if the
    # frame numbers in the file names are zero-padded -- confirm against the extractor.
    frame_paths = sorted(glob.glob(pattern))

    # Trim the same number of frames from each end. The end index is computed explicitly
    # because `-len(x)//3` floors a negative number (rounding away from zero) and because
    # a `-0` end index would yield an empty slice.
    third = len(frame_paths) // 3
    middle_paths = frame_paths[third:len(frame_paths) - third]

    frames = []
    for path in middle_paths:
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable or corrupt file by returning None.
            warnings.warn(f'could not read frame {path!r}; skipping it')
            continue
        frames.append(image)
    return frames
    

In [None]:
def get_deep_features(frame, target_size=(299, 299)):
    """Run one frame (or a batch of frames) through the network and return its deep features.

    Each image is resized to ``target_size`` when needed, converted from OpenCV's BGR
    channel order to RGB, scaled with InceptionV3's ``preprocess_input`` and passed to
    ``model.predict``.

    Args:
        frame: Image array of shape (height, width, 3), or a batch of shape
            (n, height, width, 3). Assumed to be in BGR order as produced by
            ``cv2.imread`` -- confirm if frames come from another source.
        target_size: ``(height, width)`` expected by the network. Defaults to the
            299x299 input the model was built with.

    Returns:
        The array returned by ``model.predict``, with one row of features per input image
        (a single frame therefore yields one row).

    Raises:
        ValueError: If ``frame`` is None or is not a 3-D or 4-D array.
    """
    if frame is None:
        raise ValueError('frame is None; cv2.imread returns None when an image cannot be read')
    if frame.ndim not in (3, 4):
        raise ValueError(f'expected a 3-D image or a 4-D batch, got an array with shape {frame.shape}')

    images = frame[np.newaxis] if frame.ndim == 3 else frame
    height, width = target_size

    prepared = []
    for image in images:
        if image.shape[:2] != (height, width):
            # Resize rather than reshape: reshape only reinterprets the buffer and would
            # either fail or scramble pixels. cv2.resize takes (width, height).
            image = cv2.resize(image, (width, height))
        # The ImageNet weights were trained on RGB input; OpenCV images are BGR.
        prepared.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # astype creates a fresh float array, so preprocess_input cannot modify the caller's frame.
    batch = preprocess_input(np.stack(prepared).astype('float32'))
    return model.predict(batch)

In [None]:
def store_deep_features(movie_df):
    """Extract deep features for every movie's frames and store them in MongoDB.

    For each row of ``movie_df`` the movie's frames are loaded with ``png_to_numpy``, each
    frame is passed through ``get_deep_features``, and one document per frame is written
    to ``deep_features_db``. A document holds every column of the movie's row, the feature
    values under the string keys ``'0'``, ``'1'``, ..., and a ``'frame_id'`` of the form
    ``"<movie_title>_<n>"`` with ``n`` counting from 1 within the movie.

    Documents are inserted without any duplicate check, so calling this twice stores every
    frame twice.

    Args:
        movie_df: DataFrame with a ``'movie_title'`` column plus the label columns to
            store alongside the features.

    Returns:
        None.
    """
    # Walk the rows once instead of re-filtering the whole frame for every title. Each
    # row supplies its own labels, which matters if a title appears more than once.
    for movie_labels in movie_df.to_dict('records'):
        movie = movie_labels['movie_title']

        documents = []
        for frame_id, frame in enumerate(png_to_numpy(movie), start=1):
            # get_deep_features returns one row per frame; take that row as a plain list
            # so the values can be stored in MongoDB.
            deep_features = get_deep_features(frame)[0].tolist()
            features_dict = {str(i): feature for i, feature in enumerate(deep_features)}
            documents.append({**movie_labels, **features_dict, 'frame_id': f'{movie}_{frame_id}'})

        # One round trip per movie. insert_many rejects an empty list, so movies without
        # any frames are skipped.
        if documents:
            deep_features_db.insert_many(documents)

## Run a single function to extract the deep features and store them in MongoDB

In [None]:
# Processes every movie listed in df. Documents are inserted without any duplicate check, so
# re-running this cell appends the same frames to the collection again.
store_deep_features(df)