In [1]:
import pickle
import numpy as np
import pandas as pd
from collections import Counter
import math
from sklearn.svm import SVC

In [2]:
def fill_dropped_frame(name_folder, dropped_frames, df_old):
    """Fill in the probabilities for dropped frames.

    For every file name in ``name_folder`` the rows of ``dropped_frames``
    belonging to that file receive probabilities taken from ``df_old``:

    * if the previous frame number (``frame - 1``) is also listed as dropped
      for this file, the probabilities of that dropped row are copied as they
      stand at that moment (so rows are expected in ascending frame order);
    * else, if the dropped frame is frame 1, the first ``df_old`` row of the
      file is used;
    * else the last ``df_old`` row of the file with ``frame <= dropped frame``
      is used, falling back to the first row with a larger frame number when
      no such row exists.

    Parameters
    ----------
    name_folder : iterable
        File names to process.
    dropped_frames : pandas.DataFrame
        Must contain 'name_file', 'emotion', 'frame' and the seven ``prob_*``
        columns. It is not modified.
    df_old : pandas.DataFrame
        Frames with known probabilities; same columns as ``dropped_frames``.
        "First"/"last" above refer to the row order of this frame, so it is
        presumably sorted by frame within each file -- verify against caller.

    Returns
    -------
    pandas.DataFrame
        The filled rows of all processed files with a fresh 0..n-1 index.
        An empty frame with the expected columns if nothing was dropped.

    Raises
    ------
    IndexError
        If a file has dropped frames but no rows at all in ``df_old``.
    """
    meta_cols = ['name_file', 'emotion', 'frame']
    name_labels = ['prob_Neutral','prob_Anger','prob_Disgust','prob_Fear','prob_Happiness','prob_Sadness','prob_Surprise']
    out_cols = meta_cols + name_labels

    filled_parts = []
    for folder in name_folder:
        df_n = dropped_frames.loc[dropped_frames.name_file == folder].copy()
        frames = df_n.frame.tolist()
        if not frames:
            # Nothing was dropped for this file; keeping empty pieces out of
            # the final concat also keeps them from influencing result dtypes.
            continue

        # Explicit copy: `.values` may hand back a read-only view (pandas
        # copy-on-write) and the loop below writes into this array.
        probs = df_n.drop(meta_cols, axis=1).to_numpy(copy=True)

        # frame number -> row position; the first occurrence wins, which is
        # what the previous `list.index` lookups returned.
        row_of = {}
        for row, frame in enumerate(frames):
            row_of.setdefault(frame, row)

        # Known rows of this file, extracted once instead of once per frame.
        known = df_old.loc[df_old.name_file == folder]
        known_frames = known.frame.to_numpy()
        known_probs = known.drop(meta_cols, axis=1).to_numpy()

        for frame in frames:
            row = row_of[frame]
            prev_row = row_of.get(frame - 1)
            if prev_row is not None:
                # Previous frame was dropped too: propagate its current values.
                probs[row] = probs[prev_row]
            elif frame - 1 == 0:
                # Very first frame of the video: take the first known row.
                probs[row] = known_probs[0]
            else:
                earlier = np.flatnonzero(known_frames <= frame)
                if earlier.size:
                    probs[row] = known_probs[earlier[-1]]
                else:
                    # No known frame before this one: use the first later one.
                    later = np.flatnonzero(known_frames > frame)
                    probs[row] = known_probs[later[0]]

        for col, label in enumerate(name_labels):
            df_n[label] = probs[:, col].tolist()
        filled_parts.append(df_n)

    if not filled_parts:
        return pd.DataFrame(columns=out_cols)

    # One concat at the end instead of growing a frame inside the loop.
    result = pd.concat(filled_parts, ignore_index=True)
    # Expected columns first, any additional input columns after them.
    extra_cols = [c for c in result.columns if c not in out_cols]
    return result[out_cols + extra_cols]

def df_full(dict_, df, probs=None):
    """Expand per-window predictions into one row per (window, frame).

    Parameters
    ----------
    dict_ : dict
        ``dict_['index'][k]`` holds the index labels of ``df`` that belong to
        window ``k``.
    df : pandas.DataFrame
        Must contain the columns 'name_folder', 'name_img' and 'emotion'.
    probs : array-like of shape (n_windows, 7), optional
        Predicted probabilities, one row per window, in the column order
        Neutral, Anger, Disgust, Fear, Happiness, Sadness, Surprise.
        When omitted, the notebook-level variable ``pred`` is used, which is
        what this function always did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Columns 'name', 'name_img', 'true_emotion' and the seven ``prob_*``
        columns. Every frame of a window carries that window's probabilities.
        Windows that match no row of ``df`` contribute nothing.
    """
    if probs is None:
        # Backward compatible fallback to the notebook global.
        probs = pred

    name_prob_emotion = ['prob_Neutral','prob_Anger', 'prob_Disgust','prob_Fear','prob_Happiness', 'prob_Sadness','prob_Surprise']
    out_cols = ['name','name_img','true_emotion'] + name_prob_emotion

    window_frames = []
    for window_idx in range(len(probs)):
        window_rows = df.loc[df.index.isin(dict_['index'][window_idx])]
        if window_rows.empty:
            # An empty window used to crash on 2-D indexing of an empty array.
            continue

        window_df = pd.DataFrame({
            'name': window_rows.name_folder.tolist(),
            'name_img': window_rows.name_img.tolist(),
            'true_emotion': window_rows.emotion.tolist(),
        })
        window_prob = np.asarray(probs[window_idx])
        # A dedicated loop variable: the old inner loop reused the outer `i`.
        for col_idx, col_name in enumerate(name_prob_emotion):
            # Scalar assignment broadcasts the window value to all its frames.
            window_df[col_name] = window_prob[col_idx]
        window_frames.append(window_df)

    if not window_frames:
        return pd.DataFrame(columns=out_cols)

    # Single concat: avoids quadratic growth and the empty seed frame.
    return pd.concat(window_frames, ignore_index=True)

def df_group(df_all):
    """Group probabilities by frame.

    Rows of ``df_all`` sharing the same ('name', 'name_img') pair are merged:
    the seven ``prob_*`` columns are averaged and the single true emotion of
    the pair is kept.

    Parameters
    ----------
    df_all : pandas.DataFrame
        Columns 'name', 'name_img', 'true_emotion' and the seven ``prob_*``
        columns. The text of 'name_img' before the first '.' must parse as an
        integer; it becomes the 'frame' column.

    Returns
    -------
    pandas.DataFrame
        Columns 'name_file', 'emotion', 'frame' and the seven ``prob_*``
        columns, sorted by ('name_file', 'frame') with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If one ('name', 'name_img') pair carries more than one true emotion.
    """
    prob_cols = ['prob_Neutral','prob_Anger','prob_Disgust','prob_Fear','prob_Happiness','prob_Sadness','prob_Surprise']

    agg_spec = {'true_emotion': 'unique'}
    agg_spec.update((col, 'mean') for col in prob_cols)
    grouped = df_all.groupby(['name', 'name_img']).agg(agg_spec).reset_index()

    # 'unique' yields one array per group. Pull the single label out
    # explicitly instead of relying on size-1 array -> int conversion
    # (deprecated in NumPy), and fail clearly on conflicting labels.
    label_arrays = grouped['true_emotion']
    if (label_arrays.map(len) != 1).any():
        raise ValueError("each (name, name_img) pair must have exactly one true_emotion")
    grouped['true_emotion'] = label_arrays.map(lambda labels: int(labels[0])).astype(int)

    # create a new column 'frame' from the image file name ('12.jpg' -> 12)
    grouped['frame'] = grouped['name_img'].astype(str).str.split('.').str[0].astype(int)

    # rename columns
    grouped = grouped.rename(columns={'name': 'name_file','true_emotion':'emotion'})

    # sort by name and frame number, then actually reset the index
    # (the result of reset_index used to be thrown away)
    grouped = grouped.sort_values(by=['name_file', 'frame']).reset_index(drop=True)

    # drop 'name_img' and set the final column order
    return grouped[['name_file','emotion','frame'] + prob_cols]

def df_dropprd_frames(dict_not_frame, name_folder):
    """Create a DataFrame describing the dropped frames.

    ``dict_not_frame`` maps a file name to the collection of its dropped frame
    numbers. Only keys that occur in ``name_folder`` are kept. The result has
    one row per dropped frame with columns 'name_file', 'emotion', 'frame' and
    seven ``prob_*`` columns; the probabilities are all 0.0, 'emotion' is
    never filled here, and 'frame' is cast to int.
    """
    prob_columns = ['prob_Neutral','prob_Anger','prob_Disgust','prob_Fear','prob_Happiness','prob_Sadness','prob_Surprise']

    files = []
    frames = []
    for video, missing in dict_not_frame.items():
        if video not in name_folder:
            continue
        # repeat the file name once per dropped frame of that file
        files.extend([video] * len(missing))
        frames.extend(missing)

    table = pd.DataFrame(columns=['name_file','emotion', 'frame'] + prob_columns)
    table['name_file'] = files
    table['frame'] = frames

    # placeholder probabilities, one zero per row and per class
    zero_block = np.zeros((len(files), 7))
    for col_idx, col_name in enumerate(prob_columns):
        table[col_name] = zero_block[:, col_idx].tolist()

    table[['frame']] = table[['frame']].astype(int)
    return table

In [3]:
# Directory prefixes used by the cells below; both are joined to file names by
# plain string concatenation, so the trailing '/' is required.
path_data = 'data_sequence_svm/'   # pickled window dictionary and image-path CSV
path_model = 'models/svm/'         # pickled classifier

In [4]:
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# The files are opened in `with` blocks so the handles are closed right after
# loading instead of being left open for the lifetime of the kernel.

# load data
with open(path_data + 'dict_valid_window_4_s.pickle', 'rb') as pickle_file:
    dict_test = pickle.load(pickle_file)
# load model
with open(path_model + 'model_log_weight.sav', 'rb') as pickle_file:
    loaded_model = pickle.load(pickle_file)

In [5]:
# Class probabilities predicted for dict_test['feature'] (presumably one row
# per window -- confirm against how the pickle was built).
# NOTE: the df_full(dict_test, df_test) call further down picks `pred` up from
# the notebook namespace, so this cell has to run before that call.
pred = loaded_model.predict_proba(dict_test['feature'])

In [6]:
# Read the table of image paths; 'name_folder' is forced to str.
filename = 'path_images_valid.csv'
df_test = pd.read_csv(path_data + filename, dtype={'name_folder': str})

# The second '/'-separated component of 'path_images' becomes 'name_img'.
names_ids_1 = df_test['path_images'].str.split('/')
res = names_ids_1.str.get(1)
df_test['name_img'] = res

# Expand the window predictions to frame level, then merge them per frame.
df_all = df_full(dict_=dict_test, df=df_test)
df_all_group = df_group(df_all=df_all)

In [7]:
# create a dataframe with dropped frames
# unique file names in first-seen order (a dict keys view)
name_folder = Counter(df_all_group.name_file.tolist()).keys()
path_counter_frame = 'files_needed_to_get_final_probabilities/counter_frame.csv'
counter_frame = pd.read_csv(path_counter_frame)
# video names without extension and their total frame counts, as parallel lists
name_counter_frame = [i.split('.')[0] for i in counter_frame.name_video.tolist()]
total_counter_frame = counter_frame.total_frame.tolist()

# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# Opened in a `with` block so the file handle is closed after loading.
with open('files_needed_to_get_final_probabilities/frame_with_face_not_detected.pickle', 'rb') as pickle_file:
    dict_not_frame = pickle.load(pickle_file)
dropped_frames = df_dropprd_frames(dict_not_frame, name_folder)

In [8]:
# Give every dropped frame the probabilities of a neighbouring known frame.
df_new__ = fill_dropped_frame(
    name_folder=name_folder,
    dropped_frames=dropped_frames,
    df_old=df_all_group,
)

# Stack known and filled frames, then order them by file and frame number.
df_new_union = pd.concat((df_all_group, df_new__), ignore_index=True)
df_new_union_sort = df_new_union.sort_values(['name_file', 'frame'])

In [9]:
# save probability: one CSV-formatted '<name>.txt' per file name
out_columns = ['frame_id','neutral_probability', 'anger_probability','disgust_probability','fear_probability','happiness_probability','sadness_probability','surprise_probability']
for i in name_folder:
    # Names of the form '<base>_<suffix>' whose suffix is longer than one
    # character are looked up in the frame counter under '<base>';
    # every other name is looked up unchanged.
    name_parts = i.split('_')
    if len(name_parts) > 1 and len(name_parts[1]) > 1:
        lookup_name = name_parts[0]
    else:
        lookup_name = i
    counter_i = total_counter_frame[name_counter_frame.index(lookup_name)]

    c_df = df_new_union_sort.loc[df_new_union_sort.name_file == i].copy()
    c_p = c_df.drop(['name_file','emotion'], axis=1).values
    # keep at most `counter_i` rows (the total frame count of the video)
    df_recording = pd.DataFrame(data=c_p[:counter_i], columns=out_columns)
    filename = '{}.txt'.format(i)
    df_recording.to_csv('probability/svm/test/' + filename, index=False)