In [None]:
import pandas as pd
import pydub
import os
import tqdm
from utils import get_end_from_start, get_start_end_from_file
from data import read_interview, get_features
import matplotlib.pyplot as plt
import numpy as np
import pickle
from lime import lime_tabular    
import dill

# Path of the ffmpeg executable that pydub is told to use as its converter.
# The FFMPEG_PATH environment variable, when set, overrides the machine-specific
# default below, so the notebook can run on another machine without editing this cell.
pydub.AudioSegment.converter = os.environ.get(
    "FFMPEG_PATH",
    r"C:/Users/Kinza/anaconda3/envs/pie/Library/bin/ffmpeg.exe",  # CHANGE THIS (or set FFMPEG_PATH)
)

# Load videos and extract features

In [None]:
# Load videos and calculate feats.
# Produces `feats`: either recomputed from the videos (and cached to CSV) or read back from that CSV.
video_folder = 'predict/' #CHANGE THIS. Name of the folder containing videos.
df_name = 'data/times.xlsx'  #CHANGE THIS. Name of an Excel file containing the columns 'email', 'question', 'start' and 'end'.
output_file = "data/audio_lexic.csv" # CHANGE THIS. CSV file the computed features are written to / read from.

RECALC_FEATS = False #CHANGE THIS TO TRUE IF YOU HAVE NEW VIDEOS

if RECALC_FEATS:
    # `tqdm` is imported as a module at the top of the notebook, so the progress-bar
    # wrapper is `tqdm.tqdm`; calling the module itself raises TypeError.
    filenames = tqdm.tqdm(os.listdir(video_folder))
    df_startend = get_start_end_from_file(df_name)

    # One read_interview() result per file in the video folder.
    interviews = [read_interview(video_folder, df_startend, f) for f in filenames]

    # Tidy feats and save to csv so you don't have to recalculate everything
    feats = [item for sublist in interviews for item in sublist]  # flatten one level
    feats = get_features(feats)
    # NOTE(review): the cached-load branch below re-indexes on 'id'; presumably
    # get_features() already returns a frame indexed by 'id' -- confirm.
    feats.to_csv(output_file)
else:
    # Reuse the features cached by a previous run.
    feats = pd.read_csv(output_file)
    feats = feats.set_index('id')

In [None]:
# Preview the first five rows of the feature table.
feats.head(n=5)

# Visualize features

In [None]:
# NOTE(review): heatmap_vizualisation is not defined in this notebook; presumably it
# comes from this star import -- confirm and import it by name if so.
from visu import *

# Positional (row, column) coordinates of the value to visualize in `feats`.
index_candidate = 0
index_feature = 0
feature_label = "voiced count"

fts = feats.iloc[index_candidate, index_feature]
heatmap_vizualisation(fts, feature_label)

In [None]:
import ipywidgets as widgets
from ipywidgets import Layout
import seaborn as sns

def print_boxplot(email,variable):
    """Draw a boxplot of one `feats` column and mark one interview's value on it.

    email: index label of the row of `feats` to highlight.
    variable: name of the `feats` column to plot.
    """
    value = round(feats.loc[email, variable], 3)

    sns.set_theme(style="whitegrid")
    axes = sns.boxplot(data=feats, y=variable)

    # Red horizontal line at the selected interview's (rounded) value.
    axes.axhline(value, c='r')

    # Write the value next to the line, offset by 0.5% of the value itself.
    axes.text(
        -0.3,
        value * 1.005,
        value,
        color='red',
        size='small',
        weight='normal',
        horizontalalignment='left',
    )


def _make_select(options, description):
    """Build one of the two identically laid-out selection lists."""
    return widgets.Select(
        options=options,
        description=description,
        disabled=False,
        layout=Layout(width='50%', height='80px', display='flex'),
    )


# One selector over the rows (interviews) and one over the columns (features) of `feats`.
email = _make_select(feats.index.tolist(), 'Interview')
variable = _make_select(feats.columns.tolist(), 'Variable')

# Re-draw the boxplot whenever either selection changes.
widgets.interactive(print_boxplot, email=email, variable=variable)

# Load saved model and explainer

In [None]:
#Lime works only with single output

# NOTE: pickle and dill can execute arbitrary code while loading; only load
# model/explainer files that come from a trusted source.
model_name = 'models/RF_single_output'
# Use a context manager so the file handle is closed after loading
# (the bare open() call used previously was never closed).
with open(model_name, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

explainer_name = 'models/explainer'
with open(explainer_name, 'rb') as f:
    explainer = dill.load(f)

# Predict scores

In [None]:
# Run the loaded model on every row of `feats`.
predictions = loaded_model.predict(feats)

# Optional post-processing, currently disabled:
#   predictions = [tuple(p) for p in predictions]

# Display the raw predictions as the cell output.
predictions

# Explain results

In [None]:
# Ask the LIME explainer to explain the model's prediction for one video.

i = np.random.randint(0, feats.shape[0]) #Choose the video to explain, by default it explains a random video

# LIME documents `data_row` as a 1d numpy array. Passing the pandas Series directly
# makes LIME index a label-indexed Series with integer positions, which relies on
# pandas' deprecated positional fallback, so hand it the underlying array instead.
data_row = feats.iloc[i, :].to_numpy()

# top_labels=1: only explain the label with the highest predicted probability.
exp = explainer.explain_instance(data_row, loaded_model.predict_proba, top_labels=1)
exp.show_in_notebook(show_table=True, show_all=False)