In [74]:
import pandas as pd
import os
import librosa
import numpy as np
import scipy
import re


#Pickling
from six.moves import cPickle as pickle

#HTTP
import requests
import json

#
import collections
import math

import matplotlib.pyplot as plt
import matplotlib.style as ms
import librosa.display
import IPython.display as ipd
import seaborn
ms.use('seaborn-muted')
%matplotlib inline


In [2]:
def get_pattern(text):
    """Fetch the patterns the extraction service reports for ``text``.

    ``text`` is serialised to JSON and posted as the ``input_tweets`` form
    field to the ``get_patt`` endpoint; the response body is parsed as JSON.

    Parameters
    ----------
    text : JSON-serialisable object
        Payload to analyse. The caller in this notebook passes a one-element
        list holding a sentence.

    Returns
    -------
    list
        The ``'pattern'`` entry of every item in the decoded response, in
        response order.
    """
    url = 'http://192.168.2.101:7878/api/get_patt'
    payload = dict(input_tweets=json.dumps(text))
    resp = requests.post(url=url, data=payload)
    items = json.loads(resp.text)
    # Return a real list: on Python 3 ``map`` yields a lazy iterator, which
    # cannot be indexed the way extract_patterns does (``get_pattern(...)[0]``).
    return [item['pattern'] for item in items]
    
def get_deep_emotion(text):
    """Post ``text`` to the emotion endpoint and return the decoded reply.

    The argument is serialised to JSON, sent as the ``input_tweets`` form
    field to the ``get_emo`` endpoint, and the response body is parsed as
    JSON and returned unchanged.
    """
    endpoint = 'http://192.168.2.101:7878/api/get_emo'
    payload = {'input_tweets': json.dumps(text)}
    response = requests.post(url=endpoint, data=payload)
    return json.loads(response.text)



def clean_text(text, remove_actions = True):
    """Normalise a transcript line.

    Steps, in order: optionally drop square-bracketed annotations (together
    with one preceding space), replace every character of ``punct_str`` with
    a space, collapse runs of spaces, then lower-case and strip the result.
    Apostrophes are not in ``punct_str`` and are therefore kept.

    Parameters
    ----------
    text : str
        The raw sentence.
    remove_actions : bool
        When true, remove bracketed annotations before punctuation handling.
        When false, only the brackets themselves are turned into spaces.

    Returns
    -------
    str
        The cleaned, lower-cased sentence.
    """
    punct_str = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~«»“…‘”'
    if remove_actions:
        # The bracket body must exclude ']'. Excluding ')' (as before) let a
        # single greedy match run from the first '[' to the last ']' and
        # delete the spoken words between two annotations.
        text = re.sub(r" ?\[[^\]]+\]", "", text)
    for p in punct_str:
        text = text.replace(p, ' ')
    text = re.sub(' +', ' ', text)
    return text.lower().strip()
    

In [4]:
# full = generate_IEMOCAP_df()
# Load the per-sentence table; the first CSV column becomes the row index.
data = pd.read_csv('data/IEMOCAP_sentences.csv',index_col=0)
# Integer code assigned to each emotion label.
emotional_mapping = {'ang': 0, 'sad': 1, 'exc': 2, 'neu': 3,'fru': 4,'hap': 5,'fea': 6,'sur': 7,'dis': 8, 'xxx':9,'oth':10}
data['emotion_code'] = data['emotion'].map( emotional_mapping ).astype(int)
# Keep codes 0-5 only (ang, sad, exc, neu, fru, hap). The explicit copy makes
# the filtered frame independent of the original, so the column assignment
# below does not raise SettingWithCopyWarning.
data = data[data.emotion_code < 6].copy()

# Normalise every sentence with clean_text (bracketed actions removed by default).
data['text'] = data['text'].apply(clean_text)
data.head(5)

Unnamed: 0,index,start_time,end_time,text,wav_path,alignment_path,emotion,valence,arousal,dominance,gender,emotion_code
0,Ses01F_impro01_F000,6.2901,8.2357,excuse me,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,2.5,2.5,F,3
1,Ses01F_impro01_F001,10.01,11.3925,yeah,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,2.5,2.5,F,3
2,Ses01F_impro01_F002,14.8872,18.0175,is there a problem,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,2.5,2.5,F,3
5,Ses01F_impro01_F005,27.46,31.49,well what's the problem let me change it,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,3.5,2.0,F,3
6,Ses01F_impro01_F006,38.965,43.59,what i'm getting an id this is why i'm here my...,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,fru,2.0,3.5,3.5,F,4


In [142]:
# patterns = {'.+ .+ a', '.+ a', '.+ a .+', '.+ problem', '.+ there', 'a .+', 'is .+', 'is .+ a', 'is there .+','there .+','there a .+'}
# patterns = {}

def get_patterns_load(patterns):
    """Flatten a patterns frame into one list of its non-null cell values.

    Rows are visited in frame order and each row contributes its values in
    column order; null cells are dropped and duplicates are kept.

    Parameters
    ----------
    patterns : pandas.DataFrame
        Frame whose cells hold patterns (null where a row has fewer patterns).

    Returns
    -------
    list
        Every non-null cell value of ``patterns``.
    """
    load = []
    for _, row in patterns.iterrows():
        # extend() grows the list in place; ``load = load + [...]`` re-copied
        # the whole accumulated list on every row.
        load.extend(row.dropna())
    return load

def extract_patterns(data,extract=False):
    """Return the set of patterns of every sentence, keyed by the row's ``index`` value.

    With ``extract=True`` each row's ``text`` is sent to ``get_pattern`` (one
    call per row) and the resulting mapping is cached in
    ``pickles/patterns/pattern.pickle``. Otherwise the mapping is read back
    from that file and ``data`` is not used.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``index`` and ``text`` columns; only read when ``extract``
        is true.
    extract : bool
        Recompute the patterns instead of loading the cached pickle.

    Returns
    -------
    dict or None
        ``{row['index']: set of patterns}``. When extracting, the mapping is
        returned even if saving the pickle fails. When loading fails, the
        error is printed and ``None`` is returned.
    """
    pickle_path = 'pickles/patterns/pattern.pickle'
    if extract:
        patterns = {}
        for index, row in data.iterrows():
            # list() keeps the [0] lookup valid even if get_pattern hands back
            # a lazy iterator (``map`` on Python 3).
            first = list(get_pattern([row['text']]))[0]
            patterns[row['index']] = set(first.values())
            print('Extracted pattern from '+ row['index'] + ' index:'+ str(index))
            print('Size: ', len(patterns[row['index']]), 'Patterns size', len(patterns))
        try:
            print('Saving Pickle')
            # Make sure the cache directory exists so the extraction result is
            # not left uncached just because the folder is missing.
            pickle_dir = os.path.dirname(pickle_path)
            if not os.path.isdir(pickle_dir):
                os.makedirs(pickle_dir)
            with open(pickle_path,'wb') as f:
                pickle.dump({'patterns': patterns},f,pickle.HIGHEST_PROTOCOL)
                print('Successfully saved in pattern.pickle')
        except Exception as e:
            # Saving is best-effort: report the problem and still hand back
            # the in-memory result.
            print('Unable to save data to pickle', e)
            print('Patterns probably not saved.')
        return patterns
    try:
        # SECURITY: unpickling executes arbitrary code; only load a cache file
        # that this notebook wrote itself.
        with open(pickle_path,'rb') as f:
            return pickle.load(f)['patterns']
    except Exception as e:
        print('Error loading base datasets pickle: ', e)
        return None
        
        
# Load the cached patterns: extract defaults to False, so the pickle is read
# and no requests are sent to the pattern service.
patterns = extract_patterns(data)  
# Turn the {key: patterns} mapping into a frame whose rows are labelled by the
# mapping's keys (orient='index').
patterns_df = pd.DataFrame.from_dict(patterns, orient='index')

In [145]:
# ang_index = data[data.emotion_code == 0]['index']
# sad_index = data[data.emotion_code == 1]['index']
# exc_index = data[data.emotion_code == 2]['index']
# neu_index = data[data.emotion_code == 3]['index']
# fru_index = data[data.emotion_code == 4]['index']
# hap_index = data[data.emotion_code == 5]['index']
# ang_patterns = patterns_df.loc[list(ang_index)]
# sad_patterns = patterns_df.loc[list(sad_index)]
# exc_patterns = patterns_df.loc[list(exc_index)]
# neu_patterns = patterns_df.loc[list(neu_index)]
# fru_patterns = patterns_df.loc[list(fru_index)]
# hap_patterns = patterns_df.loc[list(hap_index)]

# ang_load = get_patterns_load(ang_patterns)
# sad_load = get_patterns_load(sad_patterns)
# exc_load = get_patterns_load(exc_patterns)
# neu_load = get_patterns_load(neu_patterns)
# fru_load = get_patterns_load(fru_patterns)
# hap_load = get_patterns_load(hap_patterns)
# all_patterns = get_patterns_load(patterns_df)

# emotions_load ={'ang':ang_load,'sad':sad_load, 'exc':exc_load,'neu':neu_load,'fru':fru_load, 'hap': hap_load }

In [147]:
def pf_ief(pattern,emotions_load,emotions_counter,emotion):
    """Score ``pattern`` for ``emotion`` as the product of two base-10 logs.

    * ``pf``  = log10((S + 1) / (c + 1)), where ``c`` is the pattern's count
      in ``emotions_counter[emotion]`` and ``S`` adds up that counter's value
      for every element of ``emotions_load[emotion]``.
    * ``ief`` = log10((c + 1) / (T + 1)), where ``T`` is the pattern's count
      summed over all emotions (``sum_freq``).

    NOTE(review): ``S`` walks the load list itself, so a pattern listed k
    times is added k times - confirm this weighting is intended.

    Parameters
    ----------
    pattern : hashable
        Pattern to score.
    emotions_load : mapping
        emotion -> list of patterns observed for that emotion.
    emotions_counter : mapping
        emotion -> counter of those patterns.
    emotion : hashable
        Key selecting the emotion to score against.

    Returns
    -------
    float
        ``pf * ief``.
    """
    freq = emotions_counter[emotion]
    # Float start value keeps every later division a true division.
    total = sum((freq[p] for p in emotions_load[emotion]), 0.0) + 1
    own = freq[pattern] + 1
    pattern_frequency = math.log(total / own, 10)
    inverse_emotion_frequency = math.log(own / (sum_freq(pattern, emotions_counter) + 1), 10)
    return pattern_frequency * inverse_emotion_frequency
    
def sum_freq(pattern,emotions_counter):
    """Total count of ``pattern`` across every emotion's counter.

    ``emotions_counter`` maps each emotion to a counter-like object; the
    per-emotion values for ``pattern`` are added up starting from ``0.0``, so
    the result is always a float.
    """
    return sum(
        (emotions_counter[name][pattern] for name in emotions_counter),
        0.0,
    )

# Example call, kept for reference but disabled: in the visible notebook no
# earlier cell defines `pattern`, `emotions_load` or `emotions_counter`, so
# running it on a fresh kernel raises NameError (and its value was discarded
# anyway, because it is not the last expression of the cell).
# pf_ief(pattern, emotions_load, emotions_counter, 'ang')

def build_pf_ief_model(data):
    """Compute the pf_ief score of every known pattern for every emotion.

    Patterns are obtained through ``extract_patterns(data)``. For each
    distinct value of ``data.emotion`` the patterns of the rows carrying that
    emotion are flattened into a list and counted; ``pf_ief`` is then
    evaluated for each pattern against each emotion.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``index`` and ``emotion`` columns. The ``index`` values
        are used as row labels into the patterns frame.

    Returns
    -------
    dict
        ``{pattern: {emotion: score}}``.
    """
    data_patterns = extract_patterns(data)
    patterns_df = pd.DataFrame.from_dict(data_patterns, orient='index')
    emo_list = list(data.emotion.unique())
    emotions_load = {}
    emotions_counter = {}
    for emotion in emo_list:
        # Select the rows of *this* emotion. The previous code looked up the
        # global ``ang_index`` here, so every emotion received the same rows
        # (or a NameError when that global did not exist).
        index = data[data.emotion == emotion]['index']
        emo_patterns = patterns_df.loc[list(index)]
        emotions_load[emotion] = get_patterns_load(emo_patterns)
        emotions_counter[emotion] = collections.Counter(emotions_load[emotion])
    model = {}
    for pattern in get_patterns_load(patterns_df):
        # The flattened list repeats patterns; a repeat would only recompute
        # and overwrite identical scores, so score each pattern once.
        if pattern in model:
            continue
        model[pattern] = {
            emotion: pf_ief(pattern, emotions_load, emotions_counter, emotion)
            for emotion in emo_list
        }
    return model

In [None]:
# Build the {pattern: {emotion: score}} mapping for the filtered dataset.
em_df = build_pf_ief_model(data)

In [None]:
# Display the whole model (a plain dict despite the `_df` suffix).
em_df

In [87]:
# Scratch setup for inspecting a single (emotion, pattern) pair.
# NOTE(review): `ang_load` is only assigned in the commented-out cell earlier
# in the notebook, so this cell fails on a fresh kernel unless that code is
# restored.
emotion = 'ang'
pattern = ang_load[0]
# Bare expression in the middle of a cell: evaluated but not displayed.
pattern
counter = collections.Counter(ang_load)

In [88]:
# Manual recomputation of the running sum that pf_ief builds before adding 1:
# the counter value of every element of the emotion's load is accumulated.
# NOTE(review): `emotions_load` is only assigned in the commented-out cell
# earlier in the notebook - confirm where it should come from.
result = 0
counter = collections.Counter(emotions_load[emotion])
for patt in emotions_load[emotion]:
    result += counter[patt]

43
58
52
120
24
65
('test', Counter({u'you .+': 416, u'.+ you': 395, u'you .+ .+': 394, u'.+ you .+': 369, u'i .+': 367, u'.+ .+ you': 356, u'.+ i': 306, u'.+ i .+': 304, u'to .+': 292, u'.+ to': 287, u'.+ to .+': 285, u'.+ .+ to': 283, u'.+ .+ i': 281, u'to .+ .+': 275, u'a .+': 219, u'.+ a .+': 218, u'.+ a': 218, u'the .+': 216, u'.+ the': 214, u'.+ the .+': 213, u'and .+': 208, u'.+ and': 201, u'.+ .+ a': 200, u'.+ and .+': 199, u'the .+ .+': 198, u'.+ .+ and': 198, u'a .+ .+': 198, u'.+ it': 169, u'.+ me': 168, u'.+ .+ me': 164, u'.+ that': 161, u'what .+': 157, u'.+ .+ that': 156, u'it .+': 147, u'.+ it .+': 144, u'.+ is': 143, u'is .+': 138, u'me .+': 136, u'.+ is .+': 136, u'.+ me .+': 135, u'that .+': 131, u'of .+': 128, u'.+ that .+': 128, u'.+ of': 127, u'.+ of .+': 127, u'.+ have': 123, u'.+ have .+': 122, u'have .+': 122, u'me .+ .+': 120, u'.+ .+ of': 120, u'.+ what': 117, u'do .+': 114, u'.+ .+ is': 114, u'.+ not': 111, u'.+ do': 110, u'in .+': 109, u'.+ do .+': 109, u'no

0.0

In [58]:
# Show how often each pattern occurs in the selected emotion's load.
collections.Counter(emotions_load[emotion])

Counter({u'not .+ up': 1,
         u'you my .+': 1,
         u'.+ for you': 12,
         u'.+ do you': 57,
         u'was .+ like': 1,
         u'with .+ in': 1,
         u'.+ of what': 3,
         u'done with .+': 1,
         u'a week .+': 1,
         u'.+ can not': 1,
         u'.+ makes you': 1,
         u'.+ be': 57,
         u'can .+ a': 2,
         u'is for .+': 4,
         u'the .+ this': 3,
         u'to make .+': 7,
         u'.+ kids': 8,
         u'.+ turn': 11,
         u'.+ deal': 7,
         u'do .+ have': 7,
         u'busy .+': 1,
         u'.+ by': 5,
         u'.+ last week': 3,
         u'is this .+': 7,
         u'they were .+': 1,
         u'.+ husband': 1,
         u'.+ of her': 2,
         u'.+ me': 168,
         u'do .+ it': 1,
         u'single .+': 3,
         u'.+ stop .+': 11,
         u'oh oh .+': 2,
         u'to .+ you': 18,
         u'minutes .+': 2,
         u'.+ from .+': 15,
         u'.+ right .+': 31,
         u'stand .+': 7,
         u'go away .+':

In [116]:
# Scratch check of `/` on two ints; the recorded output is 5 (Python 3 would
# show 5.0).
10/2

5