In [7]:
import pandas as pd
import os
import librosa
import numpy as np
import scipy
import re


#Pickling
from six.moves import cPickle as pickle

#HTTP
import requests
import json

#
import collections

import matplotlib.pyplot as plt
import matplotlib.style as ms
import librosa.display
import IPython.display as ipd
import seaborn
ms.use('seaborn-muted')
%matplotlib inline


In [47]:
def get_pattern(text):
    """Request the patterns of a batch of sentences from the pattern service.

    ``text`` is JSON-encoded and POSTed as the ``input_tweets`` form field to
    the ``/api/get_patt`` endpoint. The JSON reply is decoded and the
    ``'pattern'`` entry of every returned item is collected.

    Parameters
    ----------
    text : JSON-serialisable object
        Callers in this notebook pass a list of sentence strings.

    Returns
    -------
    list
        The ``'pattern'`` value of each item of the reply, in reply order.
    """
    payload = json.dumps(text)
    url = 'http://192.168.2.101:7878/api/get_patt'
    data = dict(input_tweets = payload)
    resp = requests.post(url=url, data=data)
    items = json.loads(resp.text)
    # Return a real list: on Python 3 ``map`` is a lazy, length-less iterator,
    # but callers index the result (``get_pattern(...)[0]``), take its
    # ``len()`` and pickle it.
    return [item['pattern'] for item in items]
    
def get_deep_emotion(text):
    """POST ``text`` to the emotion endpoint and return the decoded reply.

    ``text`` is serialised to JSON and sent as the ``input_tweets`` form
    field to ``/api/get_emo``. The response body is parsed as JSON and
    returned unchanged.

    Parameters
    ----------
    text : JSON-serialisable object
        Callers in this notebook pass a list of sentence strings.

    Returns
    -------
    object
        Whatever the service's JSON body decodes to (callers index it with
        ``[0]``, so presumably one entry per input sentence -- confirm
        against the service).
    """
    endpoint = 'http://192.168.2.101:7878/api/get_emo'
    form_fields = {'input_tweets': json.dumps(text)}
    reply = requests.post(url=endpoint, data=form_fields)
    return json.loads(reply.text)



def clean_text(text, remove_actions = True):
    """Normalise a transcript line for pattern extraction.

    Steps, in order: optionally drop bracketed annotations such as
    ``[LAUGHTER]``, replace every character of ``punct_str`` with a space,
    collapse runs of spaces, lower-case and strip. Apostrophes are not in
    ``punct_str`` and are therefore kept (``what's`` stays ``what's``).

    Parameters
    ----------
    text : str
        Raw sentence.
    remove_actions : bool, optional
        When true (default) each ``[...]`` annotation, together with one
        preceding space, is removed before punctuation is stripped.

    Returns
    -------
    str
        The cleaned, lower-cased sentence.
    """
    punct_str = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~«»“…‘”'
    if remove_actions:
        # Exclude ']' inside the brackets so every annotation is matched on
        # its own. The previous class ``[^)]`` let one greedy match run from
        # the first '[' to the last ']', deleting the words in between.
        text = re.sub(r" ?\[[^\]]+\]", "", text)
    for p in punct_str:
        text = text.replace(p, ' ')
    text = re.sub(' +', ' ', text)
    return text.lower().strip()
    

In [13]:
# full = generate_IEMOCAP_df()
data = pd.read_csv('data/IEMOCAP_sentences.csv',index_col=0)
# data.head(3)
# Integer code assigned to each label of the ``emotion`` column.
emotional_mapping = {'ang': 0, 'sad': 1, 'exc': 2, 'neu': 3,'fru': 4,'hap': 5,'fea': 6,'sur': 7,'dis': 8, 'xxx':9,'oth':10}
data['emotion_code'] = data['emotion'].map( emotional_mapping ).astype(int)
# Keep only the six classes coded 0-5 (ang, sad, exc, neu, fru, hap).
# ``.copy()`` makes the filtered frame independent of the full table, so the
# column assignments in later cells do not act on a slice of it (pandas
# would otherwise emit SettingWithCopyWarning).
data = data[data.emotion_code < 6].copy()

In [38]:
# Overwrite the ``text`` column with its clean_text-normalised form, then
# preview the first five rows (``head`` defaults to n=5).
data['text'] = data['text'].apply(clean_text)
data.head()

Unnamed: 0,index,start_time,end_time,text,wav_path,alignment_path,emotion,valence,arousal,dominance,gender,emotion_code
0,Ses01F_impro01_F000,6.2901,8.2357,excuse me,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,2.5,2.5,F,3
1,Ses01F_impro01_F001,10.01,11.3925,yeah,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,2.5,2.5,F,3
2,Ses01F_impro01_F002,14.8872,18.0175,is there a problem,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,2.5,2.5,F,3
5,Ses01F_impro01_F005,27.46,31.49,well what's the problem let me change it,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,neu,2.5,3.5,2.0,F,3
6,Ses01F_impro01_F006,38.965,43.59,what i'm getting an id this is why i'm here my...,data/IEMOCAP_full_release/Session1/sentences/w...,data/IEMOCAP_full_release/Session1/sentences/F...,fru,2.0,3.5,3.5,F,4


In [113]:
# patterns = {'.+ .+ a', '.+ a', '.+ a .+', '.+ problem', '.+ there', 'a .+', 'is .+', 'is .+ a', 'is there .+','there .+','there a .+'}
# patterns = {}
def extract_patterns(data,extract=False):
    """Return the pattern set of every utterance, extracting or loading it.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide an ``index`` column (utterance id) and a ``text``
        column. It is only read when ``extract`` is true.
    extract : bool, optional
        When true, call ``get_pattern`` once per row and cache the result in
        ``pickles/patterns/pattern.pickle``. When false (default), load that
        cache instead.

    Returns
    -------
    dict or None
        Maps utterance id to the set of its pattern strings. ``None`` when
        the cache cannot be read (the error is printed, not raised).
    """
    pickle_path = 'pickles/patterns/pattern.pickle'

    if not extract:
        # NOTE: unpickling can execute arbitrary code -- only load cache
        # files that this notebook produced itself.
        try:
            with open(pickle_path,'rb') as f:
                return pickle.load(f)['patterns']
        except Exception as e:
            print('Error loading base datasets pickle: ', e)
            return None

    patterns = {}
    for index, row in data.iterrows():
        # One request per sentence; the reply's first item maps position to
        # pattern, and only the distinct pattern strings are kept.
        patterns[row['index']] = set(get_pattern([row['text']])[0].values())
        print('Extracted pattern from '+ row['index'] + ' index:'+ str(index))
        print('Size: ', len(patterns[row['index']]), 'Patterns size', len(patterns))
    try:
        print('Saving Pickle')
        # Create the cache folder on first use; without it the save failed
        # and the extraction above had to be repeated on the next run.
        pickle_dir = os.path.dirname(pickle_path)
        if pickle_dir and not os.path.isdir(pickle_dir):
            os.makedirs(pickle_dir)
        with open(pickle_path,'wb') as f:
            pickle.dump({'patterns' : patterns},f,pickle.HIGHEST_PROTOCOL)
            print('Successfully saved in pattern.pickle')
    except Exception as e:
        # Saving is best effort: report the problem but still hand back the
        # freshly extracted patterns.
        print('Unable to save data to pickle', e)
        print('Patterns probably not saved.')
    return patterns
        
        
# Load the cached patterns (extract=False reads the pickle, no service calls).
patterns = extract_patterns(data, extract=False)

In [126]:
def extract_deep_emo(data,extract=False):
    """Return the emotion scores of every utterance, extracting or loading them.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide an ``index`` column (utterance id) and a ``text``
        column. It is only read when ``extract`` is true.
    extract : bool, optional
        When true, call ``get_deep_emotion`` once per row and cache the
        result in ``pickles/deepemo.pickle``. When false (default), load
        that cache instead.

    Returns
    -------
    dict or None
        Maps utterance id to the first item of the service reply for that
        sentence. ``None`` when the cache cannot be read (the error is
        printed, not raised).
    """
    pickle_path = 'pickles/deepemo.pickle'

    if not extract:
        # NOTE: unpickling can execute arbitrary code -- only load cache
        # files that this notebook produced itself.
        try:
            with open(pickle_path,'rb') as f:
                return pickle.load(f)['emotions']
        except Exception as e:
            print('Error loading base datasets pickle: ', e)
            return None

    emotions = {}
    for index, row in data.iterrows():
        emotions[row['index']] = get_deep_emotion([row['text']])[0]
        print(index, emotions[row['index']])
    try:
        print('Saving Pickle')
        # Create the cache folder on first use; without it the save failed
        # and the extraction above had to be repeated on the next run.
        pickle_dir = os.path.dirname(pickle_path)
        if pickle_dir and not os.path.isdir(pickle_dir):
            os.makedirs(pickle_dir)
        with open(pickle_path,'wb') as f:
            pickle.dump({'emotions' : emotions},f,pickle.HIGHEST_PROTOCOL)
            print('Successfully saved in deepemo.pickle')
    except Exception as e:
        # Saving is best effort: report the problem but still hand back the
        # freshly extracted scores.
        print('Unable to save data to pickle: ', e)
        print('Emotions probably not saved.')
    return emotions

        
        
# Cached emotion scores keyed by utterance id.
deep_emotions = extract_deep_emo(data)
# Transpose so that every utterance id becomes a row.
deep_emotions = pd.DataFrame(deep_emotions).T
# Column label of the highest score in each row. Built eagerly as a list:
# on Python 3 ``map`` returns a lazy, length-less iterator, not a list.
deep_emotions['pred_emotion'] = [scores.idxmax() for _, scores in deep_emotions.iterrows()]
# Attach the annotated label by utterance id instead of by position, so the
# result does not rely on the cached dict and ``data`` sharing row order.
deep_emotions['labeled_emotion'] = data.set_index('index')['emotion'].reindex(deep_emotions.index)


In [149]:
# Preview the first five rows of the scored utterances.
deep_emotions.head(5)

anger               0.118369
anticipation       0.0926268
disgust            0.0788833
fear                0.125745
joy                 0.190445
sadness             0.125877
surprise            0.136117
trust               0.131937
pred_emotion             joy
labeled_emotion          neu
Name: Ses01F_impro01_F000, dtype: object

In [157]:
# One row per utterance id, one cell per extracted pattern.
patterns_df = pd.DataFrame.from_dict(data=patterns, orient='index')
# Spot-check two utterances.
patterns_df.loc[['Ses01F_script02_2_F042','Ses01F_script02_2_F041'], :]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,210,211,212,213,214,215,216,217,218,219
Ses01F_script02_2_F042,.+ that .+,.+ i,.+ i .+,.+ .+ that,white .+,hmm .+,.+ that,i .+ that,big .+,i .+,...,,,,,,,,,,
Ses01F_script02_2_F041,the .+,we .+,the .+ .+,right .+,.+ wrong,.+ but .+,.+ but we,.+ the right,.+ but,so .+,...,,,,,,,,,,


In [158]:
# Utterance ids (``index`` column) of each emotion class; the codes follow
# ``emotional_mapping``.
ang_index = data.loc[data.emotion_code == 0, 'index']
sad_index = data.loc[data.emotion_code == 1, 'index']
exc_index = data.loc[data.emotion_code == 2, 'index']
neu_index = data.loc[data.emotion_code == 3, 'index']
fru_index = data.loc[data.emotion_code == 4, 'index']
hap_index = data.loc[data.emotion_code == 5, 'index']

In [159]:
# Pattern rows of each emotion class, selected by utterance id.
ang_patterns = patterns_df.loc[ang_index.tolist()]
sad_patterns = patterns_df.loc[sad_index.tolist()]
exc_patterns = patterns_df.loc[exc_index.tolist()]
neu_patterns = patterns_df.loc[neu_index.tolist()]
fru_patterns = patterns_df.loc[fru_index.tolist()]
hap_patterns = patterns_df.loc[hap_index.tolist()]

In [206]:
def get_set(patterns):
    """Collect every distinct non-null cell value of a frame into one set.

    Parameters
    ----------
    patterns : pandas.DataFrame
        Frame whose cells hold pattern strings; missing cells are skipped.

    Returns
    -------
    set
        All distinct non-null values found in ``patterns``.
    """
    unique_patterns = set()
    for _, row in patterns.iterrows():
        # update() grows the set in place. The former ``load = load + [...]``
        # re-copied the whole accumulated list for every row (quadratic).
        unique_patterns.update(row.dropna())
    return unique_patterns


In [207]:
# Distinct patterns observed in each emotion class.
ang_set, sad_set, exc_set, neu_set, fru_set, hap_set = map(
    get_set,
    (ang_patterns, sad_patterns, exc_patterns, neu_patterns, fru_patterns, hap_patterns),
)


In [214]:
# Patterns seen only in angry utterances. The method name was garbled
# ("diffœœerence"); ``set.difference`` accepts several sets at once.
ang_set.difference(hap_set, sad_set, exc_set, neu_set, fru_set)

{u'.+ a simple',
 u'.+ and his',
 u'.+ and in',
 u'.+ and make',
 u'.+ and stop',
 u'.+ and tell',
 u'.+ are like',
 u'.+ ass',
 u'.+ ass .+',
 u'.+ at and',
 u'.+ attention',
 u'.+ away with',
 u'.+ challenge',
 u'.+ clear',
 u'.+ clothes',
 u'.+ com',
 u'.+ comes home',
 u'.+ coming to',
 u'.+ del',
 u'.+ die in',
 u'.+ dying',
 u'.+ easily',
 u'.+ event',
 u'.+ evil',
 u'.+ forever and',
 u'.+ from an',
 u'.+ from one',
 u'.+ fucking',
 u'.+ fucking .+',
 u'.+ getting out',
 u'.+ here on',
 u'.+ his room',
 u'.+ home to',
 u'.+ i found',
 u'.+ i seriously',
 u'.+ ill',
 u'.+ is coming',
 u'.+ is driving',
 u'.+ is now',
 u'.+ it off',
 u'.+ it when',
 u'.+ like me',
 u'.+ listening',
 u'.+ listening to',
 u'.+ me more',
 u'.+ me sometimes',
 u'.+ me when',
 u'.+ music',
 u'.+ my family',
 u'.+ now just',
 u'.+ official',
 u'.+ on about',
 u'.+ out by',
 u'.+ out how',
 u'.+ pray',
 u'.+ self',
 u'.+ shooting',
 u'.+ sleeps',
 u'.+ slept',
 u'.+ smoking',
 u'.+ so am',
 u'.+ someone 

In [215]:
# Patterns seen only in sad utterances.
sad_set.difference(hap_set, ang_set, exc_set, neu_set, fru_set)

{u'.+ about everything',
 u'.+ about her',
 u'.+ about how',
 u'.+ after a',
 u'.+ and is',
 u'.+ are the',
 u'.+ as me',
 u'.+ at her',
 u'.+ back into',
 u'.+ because he',
 u'.+ begin',
 u'.+ brain',
 u'.+ but now',
 u'.+ but still',
 u'.+ but when',
 u'.+ by his',
 u'.+ came on',
 u'.+ cancer',
 u'.+ commercials',
 u'.+ dancing',
 u'.+ dreams',
 u'.+ email',
 u'.+ emails',
 u'.+ for her',
 u'.+ for them',
 u'.+ fucked',
 u'.+ fucked up',
 u'.+ gee',
 u'.+ goal',
 u'.+ going back',
 u'.+ going for',
 u'.+ group',
 u'.+ he would',
 u'.+ her head',
 u'.+ hospital',
 u'.+ i miss',
 u'.+ i put',
 u'.+ i wish',
 u'.+ in any',
 u'.+ in bed',
 u'.+ in life',
 u'.+ it as',
 u'.+ jus',
 u'.+ just called',
 u'.+ just thinking',
 u'.+ kicking',
 u'.+ killed',
 u'.+ last night',
 u'.+ loved',
 u'.+ me an',
 u'.+ me pictures',
 u'.+ memories',
 u'.+ met',
 u'.+ my brother',
 u'.+ my dad',
 u'.+ my ears',
 u'.+ of being',
 u'.+ of its',
 u'.+ of life',
 u'.+ on her',
 u'.+ one day',
 u'.+ out what

In [216]:
# Patterns seen only in happy utterances.
hap_set.difference(sad_set, ang_set, exc_set, neu_set, fru_set)

{u'.+ a picture',
 u'.+ after all',
 u'.+ and her',
 u'.+ and said',
 u'.+ are on',
 u'.+ as an',
 u'.+ beginning',
 u'.+ cat',
 u'.+ changing',
 u'.+ concert',
 u'.+ covered',
 u'.+ decent',
 u'.+ every other',
 u'.+ for months',
 u'.+ girls',
 u'.+ got on',
 u'.+ happiness',
 u'.+ hopefully',
 u'.+ hot',
 u'.+ in all',
 u'.+ in april',
 u'.+ in college',
 u'.+ it already',
 u'.+ just say',
 u'.+ just told',
 u'.+ last minute',
 u'.+ likes',
 u'.+ loves you',
 u'.+ may be',
 u'.+ off with',
 u'.+ off your',
 u'.+ pissed',
 u'.+ player',
 u'.+ points',
 u'.+ pool',
 u'.+ sat',
 u'.+ schools',
 u'.+ song',
 u'.+ study',
 u'.+ tears',
 u'.+ than anything',
 u'.+ that looks',
 u'.+ that song',
 u'.+ the beginning',
 u'.+ the old',
 u'.+ the red',
 u'.+ the weekend',
 u'.+ to change',
 u'.+ towards',
 u'.+ was actually',
 u'.+ white',
 u'.+ won',
 u'.+ ya',
 u'.+ ya .+',
 u'.+ you from',
 u'a .+ weekend',
 u'about .+ now',
 u'accidentally .+',
 u'already getting .+',
 u'an awesome .+',
 u'

In [218]:
# Patterns seen only in excited utterances.
exc_set.difference(sad_set, ang_set, hap_set, neu_set, fru_set)

{u'.+ a dream',
 u'.+ a real',
 u'.+ about people',
 u'.+ about to',
 u'.+ although',
 u'.+ american',
 u'.+ and brings',
 u'.+ and eat',
 u'.+ and sad',
 u'.+ and some',
 u'.+ are all',
 u'.+ are doing',
 u'.+ are gonna',
 u'.+ are here',
 u'.+ are just',
 u'.+ as the',
 u'.+ aw',
 u'.+ being in',
 u'.+ bet',
 u'.+ boyfriend',
 u'.+ bread',
 u'.+ by my',
 u'.+ called me',
 u'.+ caught',
 u'.+ church',
 u'.+ comes up',
 u'.+ down my',
 u'.+ down on',
 u'.+ down with',
 u'.+ er',
 u'.+ even though',
 u'.+ everyone is',
 u'.+ excited for',
 u'.+ for anything',
 u'.+ for its',
 u'.+ for real',
 u'.+ for sure',
 u'.+ forgot to',
 u'.+ from all',
 u'.+ from u',
 u'.+ g',
 u'.+ ho',
 u'.+ i bet',
 u'.+ i wonder',
 u'.+ in town',
 u'.+ in what',
 u'.+ inside',
 u'.+ into it',
 u'.+ is totally',
 u'.+ just saw',
 u'.+ lo',
 u'.+ lo .+',
 u'.+ love it',
 u'.+ magazine',
 u'.+ math',
 u'.+ me by',
 u'.+ me into',
 u'.+ me not',
 u'.+ me up',
 u'.+ me with',
 u'.+ mess',
 u'.+ minds',
 u'.+ mo',


In [35]:
# Frequency of each sentence, most frequent first.
# NOTE(review): ``hap_sentences`` is not defined in the visible cells --
# presumably left over from an earlier kernel session; confirm its source.
counter = collections.Counter()
counter.update(list(hap_sentences))
counter.most_common()

[('Or not.', 11),
 ('[LAUGHTER]', 9),
 ('Every day since.', 7),
 ("What'll I do with a fortune?", 4),
 ('Yeah.', 4),
 ('And very much sillier.', 3),
 ('I guess this is why I came.', 3),
 ('What would I do with a fortune?', 3),
 ('yeah.', 3),
 ('Which particular one?', 3),
 ('[BREATHING]', 3),
 ('I kissed you.', 3),
 ('Behave exquisitely.', 2),
 ('What am I going to do with a fortune?', 2),
 ('Now and always sweet.', 2),
 ("Not like that you're not.", 2),
 ("That was a rouser, wasn't it?", 2),
 ("Actually, now that you mention it, no.  I don't.", 2),
 ('You felt something that far back?', 2),
 ('Thank you.', 2),
 ('It burned my comb too and all the towels in the bathroom.', 2),
 ("I guess we don't need glasses.", 2),
 ("It's not champagne.", 2),
 ('You started to write me.', 2),
 ('Look what we got here.', 2),
 ("Shh.  If we're very quiet the fish might come.", 2),
 ("Why didn't you let me know?", 2),
 ('oh.', 2),
 ("You're the only one I know who loves his parents.", 1),
 ('Oh, dear go

In [37]:
# Tabulate the patterns returned for the first three sentences.
pd.DataFrame(data=get_pattern(list(sentences[:3])))

Unnamed: 0,0,1,10,11,12,2,3,4,5,6,7,8,9
0,.+ me,,,,,,,,,,,,
1,,,,,,,,,,,,,
2,.+ .+ a,.+ .+ a,a .+,there .+,there a .+,.+ there,.+ a .+,.+ a .+,.+ a,.+ problem,is there .+,is .+,is .+ a


In [40]:
# Raw service reply for the fourth to sixth sentences.
get_pattern(text=list(sentences[3:6]))

[{u'0': u'let .+',
  u'1': u'let me .+',
  u'10': u'.+ problem',
  u'11': u'.+ .+ me',
  u'12': u'.+ .+ me',
  u'13': u'.+ let',
  u'14': u'.+ let me',
  u'15': u'.+ me .+',
  u'16': u'.+ me .+',
  u'17': u'.+ me',
  u'18': u'.+ change',
  u'19': u'.+ it',
  u'2': u'me .+ it',
  u'20': u'well .+',
  u'21': u'the .+',
  u'22': u'the .+ .+',
  u'23': u'the .+ .+',
  u'24': u'problem .+',
  u'3': u'me .+ .+',
  u'4': u'me .+ .+',
  u'5': u'me .+',
  u'6': u'change .+',
  u'7': u'.+ the .+',
  u'8': u'.+ the .+',
  u'9': u'.+ the'},
 {u'0': u'id .+',
  u'1': u'.+ getting',
  u'10': u'.+ is',
  u'11': u'.+ is .+',
  u'12': u'.+ is .+',
  u'13': u'.+ why',
  u'14': u'.+ .+ my',
  u'15': u'.+ .+ my',
  u'16': u'.+ here',
  u'17': u'.+ my',
  u'18': u'.+ my .+',
  u'19': u'.+ my .+',
  u'2': u'.+ an',
  u'20': u'.+ was .+',
  u'21': u'.+ was .+',
  u'22': u'.+ was',
  u'23': u'what .+',
  u'24': u'is .+',
  u'25': u'an .+',
  u'26': u'why .+',
  u'27': u'here .+',
  u'28': u'here .+ .+',
  u'2

In [None]:
# Distinct patterns of one sentence. Wrap it in a one-element list:
# ``list(sentence)`` would split the string into single characters and send
# those to the service instead (assumes ``sentences[3]`` is a single
# sentence string -- confirm).
set(get_pattern([sentences[3]])[0].values())

In [None]:
from six.moves import cPickle as pickle

def extract_patterns(data):
    """Extract the patterns of every row and cache them in ``patterns.pickle``.

    NOTE: this redefinition shadows the earlier
    ``extract_patterns(data, extract=False)`` of this notebook. Unlike that
    version it always queries the service and stores the full
    ``get_pattern`` reply per utterance, not a set of pattern strings.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide an ``index`` column (utterance id) and a ``text``
        column.

    Returns
    -------
    dict
        Maps utterance id to the list returned by ``get_pattern`` for that
        sentence. Returned even when saving the pickle fails.
    """
    pickle_path = 'patterns.pickle'
    patterns = {}
    for index, row in data.iterrows():
        # list(): keep the reply sized and picklable even when get_pattern
        # hands back a lazy iterator (``map`` on Python 3).
        patterns[row['index']] = list(get_pattern([row['text']])) # Get pattern
        print('Extracted pattern from '+ row['index'] + ' index:'+ str(index))
        print('Size: ', len(patterns[row['index']]), 'Patterns size', len(patterns))
    try:
        print('Saving Pickle')
        with open(pickle_path,'wb') as f:
            pickle.dump({'patterns' : patterns},f,pickle.HIGHEST_PROTOCOL)
            # Report the file actually written (the message used to say
            # 'pattern.pickle').
            print('Successfully saved in ' + pickle_path)
    except Exception as e:
        # Saving is best effort: report the problem but still return the data.
        print('Unable to save data to pickle', e)
        print('Patterns probably not saved.')
    return patterns

        
        
# Re-extract the patterns of every row through the service and rebind
# ``patterns`` to the result.
patterns = extract_patterns(data=data)