In [18]:
import pandas as pd
import numpy as np
import copy
import scipy

In [4]:
# Import data
# Stimuli: keep only the non-constraining rows whose completion is "b".
full_stimuli = (
    pd.read_csv("../data/emotional_data/full_stimuli.csv")
    .loc[lambda frame: (frame["constraint"] == "Non-constraining") & (frame["completion"] == "b")]
)
# Norms: tab-separated; every column is cast to str and lower-cased.
words = (
    pd.read_csv('../data/emotional_data/affective_norms.txt', sep="\t", header=0)
    .apply(lambda column: column.astype(str).str.lower())
)
# Segment table: the first CSV column becomes the index.
segments = pd.read_csv('../data/other/eeg_summary.csv', index_col=0)

In [5]:
# Display the segment table read from eeg_summary.csv (pandas truncates the rendering).
segments

Unnamed: 0,respondent,.id,.recording,segment,trial,type,description,region,cond,N_250to600,...,bc_stemnoun_2,c_constraint,c_completion,s_gen_constraint,s_noun_constraint_2,constraint,gender,item,completion,correct
1,0,3,0,3,1,Stimulus,s22,adjective,1,2.167901,...,0.380952,1,-1,0.853392,-0.465744,Constraining,masc,48,b,1.0
2,0,4,0,4,1,Stimulus,s23,noun,1,-14.829157,...,0.380952,1,-1,0.853392,-0.465744,Constraining,masc,48,b,1.0
3,0,8,0,8,2,Stimulus,s42,adjective,3,-6.284737,...,0.333333,-1,-1,-0.391496,-0.652685,Non-constraining,masc,42,b,1.0
4,0,9,0,9,2,Stimulus,s43,noun,3,6.025566,...,0.333333,-1,-1,-0.391496,-0.652685,Non-constraining,masc,42,b,1.0
5,0,13,0,13,3,Stimulus,s42,adjective,3,0.566325,...,0.285714,-1,-1,-0.936135,-0.839626,Non-constraining,masc,162,b,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6668,76,1273,99_1094,1273,255,Stimulus,s42,adjective,3,5.866306,...,0.571429,-1,-1,1.009003,0.282018,Non-constraining,neut,274,b,
6669,76,1333,99_1094,1333,267,Stimulus,s42,adjective,3,-5.307336,...,0.190476,-1,-1,0.697781,-1.213507,Non-constraining,fem,18,b,1.0
6670,76,1383,99_1094,1383,277,Stimulus,s42,adjective,3,-2.438210,...,0.190476,-1,-1,-1.096608,-1.213507,Non-constraining,fem,70,b,0.0
6671,76,1384,99_1094,1384,277,Stimulus,s43,noun,3,-1.586421,...,0.190476,-1,-1,-1.096608,-1.213507,Non-constraining,fem,70,b,0.0


In [6]:
# Pull the "adj" and "noun" columns of the filtered stimuli out as plain Python lists.
adjectives, nouns = (list(full_stimuli[column]) for column in ("adj", "noun"))

In [7]:
# Strip unnecessary characters
def strip(dataset):
    """Normalise an iterable of words so that they can be compared.

    Every element is converted with ``str()``, has leading and trailing
    ``.``, ``,``, ``?`` and ``!`` removed, is lower-cased, and has each
    "ß" replaced by "ss".

    Parameters
    ----------
    dataset : iterable
        The values to normalise (e.g. a list or a pandas Series).

    Returns
    -------
    list of str
        One normalised string per input element, in input order.
    """
    # The punctuation set is a plain literal: str.strip takes a set of
    # characters, not a regex, so no escaping is needed. The previous
    # "\.\,\?\!" relied on invalid escape sequences and also put the
    # backslash itself into the strip set.
    # Lower-casing happens before the "ß" replacement so that a capital
    # "ẞ" (which lower-cases to "ß") is normalised to "ss" as well.
    return [
        str(entry).strip(".,?!").lower().replace("ß", "ss")
        for entry in dataset
    ]

In [8]:
# Run the stimuli words and the norm vocabulary through the same strip() helper.
adjectives, nouns = strip(adjectives), strip(nouns)
word_list = strip(words["Word"])

In [9]:
# Find the rating of one word, optionally after trimming trailing characters
def _match_rating(word, ratings, max_trim):
    """Return ``(matched_form, arousal, valence)`` for ``word``, or ``None``.

    The word is tried unchanged first, then with 1 .. ``max_trim`` trailing
    characters removed. Trimming stops before the candidate becomes empty.
    """
    for trim in range(max_trim + 1):
        if trim and trim >= len(word):
            break  # nothing left to look up
        candidate = word[:len(word) - trim]
        if candidate in ratings:
            arousal, valence = ratings[candidate]
            return candidate, float(arousal), float(valence)
    return None


# Get emotion for every word
def computeEmotion(stimuli=None, adjective_list=None, noun_list=None,
                   norms=None, norm_words=None, max_trim=2):
    """Look up arousal and valence for every adjective and noun.

    Called without arguments it reads the notebook-level variables
    ``full_stimuli``, ``adjectives``, ``nouns``, ``words`` and ``word_list``.
    The optional parameters allow passing the same data explicitly.

    Each word is looked up unchanged first. If it is missing, up to
    ``max_trim`` trailing characters are removed one at a time and the
    shortened form is looked up instead. A word that still cannot be matched
    is kept in the result with NaN ratings and reported in ``not_found``.

    Parameters
    ----------
    stimuli : mapping with an "item" column, optional
        Source of the item numbers. Defaults to ``full_stimuli``.
    adjective_list, noun_list : sequence of str, optional
        Normalised words, one per stimulus row. Default to ``adjectives``
        and ``nouns``.
    norms : mapping with "Arou" and "Val" columns, optional
        Rating table. Defaults to ``words``.
    norm_words : sequence of str, optional
        Normalised lookup keys, positionally aligned with the rows of
        ``norms``. Defaults to ``word_list``.
    max_trim : int, optional
        Largest number of trailing characters to remove. The default of 2
        matches the two shortened forms tried previously.

    Returns
    -------
    list
        ``[result, not_found_fixed, not_found]``: ``result`` is a dict of
        equally long lists keyed "item", "region", "word", "arousal" and
        "valence"; ``not_found_fixed`` holds ``[word, matched_form]`` pairs
        for words matched only after trimming; ``not_found`` holds the words
        that had no match.

    Raises
    ------
    ValueError
        If a matched rating cell cannot be converted to ``float``.
    """
    if stimuli is None:
        stimuli = full_stimuli
    if adjective_list is None:
        adjective_list = adjectives
    if noun_list is None:
        noun_list = nouns
    if norms is None:
        norms = words
    if norm_words is None:
        norm_words = word_list

    # Key the ratings by the *normalised* word, i.e. the same form that is
    # used for the membership test. Looking the word up again in the raw
    # "Word" column fails when normalisation changed the entry.
    # setdefault keeps the first occurrence of a duplicated word.
    ratings = {}
    for key, arousal, valence in zip(norm_words, norms["Arou"], norms["Val"]):
        ratings.setdefault(key, (arousal, valence))

    result = {"item": [], "region": [], "word": [], "arousal": [], "valence": []}
    not_found_fixed = []
    not_found = []

    # Loop through the item numbers, adjectives and the nouns
    for item, adjective, noun in zip(stimuli["item"], adjective_list, noun_list):
        # Adjective row first, then noun row, for every item.
        for region, word in (("adjective", adjective), ("noun", noun)):
            match = _match_rating(word, ratings, max_trim)
            if match is None:
                # Keep the row so that every stimulus word stays in the
                # output; real NaN keeps the rating columns numeric.
                arousal = valence = float("nan")
                not_found.append(word)
            else:
                matched_form, arousal, valence = match
                if matched_form != word:
                    not_found_fixed.append([word, matched_form])
            result["item"].append(item)
            result["region"].append(region)
            result["word"].append(word)  # the stimulus word, not the trimmed form
            result["arousal"].append(arousal)
            result["valence"].append(valence)

    return [result, not_found_fixed, not_found]
# Run the lookup; the return value is the list [result, not_found_fixed, not_found].
result = computeEmotion()

In [10]:
# Element 0 of the computeEmotion() return value is the column dict; turn it into a table.
end_result = pd.DataFrame(result[0])

In [11]:
# Put result in the right frame
# For every row of end_result, collect the "Id.<respondent>.<.id>." labels of
# all segments that share the row's item and region.
# The labels are gathered in a per-row list. The dict (still bound to
# `result`, as before) is keyed by item/region and therefore holds only one
# entry per unique pair; assigning its values directly as a column fails as
# soon as a pair occurs in more than one row.
result = {}
row_ids = []
for item, region in zip(end_result["item"], end_result["region"]):
    key = "Id.{}.{}.".format(item, region)
    if key not in result:
        # Filter the segment table only once per item/region pair.
        matching = segments[(segments["item"] == item) & (segments["region"] == region)]
        result[key] = [
            "Id.{}.{}.".format(respondent, segment_id)
            for respondent, segment_id in zip(matching["respondent"], matching[".id"])
        ]
    # Copy so that rows with the same key do not share one list object.
    row_ids.append(list(result[key]))
end_result["ids"] = row_ids

In [286]:
# Write the table to CSV; no index argument is given, so pandas also writes the row index.
end_result.to_csv('../data/emotional_data/emotion_labels_summed.csv')