TODO:
- A nice figure that shows the mean/std of emotion pairs
- Figure that shows distribution of words based on count
- Size of vocabulary for descriptions (with lemmatization)

DONE:
- Check rows where all emotion labels are the same
- Remove rows with NaN values
- Remove rows with descriptions 'test' or 'stuff'
- Check rows where description is one word
- Ask Dr. K what he would like me to do in entries where the robot did nothing
- Mean/std of each of the 8 emotion pairs
- Count of word tokens in the descriptions
- Average description length (in words)
- Size of vocabulary for descriptions (with lowercase)

In [243]:
import pandas as pd
import numpy as np
import pickle
import nltk
from nltk.stem import WordNetLemmatizer

# Shared WordNet lemmatizer instance.
# NOTE(review): no cell visible in this notebook uses it yet; presumably it is
# meant for the "vocabulary with lemmatization" TODO item; confirm.
lemmatizer = WordNetLemmatizer()

# Data Cleaning Functions

In [2]:
def find_rows_with_same_emotion_labels(data):
    """Collect the index labels of rows whose eight emotion-pair ratings are all equal.

    Every matching row is also printed (index label, a newline, then the row).

    Args:
        data: DataFrame containing the eight emotion-pair columns.

    Returns:
        list: index labels of the matching rows, in iteration order.
    """
    # Column names are spelled exactly as they appear in the data.
    emotion_columns = (
        'Interst/Alarm',
        'Confusion/Understanding',
        'Frusteration/Relief',
        'Sorrow/Joy',
        'Anger/Gratitude',
        'Fear/Hope',
        'Boredom/Surprise',
        'Disgust/Desire',
    )
    matches = []
    for label, record in data.iterrows():
        # Compare each column with its neighbour; stops at the first mismatch.
        neighbours = zip(emotion_columns, emotion_columns[1:])
        if all(record[left] == record[right] for left, right in neighbours):
            print(label, '\n', record)
            matches.append(label)
    return matches

In [3]:
def drop_row(data, index):
    """Return ``data`` without the row(s) whose index label is ``index``.

    Args:
        data: DataFrame to remove rows from (left unmodified).
        index: a single index label or a list of labels.

    Returns:
        A new DataFrame without the requested rows.
    """
    trimmed = data.drop(labels=index, axis='index')
    return trimmed

In [4]:
def find_singleton_descriptions(data):
    """Find rows whose description tokenizes to exactly one token.

    Each matching row is printed (index label, timestamp, description).

    Args:
        data: DataFrame with 'Description' and 'Timestamp' columns.

    Returns:
        list: index labels of the rows with a one-token description.
    """
    indices = []
    for index, row in data.iterrows():
        # str() lets non-string cells (e.g. NaN) be tokenized instead of raising.
        if len(nltk.word_tokenize(str(row['Description']))) == 1:
            print(index, '\n', row['Timestamp'], '\n', row['Description'])
            # Record only the matching rows (previously every row was appended
            # and the list was never returned).
            indices.append(index)
    return indices

In [5]:
def find_NaN_values(data):
    """Locate missing values in ``data``.

    Returns:
        The result of ``np.where`` on the missing-value mask: for a DataFrame,
        a tuple ``(row_positions, column_positions)`` of positional indices
        (not index labels).
    """
    missing_mask = pd.isna(data)
    return np.where(missing_mask)

In [6]:
def find_empty_values(data):
    """Locate cells of ``data`` that equal the empty string.

    Uses a vectorized comparison instead of ``DataFrame.applymap``, which is
    deprecated in recent pandas releases.

    Returns:
        The result of ``np.where`` on the comparison mask: for a DataFrame,
        a tuple ``(row_positions, column_positions)`` of positional indices
        (not index labels).
    """
    empty_mask = data == ''
    return np.where(empty_mask)

In [7]:
def find_rows_with_description(data, description):
    """Return the index labels of rows whose 'Description' equals ``description`` exactly.

    Args:
        data: DataFrame with a 'Description' column.
        description: value to match (exact, case-sensitive equality).

    Returns:
        list: index labels of the matching rows.
    """
    is_match = data['Description'] == description
    return data.loc[is_match].index.tolist()

# Data Analysis Functions

In [160]:
def mean_emotion_pairs(data):
    """Return the per-column mean of the eight emotion-pair ratings.

    Returns:
        Series indexed by emotion-pair column name.
    """
    # Column names are spelled exactly as they appear in the data.
    emotion_columns = [
        "Interst/Alarm",
        "Confusion/Understanding",
        "Frusteration/Relief",
        "Sorrow/Joy",
        "Anger/Gratitude",
        "Fear/Hope",
        "Boredom/Surprise",
        "Disgust/Desire",
    ]
    ratings = data[emotion_columns]
    return ratings.mean()

In [161]:
def std_emotion_pairs(data):
    """Return the per-column standard deviation of the eight emotion-pair ratings.

    Uses the pandas default for ``std`` (sample standard deviation).

    Returns:
        Series indexed by emotion-pair column name.
    """
    # Column names are spelled exactly as they appear in the data.
    emotion_columns = [
        "Interst/Alarm",
        "Confusion/Understanding",
        "Frusteration/Relief",
        "Sorrow/Joy",
        "Anger/Gratitude",
        "Fear/Hope",
        "Boredom/Surprise",
        "Disgust/Desire",
    ]
    ratings = data[emotion_columns]
    return ratings.std()

In [247]:
def analyze_descriptions(data):
    """Add tokenization columns to ``data`` in place.

    Adds two columns:
      * 'Description_Tokenized': list of NLTK word tokens of the lower-cased
        description.
      * 'Description_Count': number of tokens in that list.

    Args:
        data: DataFrame with a string 'Description' column. Non-string cells
            (e.g. NaN) raise, because ``.lower()`` is called on each value.

    Returns:
        None; ``data`` is modified in place.
    """
    # Tokenize once and derive the count from the token lists, instead of
    # running the tokenizer a second time for the count.
    tokenized = data['Description'].apply(
        lambda description: nltk.word_tokenize(description.lower())
    )
    data['Description_Tokenized'] = tokenized
    data['Description_Count'] = tokenized.apply(len)

In [169]:
def count_total_tokens(data):
    """Return the sum of the 'Description_Count' column (total tokens over all rows)."""
    token_counts = data['Description_Count']
    return token_counts.sum()

In [170]:
def average_description_length(data):
    """Return the mean of the 'Description_Count' column (average tokens per description)."""
    token_counts = data['Description_Count']
    return token_counts.mean()

In [227]:
def vocab(data):
    """Return the set of distinct tokens across all tokenized descriptions.

    Args:
        data: DataFrame whose 'Description_Tokenized' column holds one list of
            token strings per row.

    Returns:
        set: the distinct tokens, as plain Python strings. Empty if ``data``
        has no rows.
    """
    # A set comprehension avoids np.concatenate, which raises on an empty frame
    # and coerces the tokens into a NumPy string array.
    return {
        token
        for tokens in data['Description_Tokenized']
        for token in tokens
    }

In [228]:
def vocab_size(data):
    """Return the number of distinct tokens across all tokenized descriptions.

    Args:
        data: DataFrame whose 'Description_Tokenized' column holds one list of
            token strings per row.

    Returns:
        int: count of distinct tokens; 0 if ``data`` has no rows.
    """
    # A set comprehension avoids np.concatenate, which raises on an empty frame.
    distinct_tokens = {
        token
        for tokens in data['Description_Tokenized']
        for token in tokens
    }
    return len(distinct_tokens)

In [180]:
# Load the Cozmo responses; the path is relative to the notebook's working directory.
cozmo_data = pd.read_csv('responses_cozmo copy.csv')

In [222]:
cozmo_data

Unnamed: 0,Timestamp,Description,Interst/Alarm,Confusion/Understanding,Frusteration/Relief,Sorrow/Joy,Anger/Gratitude,Fear/Hope,Boredom/Surprise,Disgust/Desire,Description_Tokenized,Description_Count
0,1.617917e+09,It went to investigate a thing it saw,1,1,3,3,3,4,4,5,"[It, went, to, investigate, a, thing, it, saw]",8
1,1.619714e+09,drove forward for a bit and then turned to his...,2,3,3,3,3,3,3,3,"[drove, forward, for, a, bit, and, then, turne...",14
2,1.619714e+09,"just drove forward, made a little sound at the...",2,3,3,3,3,3,3,3,"[just, drove, forward, ,, made, a, little, sou...",12
3,1.619714e+09,drove forward fairly quickly and then turned t...,1,3,3,3,4,4,3,4,"[drove, forward, fairly, quickly, and, then, t...",14
4,1.619714e+09,looked down squinted eyes and then looked forw...,2,2,3,3,3,3,3,3,"[looked, down, squinted, eyes, and, then, look...",17
...,...,...,...,...,...,...,...,...,...,...,...,...
483,1.635874e+09,the robot moved to its right and looked up,1,2,3,3,3,3,3,4,"[the, robot, moved, to, its, right, and, looke...",9
484,1.635874e+09,the robot moved forward and raised its arms up,1,5,4,5,4,4,3,3,"[the, robot, moved, forward, and, raised, its,...",9
485,1.635874e+09,raised its arms and head up slightly,2,2,3,3,3,3,3,4,"[raised, its, arms, and, head, up, slightly]",7
486,1.635874e+09,the robot looked down lowered its arms then mo...,3,5,4,4,3,4,3,3,"[the, robot, looked, down, lowered, its, arms,...",15


# Clean Data

In [190]:
# Drop rows that repeat an earlier row's Timestamp (drop_duplicates keeps the
# first occurrence by default), then display the result.
cozmo_data = cozmo_data.drop_duplicates(subset=['Timestamp'])
cozmo_data

Unnamed: 0,Timestamp,Description,Interst/Alarm,Confusion/Understanding,Frusteration/Relief,Sorrow/Joy,Anger/Gratitude,Fear/Hope,Boredom/Surprise,Disgust/Desire
0,1.617917e+09,It went to investigate a thing it saw,1,1,3,3,3,4,4,5
1,1.619714e+09,drove forward for a bit and then turned to his...,2,3,3,3,3,3,3,3
2,1.619714e+09,"just drove forward, made a little sound at the...",2,3,3,3,3,3,3,3
3,1.619714e+09,drove forward fairly quickly and then turned t...,1,3,3,3,4,4,3,4
4,1.619714e+09,looked down squinted eyes and then looked forw...,2,2,3,3,3,3,3,3
...,...,...,...,...,...,...,...,...,...,...
483,1.635874e+09,the robot moved to its right and looked up,1,2,3,3,3,3,3,4
484,1.635874e+09,the robot moved forward and raised its arms up,1,5,4,5,4,4,3,3
485,1.635874e+09,raised its arms and head up slightly,2,2,3,3,3,3,3,4
486,1.635874e+09,the robot looked down lowered its arms then mo...,3,5,4,4,3,4,3,3


In [182]:
find_rows_with_same_emotion_labels(cozmo_data)

7 
 Timestamp                                                  1619714617.020491
Description                drove forward a bit and then looked down. Repl...
Interst/Alarm                                                              3
Confusion/Understanding                                                    3
Frusteration/Relief                                                        3
Sorrow/Joy                                                                 3
Anger/Gratitude                                                            3
Fear/Hope                                                                  3
Boredom/Surprise                                                           3
Disgust/Desire                                                             3
Name: 7, dtype: object
8 
 Timestamp                                                  1619714617.020491
Description                drove forward a bit and then looked down. Repl...
Interst/Alarm                                

[7,
 8,
 14,
 26,
 27,
 31,
 32,
 94,
 143,
 183,
 184,
 186,
 189,
 193,
 200,
 222,
 238,
 240,
 241,
 242,
 246,
 249,
 252,
 255,
 259,
 265,
 311,
 345,
 346,
 349,
 370,
 376,
 378,
 384,
 389,
 392,
 403,
 405,
 407,
 410,
 412,
 417,
 423,
 447]

In [183]:
# Row 143 is one of the rows flagged above as having identical ratings for all
# eight emotion pairs.
# NOTE(review): the reason only this row is dropped is not recorded here;
# presumably a manual judgment call. Confirm and document.
cozmo_data = drop_row(cozmo_data, 143)

In [184]:
find_NaN_values(cozmo_data)

(array([37]), array([1]))

In [185]:
cozmo_data.iloc[37]

Timestamp                  1620932509.124969
Description                              NaN
Interst/Alarm                              2
Confusion/Understanding                    2
Frusteration/Relief                        3
Sorrow/Joy                                 2
Anger/Gratitude                            3
Fear/Hope                                  3
Boredom/Surprise                           2
Disgust/Desire                             3
Name: 37, dtype: object

In [186]:
# Drop the row labelled 37, whose Description is NaN (see the preceding output).
# find_NaN_values reports positions while drop_row removes by index label; the
# two coincide for this row (iloc[37] shows Name: 37) but need not in general.
cozmo_data = drop_row(cozmo_data, 37)

In [187]:
find_empty_values(cozmo_data)

(array([], dtype=int64), array([], dtype=int64))

In [188]:
find_singleton_descriptions(cozmo_data)

103 
 1633373656.0610828 
 interest
104 
 1633373733.466997 
 interest


In [189]:
find_rows_with_description(cozmo_data, 'test')

[]

# Analyze Data

In [203]:
mean_emotion_pairs(cozmo_data)

Interst/Alarm              2.380753
Confusion/Understanding    2.794979
Frusteration/Relief        2.930962
Sorrow/Joy                 2.907950
Anger/Gratitude            2.968619
Fear/Hope                  3.115063
Boredom/Surprise           3.089958
Disgust/Desire             3.133891
dtype: float64

In [204]:
std_emotion_pairs(cozmo_data)

Interst/Alarm              1.030665
Confusion/Understanding    0.971187
Frusteration/Relief        0.730758
Sorrow/Joy                 0.829172
Anger/Gratitude            0.683022
Fear/Hope                  0.704391
Boredom/Surprise           0.823058
Disgust/Desire             0.843561
dtype: float64

In [248]:
analyze_descriptions(cozmo_data)
cozmo_data

TypeError: unhashable type: 'list'

In [206]:
count_total_tokens(cozmo_data)

6853

In [207]:
average_description_length(cozmo_data)

14.336820083682008

In [237]:
vocab(cozmo_data)

{'!',
 '%',
 "'",
 "''",
 "'ah",
 "'arm",
 "'boi",
 "'boo",
 "'eah",
 "'ehhhehehhh",
 "'euwah",
 "'head",
 "'hiyo",
 "'hmm",
 "'m",
 "'mmm",
 "'oh",
 "'oi",
 "'oooh",
 "'peru",
 "'s",
 "'uh",
 '(',
 ')',
 ',',
 '.',
 '...',
 '1-sec',
 '1/8',
 '10',
 '15',
 '180',
 '1sec',
 '2',
 '2-sec',
 '20',
 '200',
 '270',
 '70',
 '8',
 '90',
 ':',
 ';',
 '?',
 '``',
 'a',
 'aahm',
 'about',
 'above',
 'abut',
 'ace',
 'acted',
 'action',
 'actually',
 'afraid',
 'after',
 'afterwards',
 'again',
 'ah',
 'ahh',
 'ahhm',
 'alarmed',
 'alert',
 'all',
 'almost',
 'already',
 'alternate',
 'alternated',
 'always',
 'am',
 'an',
 'and',
 'angry',
 'announcing',
 'annoyed',
 'another',
 'anything',
 'appear',
 'appeared',
 'appears',
 'appendage',
 'arm',
 'arms',
 'around',
 'as',
 'asked',
 'asking',
 'assertive',
 'assertively',
 'associated',
 'at',
 'attention',
 'auditory',
 'aw',
 'away',
 'aww',
 'awww',
 'awwwwwwwww',
 'ayyychemm',
 'back',
 'barely',
 'be',
 'became',
 'because',
 'been',
 'be

In [238]:
vocab_size(cozmo_data)

654

In [191]:
# Load the Misty responses; the path is relative to the notebook's working directory.
misty_data = pd.read_csv('responses_misty copy.csv')

In [192]:
misty_data

Unnamed: 0,Timestamp,Description,Interst/Alarm,Confusion/Understanding,Frusteration/Relief,Sorrow/Joy,Anger/Gratitude,Fear/Hope,Boredom/Surprise,Disgust/Desire
0,1.617741e+09,test,1,1,1,1,1,1,1,1
1,1.617916e+09,confused and asking for clarification almost,3,2,4,3,2,4,1,1
2,1.619713e+09,"His eyes half closed, his head tilted up, his ...",2,4,3,3,4,4,3,5
3,1.619713e+09,"eyes were half open, head tilted up and down s...",3,3,2,3,3,3,1,3
4,1.619713e+09,"eyes were looking concerned to the left, then ...",3,2,3,3,3,3,4,3
...,...,...,...,...,...,...,...,...,...,...
486,1.635871e+09,the robot said uh twice while moving its head ...,3,1,2,3,3,3,4,4
487,1.635871e+09,the robot move both of its arms up and down an...,3,4,2,1,3,3,3,4
488,1.635871e+09,The robot moved its left arm forward slightly ...,4,2,1,1,2,2,3,3
489,1.635871e+09,The robot looked up and to its right and said aw,1,2,3,3,3,4,2,4


# Clean Data

In [202]:
# Drop rows that repeat an earlier row's Timestamp (drop_duplicates keeps the
# first occurrence by default), then display the result.
misty_data = misty_data.drop_duplicates(subset=['Timestamp'])
misty_data

Unnamed: 0,Timestamp,Description,Interst/Alarm,Confusion/Understanding,Frusteration/Relief,Sorrow/Joy,Anger/Gratitude,Fear/Hope,Boredom/Surprise,Disgust/Desire
1,1.617916e+09,confused and asking for clarification almost,3,2,4,3,2,4,1,1
2,1.619713e+09,"His eyes half closed, his head tilted up, his ...",2,4,3,3,4,4,3,5
3,1.619713e+09,"eyes were half open, head tilted up and down s...",3,3,2,3,3,3,1,3
4,1.619713e+09,"eyes were looking concerned to the left, then ...",3,2,3,3,3,3,4,3
5,1.619713e+09,one arm moved about halfway up twice head tilt...,3,3,2,2,3,3,2,3
...,...,...,...,...,...,...,...,...,...,...
486,1.635871e+09,the robot said uh twice while moving its head ...,3,1,2,3,3,3,4,4
487,1.635871e+09,the robot move both of its arms up and down an...,3,4,2,1,3,3,3,4
488,1.635871e+09,The robot moved its left arm forward slightly ...,4,2,1,1,2,2,3,3
489,1.635871e+09,The robot looked up and to its right and said aw,1,2,3,3,3,4,2,4


In [193]:
find_rows_with_same_emotion_labels(misty_data)

0 
 Timestamp                  1617740862.261658
Description                             test
Interst/Alarm                              1
Confusion/Understanding                    1
Frusteration/Relief                        1
Sorrow/Joy                                 1
Anger/Gratitude                            1
Fear/Hope                                  1
Boredom/Surprise                           1
Disgust/Desire                             1
Name: 0, dtype: object
66 
 Timestamp                  1630352955.434929
Description                             test
Interst/Alarm                              1
Confusion/Understanding                    1
Frusteration/Relief                        1
Sorrow/Joy                                 1
Anger/Gratitude                            1
Fear/Hope                                  1
Boredom/Surprise                           1
Disgust/Desire                             1
Name: 66, dtype: object
76 
 Timestamp                  1630421538.5

[0,
 66,
 76,
 89,
 110,
 139,
 231,
 287,
 340,
 381,
 382,
 408,
 411,
 412,
 414,
 415,
 417,
 418,
 445]

In [194]:
find_NaN_values(misty_data)

(array([], dtype=int64), array([], dtype=int64))

In [195]:
find_empty_values(misty_data)

(array([], dtype=int64), array([], dtype=int64))

In [196]:
find_singleton_descriptions(misty_data)

0 
 1617740862.2616577 
 test
66 
 1630352955.4349294 
 test
95 
 1630515062.8258655 
 confusion
98 
 1630520960.1284945 
 angery
108 
 1630601534.3583376 
 Sadness
121 
 1631134897.8837693 
 confused

122 
 1631134897.8837693 
 confused

123 
 1631134897.8837693 
 confused

124 
 1631134897.8837693 
 confused

125 
 1631134897.8837693 
 confused

126 
 1631134897.8837693 
 confused

139 
 1633113586.5666468 
 f
175 
 1633371901.3289552 
 confusion
177 
 1633372138.351581 
 curiosity 
181 
 1633372445.1713097 
 examination 
185 
 1633372777.6040866 
 awestruk
186 
 1633372844.241636 
 awkward
192 
 1633373363.4952807 
 confusion
194 
 1633373531.5608878 
 confusion
472 
 1635793253.7075171 
 Greeting 
473 
 1635793556.2197733 
 Agreeing 
474 
 1635793657.0961187 
 Welcoming 
475 
 1635793746.7843857 
 Confirming 
477 
 1635794002.9874015 
 Welcoming 


In [197]:
# Row 139 has the one-character description 'f' (see the singleton output above)
# and also appears in the identical-labels list.
# NOTE(review): presumably dropped as a junk entry; confirm.
misty_data = drop_row(misty_data, 139)

In [201]:
find_rows_with_description(misty_data, 'test')

[]

In [199]:
# Row 0 has the description 'test' with every rating set to 1 (see the outputs above).
misty_data = drop_row(misty_data, 0)

In [200]:
# Row 66 has the description 'test' with every rating set to 1 (see the outputs above).
misty_data = drop_row(misty_data, 66)

# Analyze Data

In [210]:
mean_emotion_pairs(misty_data)

Interst/Alarm              2.638710
Confusion/Understanding    2.948387
Frusteration/Relief        2.946237
Sorrow/Joy                 3.068817
Anger/Gratitude            2.982796
Fear/Hope                  3.051613
Boredom/Surprise           3.156989
Disgust/Desire             3.073118
dtype: float64

In [211]:
std_emotion_pairs(misty_data)

Interst/Alarm              1.186584
Confusion/Understanding    1.167780
Frusteration/Relief        0.819556
Sorrow/Joy                 0.921250
Anger/Gratitude            0.838027
Fear/Hope                  0.775099
Boredom/Surprise           1.117590
Disgust/Desire             0.865420
dtype: float64

In [235]:
analyze_descriptions(misty_data)
misty_data

Unnamed: 0,Timestamp,Description,Interst/Alarm,Confusion/Understanding,Frusteration/Relief,Sorrow/Joy,Anger/Gratitude,Fear/Hope,Boredom/Surprise,Disgust/Desire,Description_Tokenized,Description_Count
1,1.617916e+09,confused and asking for clarification almost,3,2,4,3,2,4,1,1,"[confused, and, asking, for, clarification, al...",6
2,1.619713e+09,"His eyes half closed, his head tilted up, his ...",2,4,3,3,4,4,3,5,"[his, eyes, half, closed, ,, his, head, tilted...",25
3,1.619713e+09,"eyes were half open, head tilted up and down s...",3,3,2,3,3,3,1,3,"[eyes, were, half, open, ,, head, tilted, up, ...",15
4,1.619713e+09,"eyes were looking concerned to the left, then ...",3,2,3,3,3,3,4,3,"[eyes, were, looking, concerned, to, the, left...",23
5,1.619713e+09,one arm moved about halfway up twice head tilt...,3,3,2,2,3,3,2,3,"[one, arm, moved, about, halfway, up, twice, h...",20
...,...,...,...,...,...,...,...,...,...,...,...,...
486,1.635871e+09,the robot said uh twice while moving its head ...,3,1,2,3,3,3,4,4,"[the, robot, said, uh, twice, while, moving, i...",12
487,1.635871e+09,the robot move both of its arms up and down an...,3,4,2,1,3,3,3,4,"[the, robot, move, both, of, its, arms, up, an...",26
488,1.635871e+09,The robot moved its left arm forward slightly ...,4,2,1,1,2,2,3,3,"[the, robot, moved, its, left, arm, forward, s...",15
489,1.635871e+09,The robot looked up and to its right and said aw,1,2,3,3,3,4,2,4,"[the, robot, looked, up, and, to, its, right, ...",11


In [213]:
count_total_tokens(misty_data)

7610

In [214]:
average_description_length(misty_data)

16.365591397849464

In [239]:
vocab(misty_data)

{'!',
 '&',
 "'",
 "''",
 "'ah",
 "'d",
 "'ha",
 "'hi",
 "'hm",
 "'hmm",
 "'s",
 "'toy",
 "'um",
 "'ve",
 '(',
 ')',
 ',',
 '.',
 '1-2',
 '10/10',
 '15',
 '2',
 '45',
 '5-star',
 '90',
 ':',
 ';',
 '?',
 '``',
 'a',
 'aaarrhh',
 'aah',
 'about',
 'absolute',
 'accompanied',
 'accusing',
 'action',
 'actually',
 'ad',
 'adequately',
 'adorable',
 'affection',
 'afraid',
 'after',
 'afterward',
 'again',
 'agitated',
 'agreeing',
 'agreement',
 'ah',
 'ah-hah',
 'ahead',
 'alarmed',
 'alert',
 'all',
 'almost',
 'along',
 'also',
 'amount',
 'an',
 'and',
 'anger',
 'angery',
 'angrily',
 'angry',
 'annoyed',
 'annoying',
 'another',
 'anything',
 'apart',
 'apathy',
 'aperture',
 'aperture-eyes',
 'appear',
 'appeared',
 'approval',
 'aquired',
 'are',
 'arm',
 'arms',
 'around',
 'aroung',
 'arrogant',
 'as',
 'asking',
 'asleep',
 'assume',
 'at',
 'attention',
 'attracted',
 'aw',
 'awake',
 'away',
 'awestruk',
 'awkward',
 'aww',
 'baby',
 'back',
 'bad',
 'barely',
 'bashful',
 'b

In [240]:
vocab_size(misty_data)

842