## Imports

In [1]:
import os
import glob
import pickle
import numpy as np
import pandas as pd
import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt

dirname = 'Task2_due_April-30'

## Files

In [2]:
files = glob.glob(os.path.join(dirname, '**'), recursive=True)
files = sorted(files[1:], key=lambda x: os.path.splitext(x)[1])
files = (files[7],files[9])
for file in files:
    print(file[len(dirname)+1:])

task_2_annotations_82d1d6d1093eaab6_e330cbf_pandas.pkl
task_2_features_1d8b658c21ddc127_e330cbf_pandas.pkl


In [3]:
pandas_annot = pd.read_pickle(f'{files[0]}')
pandas_annot_df = pd.DataFrame.from_dict(pandas_annot)
# pandas_annot_df.head()

In [4]:
pandas_features = pd.read_pickle(f'{files[1]}')
pandas_feature_df = pd.DataFrame.from_dict(pandas_features)
# pandas_feature_df.head()

In [5]:
# add class labels
def label_race(row):
   if row['valence'] <= 0 and row['arousal'] > 3:
        return 'angry'
   elif row['valence'] > 0 and row['arousal'] >= 3:
       return 'happy'
   elif row['valence'] < 0 and row['arousal'] <= 3:
       return 'sad'
   elif row['valence'] >= 0 and row['arousal'] < 3:
       return 'relaxed'

pandas_annot_df['class'] = pandas_annot_df.apply(lambda row: label_race(row), axis=1)
pandas_annot_df.head()

Unnamed: 0,pianist_id,segment_id,annotator_id,arousal,valence,gems_wonder,gems_transcendence,gems_tenderness,gems_nostalgia,gems_peacefulness,gems_power,gems_joyful_activation,gems_tension,gems_sadness,gemmes_flow,gemmes_movement,gemmes_force,gemmes_interior,gemmes_wandering,class
0,1,0,91,1,-1,2,1,2,4,2.0,1,1,1,2,3,2,1,1.0,2,sad
31,1,0,19,2,-1,3,3,3,4,4.0,1,2,3,3,3,2,2,3.0,3,sad
62,1,0,189,2,0,2,1,2,1,4.0,2,2,1,1,3,2,1,1.0,4,relaxed
93,1,0,126,2,2,4,5,2,3,5.0,2,4,1,3,5,1,2,2.0,5,relaxed
124,1,0,26,4,2,3,5,2,3,3.0,1,3,4,1,4,1,2,3.0,1,happy


## Question 1
How consistent are the emotion annotations? Do different annotators agree in their ratings of the same excerpt?

In [6]:
pandas_annot_df.describe()

Unnamed: 0,pianist_id,segment_id,annotator_id,arousal,valence,gems_wonder,gems_transcendence,gems_tenderness,gems_nostalgia,gems_peacefulness,gems_power,gems_joyful_activation,gems_tension,gems_sadness,gemmes_flow,gemmes_movement,gemmes_force,gemmes_interior,gemmes_wandering
count,2638.0,2638.0,2638.0,2638.0,2638.0,2638.0,2638.0,2638.0,2638.0,2637.0,2638.0,2638.0,2638.0,2638.0,2638.0,2638.0,2638.0,2637.0,2638.0
mean,6.037149,13.0,97.071645,2.929492,0.020849,2.90144,2.673616,2.366187,2.723275,2.395525,2.313495,2.276725,2.596664,2.148218,2.968537,2.561031,2.43442,2.690178,2.496209
std,3.179759,7.806583,56.340712,1.184031,1.099842,1.267184,1.235916,1.297041,1.371744,1.367916,1.23219,1.239287,1.412181,1.177312,1.329116,1.261079,1.264754,1.25894,1.279556
min,1.0,0.0,0.0,1.0,-2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,3.0,6.0,48.0,2.0,-1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0
50%,6.0,13.0,97.0,3.0,0.0,3.0,3.0,2.0,3.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,3.0,2.0
75%,9.0,20.0,146.0,4.0,1.0,4.0,4.0,3.0,4.0,4.0,3.0,3.0,4.0,3.0,4.0,4.0,3.0,4.0,3.0
max,11.0,26.0,194.0,5.0,2.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0


In [7]:
alt.Chart(pandas_annot_df).mark_circle(size=60, opacity=0.6).encode(
    x='annotator_id',
    y='segment_id',
    color='class',
    tooltip=['segment_id']
).properties(
    width=800,
    height=400
)

Because the annotators were given two different sets of segments, I seperated them into two plots.

In [8]:
df1 = pandas_annot_df[pandas_annot_df['segment_id']%2==0]
chart1 = alt.Chart(df1).mark_circle(size=60, opacity=0.6).encode(
    x='annotator_id',
    y='segment_id',
    color='class',
    tooltip=['class']
).properties(
    width=400,
    height=300
)

df2 = pandas_annot_df[pandas_annot_df['segment_id']%2!=0]
chart2 = alt.Chart(df2).mark_circle(size=60, opacity=0.6).encode(
    x='annotator_id',
    y='segment_id',
    color='class',
    tooltip=['class']
).properties(
    width=400,
    height=300
)

chart1 | chart2

Still not much is visible. Although tendencies are identifiable.

In [9]:
# Group by segment ID
grouped_df = pandas_annot_df.groupby('segment_id')
aggregated = pd.DataFrame(data={'Segment ID': [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26]})

In [10]:
# add each segment's mean to 'aggregated'
def add_mean(col, name):
    grouped_lists = grouped_df[f'{col}'].apply(list)
    temp = []
    for i in grouped_lists:
        mean = np.mean(i)
        temp.append(mean)
    
    aggregated[name] = temp
    
for _ in list(pandas_annot_df)[3:-1]:
    add_mean(_, f'Mean {_}')

In [11]:
aggregated

Unnamed: 0,Segment ID,Mean arousal,Mean valence,Mean gems_wonder,Mean gems_transcendence,Mean gems_tenderness,Mean gems_nostalgia,Mean gems_peacefulness,Mean gems_power,Mean gems_joyful_activation,Mean gems_tension,Mean gems_sadness,Mean gemmes_flow,Mean gemmes_movement,Mean gemmes_force,Mean gemmes_interior,Mean gemmes_wandering
0,0,2.495146,-0.15534,2.990291,2.553398,2.679612,3.495146,3.242718,1.747573,1.902913,1.941748,2.621359,3.68932,1.970874,1.796117,3.242718,2.990291
1,1,3.217391,0.152174,3.293478,3.119565,2.554348,3.054348,,2.293478,2.423913,2.445652,2.0,3.445652,2.782609,2.5,2.619565,2.652174
2,2,3.262136,0.126214,3.029126,2.68932,1.805825,2.029126,1.718447,2.699029,3.135922,3.019417,1.533981,2.572816,3.359223,2.699029,2.165049,2.427184
3,3,3.141304,-0.304348,2.326087,2.521739,1.51087,1.967391,1.434783,2.858696,2.206522,3.380435,2.293478,2.130435,2.771739,2.891304,2.391304,1.945652
4,4,3.174757,0.029126,3.398058,2.990291,2.902913,3.23301,2.679612,2.436893,1.854369,2.038835,2.456311,3.495146,1.951456,2.669903,3.097087,2.92233
5,5,2.456522,0.684783,3.152174,2.663043,2.673913,2.641304,2.934783,2.26087,3.119565,1.652174,1.478261,3.282609,2.826087,2.184783,2.5,3.0
6,6,2.417476,0.631068,3.116505,2.563107,3.893204,3.747573,3.932039,1.398058,2.048544,1.31068,2.281553,4.116505,1.815534,1.601942,3.252427,3.291262
7,7,3.065217,0.184783,3.293478,2.858696,2.836957,3.206522,2.369565,2.48913,2.402174,2.228261,2.152174,3.23913,2.619565,2.543478,2.771739,2.597826
8,8,3.291262,0.834951,3.106796,2.834951,2.184466,1.84466,1.932039,2.621359,3.378641,2.553398,1.320388,2.621359,3.485437,2.427184,2.058252,2.291262
9,9,2.619565,0.163043,3.271739,2.728261,2.934783,3.228261,2.5,1.782609,2.108696,2.445652,2.347826,3.23913,2.293478,2.01087,2.76087,2.619565
