# Annotation of data

In [6]:
import pandas as pd
import matplotlib.pyplot as plt

In [29]:
# Load the experiment results: one row per basket (see the participant / cs / word / group / contexts columns in the preview below).
df = pd.read_csv("results.csv")

In [30]:
# Load the stimulus table: one row per context sentence with its id, cs, derivation and text.
cx = pd.read_csv("contexts.csv")

In [31]:
# Drop the two columns that are not needed for the annotation.
# errors="ignore" keeps this cell re-runnable: without it, executing the cell a
# second time raises KeyError because the columns have already been removed.
df = df.drop(columns=["source", "is_filler"], errors="ignore")
df.head()

Unnamed: 0,participant,cs,word,group,contexts
0,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,243 244
1,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,245 246
2,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,248 249 247
3,277a058c-6372-4840-8b1d-2ac2bac3d5e1,37,внимательный,proximal_proximal_meton,250 252
4,277a058c-6372-4840-8b1d-2ac2bac3d5e1,37,внимательный,proximal_proximal_meton,256 251 254


In [32]:
# Lookup table: stimulus id -> derivation (the sense label of that context).
d = {stim_id: derivation for stim_id, derivation in zip(cx["id"], cx["derivation"])}

In [33]:
# Lookup table: stimulus id -> text of the context sentence.
d_sent = {stim_id: sentence for stim_id, sentence in zip(cx["id"], cx["text"])}

In [34]:
def context_split(basket):
    """Convert a collection of baskets into lists of integer stimulus ids.

    Each entry of ``basket`` is normally a whitespace-separated string of ids
    such as ``"248 249 247"``. Entries that are already sequences of ids are
    accepted as well, so applying the function to an already converted column
    (e.g. when the notebook cell is re-run) is harmless instead of failing
    with ``AttributeError``.

    Parameters
    ----------
    basket : iterable
        Iterable of strings (or of already-split sequences of ids).

    Returns
    -------
    list[list[int]]
        One freshly built list of integer ids per entry, in input order.

    Raises
    ------
    ValueError
        If a token cannot be converted to ``int``.
    TypeError
        If an entry is neither a string nor an iterable.
    """
    new_basket = []
    for entry in basket:
        # Only raw strings need splitting; parsed baskets are re-normalised as is.
        stimuli = entry.split() if isinstance(entry, str) else entry
        new_basket.append([int(stimulus) for stimulus in stimuli])
    return new_basket

# Replace the raw space-separated strings with parsed lists of integer stimulus ids.
df["contexts"] = context_split(df["contexts"])

In [35]:
df.head(2)  # the contexts column now holds lists of integers instead of strings

Unnamed: 0,participant,cs,word,group,contexts
0,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,"[243, 244]"
1,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,"[245, 246]"


In [36]:
# Preview the stimulus table whose id / derivation / text columns feed the lookup dicts built above.
cx.head()

Unnamed: 0,id,cs,derivation,text
0,1,1,metonymy,У него были маленькие злые глазки.
1,9,2,homonym1,Повышение по службе дало надежды на карьерный ...
2,18,3,literal,В этом году английский у нас преподает нервна...
3,25,4,homonym1,Наша задача ― посеять на лугах и пастбищах нов...
4,32,5,literal,"В ходе расследования выяснилось, что водитель ..."


In [37]:
# Translate every stimulus id of every basket into its derivation label via the lookup dict `d`.
labels = [[d[stim_id] for stim_id in basket_ids] for basket_ids in df["contexts"]]

In [38]:
# Wrap the labels in a Series that shares df's index, so the column assignment
# that follows lines up row-for-row even if df's index is not the default 0..n-1
# (a mismatched index would silently produce NaN labels).
pd_labels = pd.Series(labels, index=df.index)

In [39]:
# Attach the per-basket sense labels as a new column (a Series is aligned to df by index on assignment).
df["labels"] = pd_labels

One row = one basket. The *labels* column contains a list with the word sense of each stimulus in the basket.

In [40]:
# Remove stray leading/trailing whitespace from every sense label.
df["labels"] = [[label.strip() for label in basket_labels] for basket_labels in df["labels"]]

In [41]:
df.head(20)  # inspect the first baskets together with their sense labels

Unnamed: 0,participant,cs,word,group,contexts,labels
0,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,"[243, 244]","[literal, literal]"
1,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,"[245, 246]","[proximal_metonymy_1, proximal_metonymy_1]"
2,277a058c-6372-4840-8b1d-2ac2bac3d5e1,36,наглый,proximal_proximal_meton,"[248, 249, 247]","[proximal_proximal_metonymy_2, proximal_proxim..."
3,277a058c-6372-4840-8b1d-2ac2bac3d5e1,37,внимательный,proximal_proximal_meton,"[250, 252]","[proximal_metonymy_1, proximal_metonymy_1]"
4,277a058c-6372-4840-8b1d-2ac2bac3d5e1,37,внимательный,proximal_proximal_meton,"[256, 251, 254]","[proximal_proximal_metonymy_2, literal, literal]"
5,277a058c-6372-4840-8b1d-2ac2bac3d5e1,37,внимательный,proximal_proximal_meton,"[253, 255]","[proximal_proximal_metonymy_2, proximal_proxim..."
6,277a058c-6372-4840-8b1d-2ac2bac3d5e1,37,внимательный,proximal_proximal_meton,[257],[proximal_proximal_metonymy_2]
7,277a058c-6372-4840-8b1d-2ac2bac3d5e1,6,строгий,proximal_meton,"[40, 39]","[literal, literal]"
8,277a058c-6372-4840-8b1d-2ac2bac3d5e1,6,строгий,proximal_meton,"[42, 43]","[metaphor, metaphor]"
9,277a058c-6372-4840-8b1d-2ac2bac3d5e1,6,строгий,proximal_meton,"[41, 44]","[metonymy, metonymy]"


The cell below annotates each basket with its type:
    1. Clean baskets, where all words belong to the same sense: e.g., [proximal_metonymy, proximal_metonymy]
    2. Baskets with only one stimulus: e.g., [literal] or [proximal_metonymy]
    3. Mixed baskets, where the words belong to at least two different senses: e.g., [literal, literal, proximal_metonymy] or [metaphor, metaphor, literal, proximal_metonymy] or [metaphor, literal]
        

In [42]:
# Label each basket by its size and by how many distinct senses it contains:
#   "one stimulus" - the basket holds exactly one stimulus
#   "pure"         - several stimuli that all share the same sense
#   "mixed"        - every other basket (stimuli from different senses)
arr = [
    "one stimulus" if len(senses) == 1
    else "pure" if len(senses) > 1 and len(set(senses)) == 1
    else "mixed"
    for senses in df["labels"]
]

In [43]:
# Store the basket category ("pure", "one stimulus" or "mixed") computed above as a new column.
df["basket_type"] = arr

In [44]:
# Save the annotated table, now including the labels and basket_type columns, to disk.
df.to_csv('output.csv')