# Polish Wordnet - emotion annotated

In [2]:
import pandas as pd

from src.settings import SENTIMENT_RES_DIR

In [3]:
PLWN_EMO_PATH = SENTIMENT_RES_DIR.joinpath('plwordnet', 'emotion.csv')
PLWN_LU_SYN_MAP_PATH = SENTIMENT_RES_DIR.joinpath('plwordnet', 'lex_syn_id_mapper.csv')

df_emotion = pd.read_csv(
    PLWN_EMO_PATH,
    sep='\t',
    dtype={
        'id': int,
        'lexicalunit_id': int,
        'emotions': str,
        'valuations': str,
        'markedness': str,
        'unitStatus': int,
        'example1': str,
        'example2': str,
        'super_anotation': int,
    }
)
df_emotion

Unnamed: 0,id,lexicalunit_id,emotions,valuations,markedness,unitStatus,example1,example2,owner,creation_date,super_anotation
0,7,22103,,,,0,,,Jan.Brzeziński,2017-03-29 18:04:09,0
1,6362,378876,,,,0,,,Jan.Brzeziński,2017-03-29 18:40:50,0
2,6363,28635,,,,0,,,Jan.Brzeziński,2017-03-29 18:40:59,0
3,9,32394,"smutek,złość,strach","nieużyteczność,krzywda,nieszczęście",- m,1,Za swoje winy został skazany przez sąd na doży...,,Ada.Zajączkowska,2017-03-08 13:51:28,0
4,10,54232,"radość,cieszenie się na coś oczekiwanego","użyteczność,dobro,szczęście",+ s,1,W moim czasie wolnym lubię odpoczywać na dział...,,Ada.Zajączkowska,2017-03-08 14:06:52,0
...,...,...,...,...,...,...,...,...,...,...,...
189853,190897,52911,,,,0,,,Ada.Zajączkowska,2020-05-27 12:03:28,0
189854,190898,52918,,,,0,,,Ada.Zajączkowska,2020-05-27 12:03:43,0
189855,190899,52921,,,,0,,,Ada.Zajączkowska,2020-05-27 12:03:59,0
189856,190900,52938,"smutek,złość","nieużyteczność,krzywda",- s,1,Sprawczyni tego ohydnego czynu pozostała bezka...,,Ada.Zajączkowska,2020-05-27 12:04:30,0


In [4]:
df_emotion.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 189858 entries, 0 to 189857
Data columns (total 11 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   id               189858 non-null  int64 
 1   lexicalunit_id   189858 non-null  int64 
 2   emotions         59287 non-null   object
 3   valuations       62867 non-null   object
 4   markedness       63656 non-null   object
 5   unitStatus       189858 non-null  int64 
 6   example1         63599 non-null   object
 7   example2         10536 non-null   object
 8   owner            189858 non-null  object
 9   creation_date    189858 non-null  object
 10  super_anotation  189858 non-null  int64 
dtypes: int64(4), object(7)
memory usage: 15.9+ MB


In [5]:
df_emotion[df_emotion.duplicated()]

Unnamed: 0,id,lexicalunit_id,emotions,valuations,markedness,unitStatus,example1,example2,owner,creation_date,super_anotation


In [6]:
len(df_emotion.lexicalunit_id.unique())

93362

In [7]:
df_emotion[df_emotion.lexicalunit_id == 73550]

Unnamed: 0,id,lexicalunit_id,emotions,valuations,markedness,unitStatus,example1,example2,owner,creation_date,super_anotation
7,13,73550,,,,0,,,Jan.Brzeziński,2017-03-08 18:14:39,0
2065,2046,73550,,,,0,,,Emilia.Matyka,2017-03-16 10:07:09,0


In [8]:
df_lu_syn = pd.read_csv(PLWN_LU_SYN_MAP_PATH, sep='\t')

df_lu_syn

Unnamed: 0,lex_id,syn_id
0,399,9107
1,4339,442239
2,659668,425000
3,21195,10
4,144142,103562
...,...,...
512566,7060980,7060838
512567,7060981,7060839
512568,7060982,7060840
512569,7060983,76235


In [9]:
df_lu_syn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 512571 entries, 0 to 512570
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype
---  ------  --------------   -----
 0   lex_id  512571 non-null  int64
 1   syn_id  512571 non-null  int64
dtypes: int64(2)
memory usage: 7.8 MB


In [10]:
df_lu_syn[df_lu_syn.duplicated()]

Unnamed: 0,lex_id,syn_id


In [11]:
print(f'Unique lu ids: {len(df_lu_syn.lex_id.unique())}')
print(f'Unique synset ids: {len(df_lu_syn.syn_id.unique())}')

Unique lu ids: 512571
Unique synset ids: 352659


## merged dataframes

In [12]:
df_emotion = df_emotion.rename(columns={'lexicalunit_id': 'lu_id'})
df_lu_syn = df_lu_syn.rename(columns={'lex_id': 'lu_id'})

df_merged = df_emotion.merge(df_lu_syn, on='lu_id')
df_merged

Unnamed: 0,id,lu_id,emotions,valuations,markedness,unitStatus,example1,example2,owner,creation_date,super_anotation,syn_id
0,7,22103,,,,0,,,Jan.Brzeziński,2017-03-29 18:04:09,0,7653
1,663,22103,zaufanie,,+ s,1,Najnowszych odkryć archeologicznych dokonał ze...,,Emilia.Matyka,2017-03-12 11:19:02,0,7653
2,6362,378876,,,,0,,,Jan.Brzeziński,2017-03-29 18:40:50,0,246307
3,6425,378876,,,,0,,,Emilia.Matyka,2017-03-30 09:17:19,0,246307
4,6363,28635,,,,0,,,Jan.Brzeziński,2017-03-29 18:40:59,0,16086
...,...,...,...,...,...,...,...,...,...,...,...,...
189681,190886,127278,,"nieużyteczność,błąd",- s,1,Żeby renomowany poeta pozwalał sobie na zepsuc...,,Ewa.Kaczmarz,2020-05-27 11:49:17,0,91910
189682,190887,127641,smutek,nieużyteczność,- s,1,"Nie chciałem wracać do tej zapadłej dziury, w ...",,Ewa.Kaczmarz,2020-05-27 11:55:08,0,92184
189683,190888,127846,,"użyteczność,krzywda,nieszczęście",amb,1,Doświadczenie śmierci w najbliższym otoczeniu ...,Takie doświadczenie przez los było dla wielu z...,Ewa.Kaczmarz,2020-05-27 12:00:20,0,92316
189684,190890,127855,,,,0,,,Ewa.Kaczmarz,2020-05-27 12:00:50,0,92323


In [13]:
df_merged = df_merged.sort_values(by=['lu_id', 'id'])
df_merged

Unnamed: 0,id,lu_id,emotions,valuations,markedness,unitStatus,example1,example2,owner,creation_date,super_anotation,syn_id
12310,7031,11,,,,0,,,SentiOne,2017-04-21 14:36:39,0,238698
12311,7032,11,,,,0,,,SentiOne,2017-04-21 14:36:39,0,238698
148741,179528,12,,,,0,,,Kamil.Wabnic,2019-12-05 09:26:04,0,12
148742,179546,12,,,,0,,,Ada.Zajączkowska,2019-12-08 08:46:11,0,12
12312,7033,14,"wstręt,złość","niewiedza,błąd",- s,1,"Jej propozycje są zupełnie abstrakcyjne, nie d...",,SentiOne,2017-03-31 15:35:04,0,103631
...,...,...,...,...,...,...,...,...,...,...,...,...
171653,167346,7080904,,,,0,,,Ada.Zajączkowska,2018-05-23 16:07:01,0,7080579
171654,167542,7080904,,,,0,,,Kamil.Wabnic,2018-05-26 12:21:52,0,7080579
171881,167548,7080942,"radość,smutek,strach","dobro,szczęście,krzywda,nieszczęście",amb,1,Nie mam ochoty po raz kolejny obcierać się o ś...,"Widziałem, że obcieram się o zwycięstwo.",Kamil.Wabnic,2018-05-26 12:32:28,0,225464
171882,178447,7080942,"radość,smutek,złość,strach","użyteczność,szczęście,nieużyteczność,nieszczęście",amb,1,"W tym roku obcieraliśmy się o złoto, więc w pr...","To, co robisz, obciera się o oszustwo - nie ch...",Ada.Zajączkowska,2018-07-17 18:22:10,0,225464


In [14]:
df_merged[df_merged.syn_id == 110]

Unnamed: 0,id,lu_id,emotions,valuations,markedness,unitStatus,example1,example2,owner,creation_date,super_anotation,syn_id


In [27]:
df_syn_id_group = df_merged.groupby('syn_id')

emonames_map = dict()
emovaluations_map = dict()
for name, group in df_syn_id_group:
    emonames_agg = set()
    emovaluations_agg = set()

    for emonames in group['emotions'].values:
        if not pd.isna(emonames) and emonames not in [' ', '', '-']:
            emonames_agg.update(emonames.split(','))

    for emovaluations in group['valuations'].values:
        if not pd.isna(emovaluations) and emovaluations not in [' ', '', '-']:
            emovaluations_agg.update(emovaluations.split(','))

    if emonames_agg:
        emonames_map[str(name)] = list(emonames_agg)
    if emovaluations_agg:
        emovaluations_map[str(name)] = list(emovaluations_agg)

In [28]:
emonames_map

{'18': ['złość', 'strach'],
 '23': ['złość', 'strach', 'smutek', 'wstręt'],
 '24': ['smutek', 'wstręt'],
 '28': ['smutek',
  'radość',
  'zaufanie',
  'złość',
  'cieszenie się na coś oczekiwanego'],
 '45': ['zaskoczenie czymś nieprzewidywanym', 'strach', 'smutek'],
 '51': ['cieszenie się na', 'radość'],
 '52': ['radość'],
 '57': ['złość', 'radość'],
 '59': ['cieszenie się na'],
 '64': ['radość', 'cieszenie się na coś oczekiwanego'],
 '66': ['radość'],
 '67': ['zaufanie'],
 '70': ['wstręt'],
 '79': ['zaufanie'],
 '80': ['złość', 'wstręt'],
 '101': ['zaufanie'],
 '102': ['zaufanie'],
 '105': ['strach',
  'zaskoczenie czymś nieprzewidywanym',
  'radość',
  'smutek',
  'zaufanie',
  'złość',
  'cieszenie się na coś oczekiwanego'],
 '107': ['cieszenie się na coś oczekiwanego', 'złość', 'radość', 'wstręt'],
 '108': ['radość'],
 '124': ['radość', 'cieszenie się na coś oczekiwanego'],
 '127': ['złość'],
 '129': ['smutek', 'złość', 'radość'],
 '132': ['złość', 'zaufanie'],
 '140': ['złość', 'w

## Get mapings from english plWN synsets to polish plWN synsets

In [32]:
EN_PL_PLWN_MAP_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'mapping_plwn_i-links_i-all.txt')

df_en_pl_map = pd.read_csv(EN_PL_PLWN_MAP_PATH, sep='\t')

df_en_pl_map

Unnamed: 0,parent_id,child_id,name
0,100081,356680,Hiper_plWN-PWN
1,100090,323934,Hiper_plWN-PWN
2,100091,284855,Hiper_plWN-PWN
3,100094,359501,Hiper_plWN-PWN
4,100107,290303,Hiper_plWN-PWN
...,...,...,...
473604,7083245,255346,Syn_PWN-plWN
473605,7083246,405543,Syn_PWN-plWN
473606,7087398,433913,Syn_PWN-plWN
473607,7087403,23796,Syn_PWN-plWN


In [33]:
df_en_pl_map.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 473609 entries, 0 to 473608
Data columns (total 3 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   parent_id  473609 non-null  int64 
 1   child_id   473609 non-null  int64 
 2   name       473609 non-null  object
dtypes: int64(2), object(1)
memory usage: 10.8+ MB


In [36]:
df_en_pl_map.name.unique().tolist()

['Hiper_plWN-PWN',
 'Hiper_PWN-plWN',
 'Hipo_plWN-PWN',
 'Hipo_PWN-plWN',
 'międzyjęzykowa_synonimia_częściowa_plWN-PWN',
 'międzyjęzykowa_synonimia_częściowa_PWN-plWN',
 'międzyjęzykowa_synonimia_międzyparadygmatyczna_made_of_plWN-PWN',
 'międzyjęzykowa_synonimia_międzyparadygmatyczna_resembling_plWN-PWN',
 'Syn_plWN-PWN',
 'Syn_PWN-plWN']

In [38]:
plwn_en_emonames_hyper = dict()
plwn_en_emonames_syn = dict()
plwn_en_emonames_hypo = dict()

plwn_en_emovaluations_hyper = dict()
plwn_en_emovaluations_syn = dict()
plwn_en_emovaluations_hypo = dict()

for idx, row in df_en_pl_map.iterrows():
    relation_name = row['name']
    # hyperonyms
    if relation_name == 'Hiper_plWN-PWN':
        plwn_pl_parent_id = str(row['parent_id'])
        plwn_en_child_id = str(row['child_id'])
        if plwn_pl_parent_id in emonames_map:
            plwn_en_emonames_hyper[plwn_en_child_id] = emonames_map[plwn_pl_parent_id]
        if plwn_pl_parent_id in emovaluations_map:
            plwn_en_emovaluations_hyper[plwn_en_child_id] = emovaluations_map[plwn_pl_parent_id]
    if relation_name == 'Hiper_PWN-plWN':
        plwn_en_parent_id = str(row['parent_id'])
        plwn_pl_child_id = str(row['child_id'])
        if plwn_pl_child_id in emonames_map:
            plwn_en_emonames_hyper[plwn_en_parent_id] = emonames_map[plwn_pl_child_id]
        if plwn_pl_child_id in emovaluations_map:
            plwn_en_emovaluations_hyper[plwn_en_parent_id] = emovaluations_map[plwn_pl_child_id]

    # hyponyms
    if relation_name == 'Hipo_plWN-PWN':
        plwn_pl_parent_id = str(row['parent_id'])
        plwn_en_child_id = str(row['child_id'])
        if plwn_pl_parent_id in emonames_map:
            plwn_en_emonames_hypo[plwn_en_child_id] = emonames_map[plwn_pl_parent_id]
        if plwn_pl_parent_id in emovaluations_map:
            plwn_en_emovaluations_hypo[plwn_en_child_id] = emovaluations_map[plwn_pl_parent_id]
    if relation_name == 'Hipo_PWN-plWN':
        plwn_en_parent_id = str(row['parent_id'])
        plwn_pl_child_id = str(row['child_id'])
        if plwn_pl_child_id in emonames_map:
            plwn_en_emonames_hypo[plwn_en_parent_id] = emonames_map[plwn_pl_child_id]
        if plwn_pl_child_id in emovaluations_map:
            plwn_en_emovaluations_hypo[plwn_en_parent_id] = emovaluations_map[plwn_pl_child_id]

    # synonyms
    if relation_name in [
        'Syn_plWN-PWN',
        'międzyjęzykowa_synonimia_częściowa_plWN-PWN',
        'międzyjęzykowa_synonimia_międzyparadygmatyczna_made_of_plWN-PWN'
    ]:
        plwn_pl_parent_id = str(row['parent_id'])
        plwn_en_child_id = str(row['child_id'])
        if plwn_pl_parent_id in emonames_map:
            plwn_en_emonames_syn[plwn_en_child_id] = emonames_map[plwn_pl_parent_id]
        if plwn_pl_parent_id in emovaluations_map:
            plwn_en_emovaluations_syn[plwn_en_child_id] = emovaluations_map[plwn_pl_parent_id]
    if relation_name in [
        'Syn_PWN-plWN',
        'międzyjęzykowa_synonimia_częściowa_PWN-plWN',
        'międzyjęzykowa_synonimia_międzyparadygmatyczna_made_of_PWN-plWN'
    ]:
        plwn_en_parent_id = str(row['parent_id'])
        plwn_pl_child_id = str(row['child_id'])
        if plwn_pl_child_id in emonames_map:
            plwn_en_emonames_syn[plwn_en_parent_id] = emonames_map[plwn_pl_child_id]
        if plwn_pl_child_id in emovaluations_map:
            plwn_en_emovaluations_syn[plwn_en_parent_id] = emovaluations_map[plwn_pl_child_id]

In [39]:
plwn_en_emonames_syn

{'273103': ['wstręt'],
 '272374': ['złość', 'radość'],
 '355895': ['złość', 'smutek'],
 '272888': ['złość'],
 '269308': ['radość'],
 '269448': ['radość', 'cieszenie się na coś oczekiwanego'],
 '318457': ['zaufanie'],
 '358633': ['radość'],
 '361608': ['smutek'],
 '317212': ['smutek', 'wstręt', 'zaufanie', 'radość'],
 '324284': ['smutek', 'radość'],
 '360032': ['smutek'],
 '312042': ['złość'],
 '336552': ['zaufanie'],
 '324438': ['złość', 'smutek'],
 '359738': ['złość', 'smutek'],
 '286912': ['radość'],
 '324305': ['radość'],
 '313074': ['smutek', 'wstręt', 'radość', 'cieszenie się na coś oczekiwanego'],
 '362193': ['zaufanie', 'radość'],
 '313101': ['smutek',
  'radość',
  'zaufanie',
  'wstręt',
  'cieszenie się na coś oczekiwanego'],
 '359752': ['zaufanie', 'radość'],
 '358523': ['smutek', 'strach', 'radość'],
 '339784': ['złość', 'wstręt'],
 '311308': ['złość', 'cieszenie się na'],
 '315308': ['złość', 'wstręt'],
 '267128': ['strach', 'radość'],
 '267337': ['złość', 'strach', 'smute

In [47]:
def get_plwn_pwn_mapping() -> dict:
    MAPPING_PWN_PLWN_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'pwn30-plwn32.txt')
    map_lines = MAPPING_PWN_PLWN_PATH.open(mode='r').readlines()
    plwn_pwn_mapping = dict()
    for line in map_lines:
        pwn_syn, plwn_syn = line.split('\t')
        plwn_syn = str(int(plwn_syn))
        plwn_pwn_mapping[plwn_syn] = pwn_syn

    return plwn_pwn_mapping


plwn_pwn_mapping = get_plwn_pwn_mapping()

In [48]:
plwn_pwn_mapping

{'262989': '00001740-a',
 '284799': '00001740-n',
 '281174': '00001740-r',
 '366991': '00001740-v',
 '281175': '00001837-r',
 '284800': '00001930-n',
 '281176': '00001981-r',
 '262990': '00002098-a',
 '284801': '00002137-n',
 '281177': '00002142-r',
 '281178': '00002296-r',
 '262991': '00002312-a',
 '366992': '00002325-v',
 '281179': '00002436-r',
 '284802': '00002452-n',
 '262992': '00002527-a',
 '366993': '00002573-v',
 '281180': '00002621-r',
 '284803': '00002684-n',
 '366994': '00002724-v',
 '262993': '00002730-a',
 '262994': '00002843-a',
 '366995': '00002942-v',
 '283242': '00002950-r',
 '262995': '00002956-a',
 '281183': '00003093-r',
 '262996': '00003131-a',
 '366996': '00003133-v',
 '281184': '00003294-r',
 '366997': '00003316-v',
 '262997': '00003356-a',
 '281185': '00003380-r',
 '366998': '00003431-v',
 '281186': '00003483-r',
 '262998': '00003553-a',
 '284804': '00003553-n',
 '366999': '00003662-v',
 '262999': '00003700-a',
 '281187': '00003771-r',
 '367000': '00003826-v',


In [73]:
print(f'Mapping len: {len(plwn_pwn_mapping.values())}')
print(f'Unique PWN labels in mapping: {len(set(plwn_pwn_mapping.values()))}')

Mapping len: 117176
Unique PWN labels in mapping: 117034


In [95]:
from pathlib import Path
from collections import defaultdict


def get_pwn_mappings(values_mapper: dict, plwn_pwn_mapping: dict[str, str], savepath: Path) -> None:
    pwn_values = defaultdict(set)
    for key, values in values_mapper.items():
        if key in plwn_pwn_mapping:
            pwn_id = plwn_pwn_mapping[key]
            pwn_values[pwn_id].update(values)

    data = {'pwn_id': pwn_values.keys(), 'values': pwn_values.values()}
    df = pd.DataFrame.from_dict(data)
    df = df.sort_values(by=['pwn_id'])
    df['values'] = df['values'].apply(lambda x: list(x))

    assert len(df) == len(df['pwn_id'].unique())
    df.to_csv(savepath, sep='\t', index=False, header=False)


In [96]:
# save mapping filenames for EmoPlWN
HYPERNYMY_EMONAMES_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'pwn-plwn_hypernymy_emonames.txt')
SYNONYMY_EMONAMES_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'pwn-plwn_synonymy_emonames.txt')
HYPONYMY_EMONAMES_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'pwn-plwn_hyponymy_emonames.txt')

HYPERNYMY_EMOVALUATIONS_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'pwn-plwn_hypernymy_emovaluations.txt')
SYNONYMY_EMOVALUATIONS_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'pwn-plwn_synonymy_emovaluations.txt')
HYPONYMY_EMOVALUATIONS_PATH = SENTIMENT_RES_DIR.joinpath('plwn_pwn_mappings', 'pwn-plwn_hyponymy_emovaluations.txt')

Save emotion mappings files

In [97]:
get_pwn_mappings(
    values_mapper=plwn_en_emonames_hyper,
    plwn_pwn_mapping=plwn_pwn_mapping,
    savepath=HYPERNYMY_EMONAMES_PATH,
)
get_pwn_mappings(
    values_mapper=plwn_en_emonames_syn,
    plwn_pwn_mapping=plwn_pwn_mapping,
    savepath=SYNONYMY_EMONAMES_PATH,
)
get_pwn_mappings(
    values_mapper=plwn_en_emonames_hypo,
    plwn_pwn_mapping=plwn_pwn_mapping,
    savepath=HYPONYMY_EMONAMES_PATH,
)

get_pwn_mappings(
    values_mapper=plwn_en_emovaluations_hyper,
    plwn_pwn_mapping=plwn_pwn_mapping,
    savepath=HYPERNYMY_EMOVALUATIONS_PATH,
)
get_pwn_mappings(
    values_mapper=plwn_en_emovaluations_syn,
    plwn_pwn_mapping=plwn_pwn_mapping,
    savepath=SYNONYMY_EMOVALUATIONS_PATH,
)
get_pwn_mappings(
    values_mapper=plwn_en_emovaluations_hypo,
    plwn_pwn_mapping=plwn_pwn_mapping,
    savepath=HYPONYMY_EMOVALUATIONS_PATH,
)