In [112]:
import pandas as pd
import numpy as np

import ast

from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from imblearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, cohen_kappa_score, balanced_accuracy_score, precision_score, f1_score
from scipy.special import comb
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings("ignore")

## Import the data

In [116]:
# Read the translated data set; index_col=None keeps the CSV's leading
# (unnamed) column as an ordinary column instead of using it as the index.
raw_data = pd.read_csv('./translated_raw.csv', index_col=None)

# Column names of the baseline features.
base_feature = [
    'nrdirhypers_x',
    'nrhypos_x',
    'nrpartrels_normalised_x',
    'depthfromtopsynset_normalised_x',
    'glosslength_normalised_x',
    'minwordlength_x',
    'nroflemmas_x',
    'polyscore_max_x',
]

# Column holding the label ('b' / 'nb' in the rows displayed below).
target = ['vote_x']

raw_data

Unnamed: 0,Synsets,domain_x,norm,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x,raw_translation,raw_translation_lemmas
0,Synset('adjustable_wrench.n.01'),tool,adjustable_wrench,1,7,0.0,1.012903,0.563173,17,2,1,nb,adjust change change hold hold hold screw tool,['adjust change change hold hold hold screw to...
1,Synset('allen_wrench.n.01'),tool,allen_wrench,1,0,0.0,1.012903,0.391092,12,1,1,nb,break hold hold hold metal screw tool,['break hold hold hold metal screw tool ']
2,Synset('alligator_wrench.n.01'),tool,alligator_wrench,1,0,0.0,1.012903,1.517437,16,1,1,nb,animal claw claw claw chew chew chew eat teeth...,['animal claw claw claw chew chew chew eat tee...
3,Synset('awl.n.01'),tool,awl,1,2,15.7,0.911613,0.985552,3,1,1,b,cloth cloth hand knit knit wool,['cloth cloth hand knit knit wool ']
4,Synset('backsaw.n.01'),tool,backsaw,1,0,0.0,1.114194,1.110701,7,2,1,nb,cut blade blade blade cut edge edge hand heave...,['cut blade blade blade cut edge edge hand hea...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,Synset('ballet_skirt.n.01'),garm,ballet_skirt,1,0,0.0,0.947552,0.578283,4,2,2,nb,act ballet ballet ballet dance dance dance dre...,['act ballet ballet ballet dance dance dance d...
835,Synset('mess_jacket.n.01'),garm,mess_jacket,1,0,0.0,1.158120,1.652238,11,3,1,nb,clean cloth cloth cloth cover mess mess,"['clean cloth cloth cloth cover mess mess ', '..."
836,Synset('long_johns.n.01'),garm,long_johns,1,0,0.0,1.052836,0.479149,10,1,1,nb,bend cloth cloth cloth cover cloth cloth comfo...,['bend cloth cloth cloth cover cloth cloth com...
837,Synset('undies.n.01'),garm,undies,1,0,0.0,1.158120,0.280880,6,1,1,nb,cloth cloth cloth cover cloth cover comfort co...,['cloth cloth cloth cover cloth cover comfort ...


## Feature Engineering

In [117]:
## Translation set
# Work on a copy so that raw_data stays untouched.
translation_data = raw_data.copy()

# Whitespace-tokenise every raw translation string once and reuse the result.
raw_tokens = translation_data['raw_translation'].apply(lambda text: text.split())

# Distinct tokens per row (order follows set iteration order).
translation_data['translation_set'] = raw_tokens.apply(lambda words: list(set(words)))

## Number, length
# Total token count (duplicates included) and distinct token count.
translation_data['number_tranlsation'] = raw_tokens.apply(len)
translation_data['number_sense'] = translation_data['translation_set'].apply(len)

translation_data

Unnamed: 0,Synsets,domain_x,norm,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x,raw_translation,raw_translation_lemmas,translation_set,number_tranlsation,number_sense
0,Synset('adjustable_wrench.n.01'),tool,adjustable_wrench,1,7,0.0,1.012903,0.563173,17,2,1,nb,adjust change change hold hold hold screw tool,['adjust change change hold hold hold screw to...,"[adjust, hold, change, screw, tool]",8,5
1,Synset('allen_wrench.n.01'),tool,allen_wrench,1,0,0.0,1.012903,0.391092,12,1,1,nb,break hold hold hold metal screw tool,['break hold hold hold metal screw tool '],"[break, hold, metal, screw, tool]",7,5
2,Synset('alligator_wrench.n.01'),tool,alligator_wrench,1,0,0.0,1.012903,1.517437,16,1,1,nb,animal claw claw claw chew chew chew eat teeth...,['animal claw claw claw chew chew chew eat tee...,"[claw, chew, eat, animal, fin, teeth]",15,6
3,Synset('awl.n.01'),tool,awl,1,2,15.7,0.911613,0.985552,3,1,1,b,cloth cloth hand knit knit wool,['cloth cloth hand knit knit wool '],"[hand, cloth, knit, wool]",6,4
4,Synset('backsaw.n.01'),tool,backsaw,1,0,0.0,1.114194,1.110701,7,2,1,nb,cut blade blade blade cut edge edge hand heave...,['cut blade blade blade cut edge edge hand hea...,"[edge, metal, hand, cut, blade, heave, large]",13,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,Synset('ballet_skirt.n.01'),garm,ballet_skirt,1,0,0.0,0.947552,0.578283,4,2,2,nb,act ballet ballet ballet dance dance dance dre...,['act ballet ballet ballet dance dance dance d...,"[dance, act, long, dress, skirt, ballet, leg, ...",13,8
835,Synset('mess_jacket.n.01'),garm,mess_jacket,1,0,0.0,1.158120,1.652238,11,3,1,nb,clean cloth cloth cloth cover mess mess,"['clean cloth cloth cloth cover mess mess ', '...","[cloth, cover, clean, mess]",7,4
836,Synset('long_johns.n.01'),garm,long_johns,1,0,0.0,1.052836,0.479149,10,1,1,nb,bend cloth cloth cloth cover cloth cloth comfo...,['bend cloth cloth cloth cover cloth cloth com...,"[cotton, long, comfort, cloth, bend, material,...",14,7
837,Synset('undies.n.01'),garm,undies,1,0,0.0,1.158120,0.280880,6,1,1,nb,cloth cloth cloth cover cloth cover comfort co...,['cloth cloth cloth cover cloth cover comfort ...,"[pants, cotton, comfort, cloth, material, cove...",14,7


In [118]:
## lemmas
def flattenTranslation(dataframe):
    """Flatten one row's stringified list of lemma translations into one string.

    Parameters
    ----------
    dataframe : mapping-like
        A single row (e.g. the Series handed over by ``DataFrame.apply`` with
        ``axis=1``). Its ``'raw_translation_lemmas'`` entry must be a Python
        literal: a list of strings, or a single string.

    Returns
    -------
    str
        All tokens of all fragments, in order, separated by single spaces
        (no leading or trailing whitespace).

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when the entry is not a valid
        Python literal.
    """
    raw = dataframe['raw_translation_lemmas']
    fragments = ast.literal_eval(raw)

    # A bare string literal is a single fragment; iterating it would split it
    # into characters.
    if isinstance(fragments, str):
        fragments = [fragments]

    # Join with an explicit separator. Plain concatenation only works while
    # every fragment carries its own trailing whitespace; otherwise the last
    # token of one fragment fuses with the first token of the next.
    return ' '.join(token for fragment in fragments for token in fragment.split())
# Flatten from the untouched raw_data rather than from translation_data itself:
# this cell overwrites translation_data['raw_translation_lemmas'], so reading
# that same column on a second run would hand already-flattened text to
# ast.literal_eval and fail. translation_data is a copy of raw_data, so both
# frames share the same index and the assignment aligns row by row.
translation_data['raw_translation_lemmas'] = raw_data.apply(flattenTranslation, axis=1)

translation_data

Unnamed: 0,Synsets,domain_x,norm,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x,raw_translation,raw_translation_lemmas,translation_set,number_tranlsation,number_sense
0,Synset('adjustable_wrench.n.01'),tool,adjustable_wrench,1,7,0.0,1.012903,0.563173,17,2,1,nb,adjust change change hold hold hold screw tool,adjust change change hold hold hold screw tool...,"[adjust, hold, change, screw, tool]",8,5
1,Synset('allen_wrench.n.01'),tool,allen_wrench,1,0,0.0,1.012903,0.391092,12,1,1,nb,break hold hold hold metal screw tool,break hold hold hold metal screw tool,"[break, hold, metal, screw, tool]",7,5
2,Synset('alligator_wrench.n.01'),tool,alligator_wrench,1,0,0.0,1.012903,1.517437,16,1,1,nb,animal claw claw claw chew chew chew eat teeth...,animal claw claw claw chew chew chew eat teeth...,"[claw, chew, eat, animal, fin, teeth]",15,6
3,Synset('awl.n.01'),tool,awl,1,2,15.7,0.911613,0.985552,3,1,1,b,cloth cloth hand knit knit wool,cloth cloth hand knit knit wool,"[hand, cloth, knit, wool]",6,4
4,Synset('backsaw.n.01'),tool,backsaw,1,0,0.0,1.114194,1.110701,7,2,1,nb,cut blade blade blade cut edge edge hand heave...,cut blade blade blade cut edge edge hand heave...,"[edge, metal, hand, cut, blade, heave, large]",13,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,Synset('ballet_skirt.n.01'),garm,ballet_skirt,1,0,0.0,0.947552,0.578283,4,2,2,nb,act ballet ballet ballet dance dance dance dre...,act ballet ballet ballet dance dance dance dre...,"[dance, act, long, dress, skirt, ballet, leg, ...",13,8
835,Synset('mess_jacket.n.01'),garm,mess_jacket,1,0,0.0,1.158120,1.652238,11,3,1,nb,clean cloth cloth cloth cover mess mess,clean cloth cloth cloth cover mess mess animal...,"[cloth, cover, clean, mess]",7,4
836,Synset('long_johns.n.01'),garm,long_johns,1,0,0.0,1.052836,0.479149,10,1,1,nb,bend cloth cloth cloth cover cloth cloth comfo...,bend cloth cloth cloth cover cloth cloth comfo...,"[cotton, long, comfort, cloth, bend, material,...",14,7
837,Synset('undies.n.01'),garm,undies,1,0,0.0,1.158120,0.280880,6,1,1,nb,cloth cloth cloth cover cloth cover comfort co...,cloth cloth cloth cover cloth cover comfort co...,"[pants, cotton, comfort, cloth, material, cove...",14,7


In [119]:
## Translation set
# Whitespace-tokenise the flattened lemma translations once and reuse the result.
lemma_tokens = translation_data['raw_translation_lemmas'].apply(lambda text: text.split())

# Distinct tokens per row (order follows set iteration order).
translation_data['translation_lemmas_set'] = lemma_tokens.apply(lambda words: list(set(words)))

## Number, length
# Total token count (duplicates included) and distinct token count.
translation_data['number_tranlsation_lemmas'] = lemma_tokens.apply(len)
translation_data['number_sense_lemmas'] = translation_data['translation_lemmas_set'].apply(len)

translation_data

Unnamed: 0,Synsets,domain_x,norm,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,polyscore_max_x,vote_x,raw_translation,raw_translation_lemmas,translation_set,number_tranlsation,number_sense,translation_lemmas_set,number_tranlsation_lemmas,number_sense_lemmas
0,Synset('adjustable_wrench.n.01'),tool,adjustable_wrench,1,7,0.0,1.012903,0.563173,17,2,1,nb,adjust change change hold hold hold screw tool,adjust change change hold hold hold screw tool...,"[adjust, hold, change, screw, tool]",8,5,"[adjust, hold, rotate, angle, change, screw, t...",15,7
1,Synset('allen_wrench.n.01'),tool,allen_wrench,1,0,0.0,1.012903,0.391092,12,1,1,nb,break hold hold hold metal screw tool,break hold hold hold metal screw tool,"[break, hold, metal, screw, tool]",7,5,"[break, hold, metal, screw, tool]",7,5
2,Synset('alligator_wrench.n.01'),tool,alligator_wrench,1,0,0.0,1.012903,1.517437,16,1,1,nb,animal claw claw claw chew chew chew eat teeth...,animal claw claw claw chew chew chew eat teeth...,"[claw, chew, eat, animal, fin, teeth]",15,6,"[claw, chew, eat, animal, fin, teeth]",15,6
3,Synset('awl.n.01'),tool,awl,1,2,15.7,0.911613,0.985552,3,1,1,b,cloth cloth hand knit knit wool,cloth cloth hand knit knit wool,"[hand, cloth, knit, wool]",6,4,"[hand, cloth, knit, wool]",6,4
4,Synset('backsaw.n.01'),tool,backsaw,1,0,0.0,1.114194,1.110701,7,2,1,nb,cut blade blade blade cut edge edge hand heave...,cut blade blade blade cut edge edge hand heave...,"[edge, metal, hand, cut, blade, heave, large]",13,7,"[edge, metal, hand, cut, metalback, blade, hea...",19,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,Synset('ballet_skirt.n.01'),garm,ballet_skirt,1,0,0.0,0.947552,0.578283,4,2,2,nb,act ballet ballet ballet dance dance dance dre...,act ballet ballet ballet dance dance dance dre...,"[dance, act, long, dress, skirt, ballet, leg, ...",13,8,"[dance, educate, act, long, dress, skirt, ball...",17,10
835,Synset('mess_jacket.n.01'),garm,mess_jacket,1,0,0.0,1.158120,1.652238,11,3,1,nb,clean cloth cloth cloth cover mess mess,clean cloth cloth cloth cover mess mess animal...,"[cloth, cover, clean, mess]",7,4,"[fur, winter, hood, color, black, wear, cloth,...",34,14
836,Synset('long_johns.n.01'),garm,long_johns,1,0,0.0,1.052836,0.479149,10,1,1,nb,bend cloth cloth cloth cover cloth cloth comfo...,bend cloth cloth cloth cover cloth cloth comfo...,"[cotton, long, comfort, cloth, bend, material,...",14,7,"[cotton, long, comfort, cloth, bend, material,...",14,7
837,Synset('undies.n.01'),garm,undies,1,0,0.0,1.158120,0.280880,6,1,1,nb,cloth cloth cloth cover cloth cover comfort co...,cloth cloth cloth cover cloth cover comfort co...,"[pants, cotton, comfort, cloth, material, cove...",14,7,"[pants, cotton, comfort, cloth, material, cove...",14,7


## Cue validity

In [107]:
## cue set
def cueSet(dataframe):
    """Collect the token vocabularies of the 'b' and 'nb' rows of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs a ``'vote_x'`` column (rows labelled ``'b'`` or ``'nb'`` are
        used) and a ``'translation_lemmas_set'`` column holding an iterable
        of tokens per row.

    Returns
    -------
    tuple
        ``(cue_set, cue_len, basic_level_and_cue, basic_level_and_cue_len,
        total_len)`` where

        * ``cue_set`` is the union of all tokens of the ``'b'`` rows,
        * ``cue_len`` is its size,
        * ``basic_level_and_cue`` holds the tokens of ``cue_set`` that never
          occur in an ``'nb'`` row,
        * ``basic_level_and_cue_len`` is its size,
        * ``total_len`` is ``basic_level_and_cue_len`` plus the size of the
          ``'nb'`` vocabulary.
    """
    def _vocabulary(label):
        # Union of the token collections of every row carrying this label.
        vocabulary = set()
        for tokens in dataframe.loc[dataframe['vote_x'] == label, 'translation_lemmas_set']:
            vocabulary.update(tokens)
        return vocabulary

    cue_set = _vocabulary('b')
    non_basic_level_cue_set = _vocabulary('nb')

    # Tokens seen only in 'b' rows.
    basic_level_and_cue = cue_set.difference(non_basic_level_cue_set)

    cue_len = len(cue_set)
    basic_level_and_cue_len = len(basic_level_and_cue)
    total_len = basic_level_and_cue_len + len(non_basic_level_cue_set)

    return cue_set, cue_len, basic_level_and_cue, basic_level_and_cue_len, total_len

In [110]:
## Cue validity while training and testing
def calCV(feature_list):
    """Score a collection of tokens against the module-level cue vocabularies.

    Reads the module-level names ``cue_set``, ``cue_len``,
    ``basic_level_and_cue`` and ``basic_level_and_cue_len``; they must be
    assigned before this function is called.

    Parameters
    ----------
    feature_list : iterable
        Tokens to score.

    Returns
    -------
    number
        ``0`` when no token is contained in ``basic_level_and_cue``; otherwise
        ``comb(basic_level_and_cue_len, k) / comb(cue_len, m)``, with ``k`` the
        number of tokens found in ``basic_level_and_cue`` and ``m`` the number
        of tokens found in ``cue_set``.
    """
    exclusive_hits = 0   # tokens present in basic_level_and_cue
    cue_hits = 0         # tokens present in cue_set
    for token in feature_list:
        exclusive_hits += token in basic_level_and_cue
        cue_hits += token in cue_set

    if exclusive_hits == 0:
        return 0

    numerator = comb(basic_level_and_cue_len, exclusive_hits)
    denominator = comb(cue_len, cue_hits)
    return numerator / denominator


In [114]:
# calCV reads these five names as module-level globals; assign them here so the
# cell also works on a fresh kernel (Restart & Run All) instead of relying on
# leftover state from an earlier session.
# NOTE(review): the vocabularies are built from every labelled row of
# translation_data - confirm this is intended if the 'cv' column is later used
# on held-out data.
cue_set, cue_len, basic_level_and_cue, basic_level_and_cue_len, total_len = cueSet(translation_data)

# Score for each row's distinct lemma tokens.
translation_data['cv'] = translation_data['translation_lemmas_set'].apply(calCV)

# Rescale the scores to the [0, 1] range.
normalize_scaler = MinMaxScaler()
translation_data['cv'] = normalize_scaler.fit_transform(translation_data[['cv']])

translation_data

Unnamed: 0,Synsets,domain_x,norm,nrdirhypers_x,nrhypos_x,nrpartrels_normalised_x,depthfromtopsynset_normalised_x,glosslength_normalised_x,minwordlength_x,nroflemmas_x,...,vote_x,raw_translation,raw_translation_lemmas,translation_set,number_tranlsation,number_sense,translation_lemmas_set,number_tranlsation_lemmas,number_sense_lemmas,cv
0,Synset('adjustable_wrench.n.01'),tool,adjustable_wrench,1,7,0.0,1.012903,0.563173,17,2,...,nb,adjust change change hold hold hold screw tool,adjust change change hold hold hold screw tool...,"[adjust, hold, change, screw, tool]",8,5,"[adjust, hold, rotate, angle, change, screw, t...",15,7,0.0
1,Synset('allen_wrench.n.01'),tool,allen_wrench,1,0,0.0,1.012903,0.391092,12,1,...,nb,break hold hold hold metal screw tool,break hold hold hold metal screw tool,"[break, hold, metal, screw, tool]",7,5,"[break, hold, metal, screw, tool]",7,5,0.0
2,Synset('alligator_wrench.n.01'),tool,alligator_wrench,1,0,0.0,1.012903,1.517437,16,1,...,nb,animal claw claw claw chew chew chew eat teeth...,animal claw claw claw chew chew chew eat teeth...,"[claw, chew, eat, animal, fin, teeth]",15,6,"[claw, chew, eat, animal, fin, teeth]",15,6,0.0
3,Synset('awl.n.01'),tool,awl,1,2,15.7,0.911613,0.985552,3,1,...,b,cloth cloth hand knit knit wool,cloth cloth hand knit knit wool,"[hand, cloth, knit, wool]",6,4,"[hand, cloth, knit, wool]",6,4,0.0
4,Synset('backsaw.n.01'),tool,backsaw,1,0,0.0,1.114194,1.110701,7,2,...,nb,cut blade blade blade cut edge edge hand heave...,cut blade blade blade cut edge edge hand heave...,"[edge, metal, hand, cut, blade, heave, large]",13,7,"[edge, metal, hand, cut, metalback, blade, hea...",19,8,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834,Synset('ballet_skirt.n.01'),garm,ballet_skirt,1,0,0.0,0.947552,0.578283,4,2,...,nb,act ballet ballet ballet dance dance dance dre...,act ballet ballet ballet dance dance dance dre...,"[dance, act, long, dress, skirt, ballet, leg, ...",13,8,"[dance, educate, act, long, dress, skirt, ball...",17,10,0.0
835,Synset('mess_jacket.n.01'),garm,mess_jacket,1,0,0.0,1.158120,1.652238,11,3,...,nb,clean cloth cloth cloth cover mess mess,clean cloth cloth cloth cover mess mess animal...,"[cloth, cover, clean, mess]",7,4,"[fur, winter, hood, color, black, wear, cloth,...",34,14,0.0
836,Synset('long_johns.n.01'),garm,long_johns,1,0,0.0,1.052836,0.479149,10,1,...,nb,bend cloth cloth cloth cover cloth cloth comfo...,bend cloth cloth cloth cover cloth cloth comfo...,"[cotton, long, comfort, cloth, bend, material,...",14,7,"[cotton, long, comfort, cloth, bend, material,...",14,7,0.0
837,Synset('undies.n.01'),garm,undies,1,0,0.0,1.158120,0.280880,6,1,...,nb,cloth cloth cloth cover cloth cover comfort co...,cloth cloth cloth cover cloth cover comfort co...,"[pants, cotton, comfort, cloth, material, cove...",14,7,"[pants, cotton, comfort, cloth, material, cove...",14,7,0.0
