# TestQueryExpander

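This notebook exercises the `QueryExpander` class. It loads the word-level and character-level word2vec models, shows raw similarity lookups on each model, and then demonstrates synonym lookup, spelling-variant lookup, and full word expansion.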

In [1]:
# Execute the shared import script; the names it defines become available in this notebook.
%run includes/imports.py
# Turn on autoreload in mode 2 so that edits to imported modules are picked up
# before each cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from includes.w2vec import W2VecModel
from includes.stringop import StringOp
from includes.query_expander import QueryExpander

In [3]:
# Two independent model instances: W2VEC is loaded from the word-level file,
# CHARGRAM from the character-level file.
W2VEC, CHARGRAM = W2VecModel(), W2VecModel()

# The character-level file is loaded first, then the word-level file.
for model, model_file in (
    (CHARGRAM, 'index/model_char.w2v'),
    (W2VEC, 'index/model_word.w2v'),
):
    model.load_model(model_file)

### Examples based only on similarity

In [4]:
# Closest entries to 'positive' in the model loaded from index/model_char.w2v.
# The output is a list of (word, score) pairs; here it is dominated by
# spelling variants such as 'postive' and 'positve'.
CHARGRAM.get_extreme_similarities('positive')

[('positive', 1.0),
 ('abpositive', 0.83555370357207825),
 ('postive', 0.81306006497444039),
 ('positve', 0.78224685893363399),
 ('postitive', 0.75106754441302259),
 ('positives', 0.73920448561684071),
 ('positively', 0.70386590056292542),
 ('positivea', 0.70270108789314667),
 ('positivect', 0.69790953999502603),
 ('negative', 0.69561993363389107)]

In [5]:
# Closest entries to 'huge' in the model loaded from index/model_word.w2v.
# The output is a list of (word, score) pairs; here it mixes related words
# ('large', 'enormous', 'massive') with abbreviations such as 'lg' and 'lrg'.
W2VEC.get_extreme_similarities('huge')

[('huge', 0.99999999999999989),
 ('large', 0.63845070113921376),
 ('lg', 0.60091165089868803),
 ('lge', 0.54505257562472731),
 ('arge', 0.52603615553141903),
 ('lrg', 0.52310969492793113),
 ('enormous', 0.51714260994744266),
 ('massive', 0.49617927369726889),
 ('substantial', 0.48943049783581977),
 ('mall', 0.44649910128078202)]

### Examples based on QueryExpander

In [6]:
# Build the expander from both models (word-level model first, character-level
# model second). Every cell below uses this instance.
Expander = QueryExpander(W2VEC, CHARGRAM) 

In [15]:
# Synonyms of "important", returned as a {word: score} dict.
# NOTE(review): the second argument (0.4) is presumably a minimum-similarity
# cutoff -- confirm against QueryExpander.get_synonym.
Expander.get_synonym("important", 0.4)

{'crucial': 0.55781850907043729, 'important': 0.99999999999999989}

In [11]:
# Misspellings and spelling variants of "positive", returned as a {word: score} dict.
# NOTE(review): the positional arguments 5 and 2 look like a result limit and a
# maximum edit distance (the output has five entries) -- confirm against
# QueryExpander.get_variants.
Expander.get_variants("positive", 5, 2)

{'abpositive': 0.83555370357207825,
 'positive': 1.0,
 'positve': 0.78224685893363399,
 'postitive': 0.75106754441302259,
 'postive': 0.81306006497444039}

In [11]:
# Expand "small" with syn_filter disabled and print one "term weight" pair per line.
unfiltered_expansion = Expander.expand_word("small", syn_filter=False)
for term, weight in unfiltered_expansion.items():
    print(term, weight)

small 1
little 1
minor 1
modest 1
small-scale 1
pocket-size 1
pocket-sized 1
humble 1
low 1
lowly 1
minuscule 1
belittled 1
diminished 1
smalla 0.7746757406450243
smallto 0.774522577216758


In [12]:
# Expand "small" again, this time passing 0.2 as the second positional argument,
# and print one "term weight" pair per line.
# NOTE(review): 0.2 is presumably the synonym threshold that other cells pass
# as syn_threshold -- confirm against QueryExpander.expand_word.
thresholded_expansion = Expander.expand_word("small", 0.2)
for term, weight in thresholded_expansion.items():
    print(term, weight)

small 1
little 0.27506903884402584
minor 0.2603955926489393
modest 0.13340838154436163
low 0.2344518959190348
minuscule 0.3969706115245425
diminished 0.15832813430928297
smalla 0.7746757406450243
smallto 0.774522577216758


In [13]:
# List the unfiltered synonyms of 'small', one "term weight" pair per line.
raw_synonyms = Expander.get_synonym_no_filtering('small')
for term, weight in raw_synonyms.items():
    print(term, weight)

small 1
little 1
minor 1
modest 1
small-scale 1
pocket-size 1
pocket-sized 1
humble 1
low 1
lowly 1
minuscule 1
belittled 1
diminished 1


In [14]:
# Expand 'hemorrhage' with explicit synonym and variant settings and print
# one "term weight" pair per line.
hemorrhage_expansion = Expander.expand_word(
    'hemorrhage',
    syn_threshold=0.2,
    var_thres_sim=30,
    var_thres_dis=3,
)
for term, weight in hemorrhage_expansion.items():
    print(term, weight)

bleeding 0.39020930927541164
hemorrhage 1
haemorrhage 0.8255109169510516
bleed 0.5537789455329523
emorrhage 0.9043772806910582
rehemorrhage 0.8976589619911091
hemorrhaged 0.8950102063391685
hemmorrhage 0.8669769690527649
hemorhage 0.8376344115303077
hemorrhagea 0.8270885150672542
hemorrahge 0.8095592181684932
hemorrhagect 0.8025166547889467
hemorrhages 0.8021550983948165
hemorrhagemri 0.7994347620007629
hemorrhagecta 0.7988764494116896
hemorrage 0.7777886578628944
hemorrhaging 0.7726019875647785
hemmorhage 0.7497029345127292
hemmorhages 0.6350034548060906
hemorragic 0.6180878007294357
hemmorage 0.6023493466391348


In [18]:
# Expand 'positive' with var_thres_dis=-1 and print one "term weight" pair per line.
# NOTE(review): -1 presumably disables the distance-based variant filter --
# confirm against QueryExpander.expand_word.
positive_expansion = Expander.expand_word(
    'positive',
    syn_threshold=0.2,
    var_thres_sim=20,
    var_thres_dis=-1,
)
for term, weight in positive_expansion.items():
    print(term, weight)

positive 1
abpositive 0.8355537035720784
postive 0.8130600649744406
positve 0.7822468589336342
postitive 0.7510675444130227
positives 0.7392044856168408
positively 0.7038659005629259
positivea 0.7027010878931468
positivect 0.6979095399950261
positivegi 0.6929377548357832
possitive 0.6809354664334613
postiive 0.6730314596389659
postivie 0.6474585458725723
