In [1]:
import fasttext
import fasttext.util
import gensim
from gensim.models import FastText, Word2Vec
from gensim.models.fasttext import load_facebook_model
from nltk.corpus import wordnet as wn
import nltk
import itertools

In [11]:
nltk.download("omw-1.4")
nltk.download("wordnet")

[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\JoseManuelPinto\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\omw-1.4.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\JoseManuelPinto\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
#Grado similaridad
#Numero de sinonimos en común
#Número de antónimos en común
#Relación con los sinonimos
#Relación con los antónimos
#Familia semántica (Hiperonimos)
#Número de palabras entre la pista y la evaluada(s)

In [16]:
def get_cosine_similarity(model, word1, word2):
    return model.wv.similarity(word1, word2)

def get_synonyms(word1, lang = 'spa'):
    all_synonyms = list(map(lambda x: x.lemma_names(lang), wn.synsets(word1, lang=lang)))
    return list(set(itertools.chain(*all_synonyms)))

def get_antonyms(word1, lang = 'spa'):
    all_lemas = list(map(lambda x: x.lemmas(), wn.synsets(word1, lang=lang)))
    all_antonyms_lemas = list(map(lambda x: x.antonyms(), list(itertools.chain(*all_lemas))))
    all_antonyms = map(lambda x: x.synset().lemma_names(lang=lang), list(itertools.chain(*all_antonyms_lemas)))
    return list(set(itertools.chain(*all_antonyms)))

def get_synonyms_with_similarity(word1, model, lang='spa'):
    return list(map(lambda word2: (word2, get_cosine_similarity(model, word1, word2)), get_synonyms(word1, lang=lang)))

def get_number_of_same_synonyms(word1, word2, lang = 'spa'):
    synonyms_word1 = get_synonyms(word1, lang = lang)
    synonyms_word2 = get_synonyms(word2, lang = lang)
    return len(set(synonyms_word1).intersection(synonyms_word2))

def get_number_of_same_antonyms(word1, word2, lang = 'spa'):
    antonyms_word1 = get_antonyms(word1, lang = lang)
    antonyms_word2 = get_antonyms(word2, lang = lang)
    return len(set(antonyms_word1).intersection(antonyms_word2))

def get_number_synonyms_vs_antonyms(word1, word2, lang = 'spa'):
    antonyms_word1 = get_antonyms(word1, lang = lang)
    synonyms_word1 = get_synonyms(word1, lang = lang)
    antonyms_word2 = get_antonyms(word2, lang = lang)
    synonyms_word2 = get_synonyms(word2, lang = lang)
    return len(set(synonyms_word1).intersection(antonyms_word2)) + len(set(antonyms_word1).intersection(synonyms_word2))

def get_antonyms_with_similarity(word1, model, lang='spa'):
    return list(map(lambda word2: (word2, get_cosine_similarity(model, word1, word2)), get_antonyms(word1, lang=lang)))

def get_hypernyms_of_synsets(synsets):
    hypernyms = list(map(lambda x: x.hypernyms(), synsets))
    return list(set(itertools.chain(*hypernyms)))

def get_first_common_hypernym(word1, word2, lang = 'spa'):
    

In [5]:
fasttext.util.download_model('es', if_exists='ignore')  # English

Downloading https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.es.300.bin.gz


 (1.90%) [>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]> 

 (4.05%) [==>                                                ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=

 (7.40%) [===>                                               ]==>                                                ]==>                                                ]==>                                                ]]==>                                                ]==>                                                ]>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]==>

 (10.66%) [=====>                                             ]==>                                               ]>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>    











































'cc.es.300.bin'

In [6]:
%%capture
FAST_TEXT_MODEL = "cc.es.300.bin" # Model name in fasttext


ft = fasttext.load_model(FAST_TEXT_MODEL)

In [5]:
ft.get_nearest_neighbors('life')

[(0.6984261274337769, 'life-'),
 (0.6904193162918091, 'lives'),
 (0.6745699644088745, 'life.But'),
 (0.6679530143737793, 'life.It'),
 (0.6632518768310547, 'life.The'),
 (0.6621710062026978, 'life.In'),
 (0.6610385775566101, 'life.Now'),
 (0.652584969997406, 'life.So'),
 (0.6445083022117615, 'life.That'),
 (0.643845796585083, 'life--and')]

In [7]:
model = load_facebook_model(FAST_TEXT_MODEL)
#model.wv.most_similar(positive=['red', 'green'])

In [53]:
model.wv.similarity('casa', 'hogar')

0.61479205

In [67]:
a = list(map(lambda x: x.lemma_names('spa'), wn.synsets('casa', lang='spa')))
set(itertools.chain(*a))

['hogar',
 'domiciliación',
 'morada',
 'piso',
 'país',
 'residencia',
 'empresa',
 'alojamiento',
 'casa',
 'trincheras',
 'firma',
 'familia',
 'domicilio',
 'pensión',
 'vivienda',
 'habitáculo']

In [19]:
get_antonyms('bien', lang='spa')

['mal', 'maldad']

In [27]:
get_number_synonyms_vs_antonyms('bien', 'mal', lang='spa')

3

In [38]:
first = wn.synset('apple.n.01')
second = wn.synset('banana.n.01')

0.07142857142857142

In [41]:
fist = first.hypernyms()

In [55]:
fist = list(map(lambda x: x.hypernyms(),list(set(itertools(*fist)))))

TypeError: 'module' object is not callable

In [25]:
get_hypernyms_of_synsets(get_hypernyms_of_synsets(get_hypernyms_of_synsets(get_hypernyms_of_synsets(get_hypernyms_of_synsets(get_hypernyms_of_synsets(get_hypernyms_of_synsets(wn.synsets('bien', lang='spa'))))))))

[]

In [None]:
first = wn.synset('apple.n.01')
second = wn.synset('banana.n.01')