In [95]:
from gensim.models import KeyedVectors

# Load the pre-trained word2vec vectors directly from the gzipped binary file.
# NOTE: `limit=1000000` reads only the first 1,000,000 vectors; the full
# GoogleNews file holds roughly 3 million 300-dimensional vectors. Loading a
# subset cuts load time and memory use, at the cost of words beyond the limit
# being missing from the vocabulary.
model = KeyedVectors.load_word2vec_format('data/GoogleNews-vectors-negative300.bin.gz', binary=True, limit=1000000)

In [96]:
# Look up the embedding for a single word by indexing the model with the word.
vector = model['easy']

# Display the shape of the embedding; the recorded output is (300,),
# i.e. one 300-dimensional vector.
vector.shape

(300,)

In [97]:
# Similarity score between the vectors of two words (gensim computes cosine similarity).
model.similarity('doctor', 'man')

0.3144896

In [98]:
# Nearest neighbours of 'man' in the embedding space, returned as (word, score)
# pairs; the recorded output shows ten results.
model.most_similar('man')

[('woman', 0.7664012312889099),
 ('boy', 0.6824870109558105),
 ('teenager', 0.6586930751800537),
 ('teenage_girl', 0.6147903203964233),
 ('girl', 0.5921714305877686),
 ('robber', 0.5585119128227234),
 ('Robbery_suspect', 0.5584409236907959),
 ('teen_ager', 0.5549196004867554),
 ('men', 0.5489763021469116),
 ('guy', 0.5420035123825073)]

In [172]:
# Analogy query: "man is to doctor as woman is to ...?"
# The 'doctor' and 'woman' vectors contribute positively, 'man' negatively.
model.most_similar(positive=['doctor', 'woman'], negative=['man'])

[('gynecologist', 0.7093892097473145),
 ('nurse', 0.647728681564331),
 ('doctors', 0.6471461057662964),
 ('physician', 0.64389967918396),
 ('pediatrician', 0.6249487996101379),
 ('nurse_practitioner', 0.6218312978744507),
 ('obstetrician', 0.6072014570236206),
 ('ob_gyn', 0.5986712574958801),
 ('midwife', 0.5927063226699829),
 ('dermatologist', 0.5739566683769226)]

#### Pickle the model 

In [107]:
# Persist the loaded vectors to disk so they can be reloaded with KeyedVectors.load.
# NOTE(review): presumably the "models/" directory must already exist — confirm.
model.save("models/model.pkl") # Stores *.pkl file

In [108]:
# Reload the saved vectors, rebinding `model` to the copy read from disk.
# NOTE(review): the saved file is pickle-based — only load files from a trusted source.
model = KeyedVectors.load("models/model.pkl")

#### Use the `requests` module to query the API

In [178]:
import requests

# POST two words to the /similarity endpoint and print the response.
# Connect to the loopback address instead of 0.0.0.0: 0.0.0.0 is a "bind to
# all interfaces" address for servers and is not a valid connection target
# on every platform (e.g. Windows rejects it).
url = 'http://127.0.0.1:5000/similarity'

payload = {"word1": "man",
           "word2": "woman"}

# `data=` sends the payload form-encoded. The timeout makes the cell fail
# fast instead of hanging forever when the server is not reachable.
r = requests.post(url, data=payload, timeout=30)
print(r, r.text)

<Response [200]> {
    "Prediction": "0.76640123"
}



In [179]:
import requests

# POST a single word to the /mostSimilar endpoint and print the response.
# Connect to the loopback address instead of 0.0.0.0: 0.0.0.0 is a "bind to
# all interfaces" address for servers and is not a valid connection target
# on every platform (e.g. Windows rejects it).
url = 'http://127.0.0.1:5000/mostSimilar'

payload = {"word1": "man"}

# `data=` sends the payload form-encoded. The timeout makes the cell fail
# fast instead of hanging forever when the server is not reachable.
r = requests.post(url, data=payload, timeout=30)
print(r, r.text)

<Response [200]> {
    "Prediction": "[('woman', 0.7664012908935547), ('boy', 0.6824870109558105), ('teenager', 0.6586930155754089), ('teenage_girl', 0.6147903800010681), ('girl', 0.5921714305877686), ('robber', 0.5585119128227234), ('Robbery_suspect', 0.5584409236907959), ('teen_ager', 0.5549196600914001), ('men', 0.5489763021469116), ('guy', 0.5420035123825073)]"
}



In [180]:
# man is to doctor as woman is to...
import requests

# POST the three analogy words to the /wordAssociation endpoint.
# Connect to the loopback address instead of 0.0.0.0: 0.0.0.0 is a "bind to
# all interfaces" address for servers and is not a valid connection target
# on every platform (e.g. Windows rejects it).
url = 'http://127.0.0.1:5000/wordAssociation'

payload = {"word1": "man", 
           "word2": "doctor", 
           "word3": "woman"}

# `data=` sends the payload form-encoded. The timeout makes the cell fail
# fast instead of hanging forever when the server is not reachable.
r = requests.post(url, data=payload, timeout=30)
print(r, r.text)

<Response [200]> {
    "Prediction": "[('gynecologist', 0.7093892097473145), ('nurse', 0.647728681564331), ('doctors', 0.6471461057662964), ('physician', 0.64389967918396), ('pediatrician', 0.6249487996101379), ('nurse_practitioner', 0.6218314170837402), ('obstetrician', 0.6072014570236206), ('ob_gyn', 0.5986712574958801), ('midwife', 0.5927063226699829), ('dermatologist', 0.5739566087722778)]"
}



#### Curl command

In [None]:
%%bash
# Run this cell through bash: a bare curl command is not valid Python and
# would raise a SyntaxError in a code cell.
# POST a JSON body to the /similarity endpoint. The loopback address is used
# because 0.0.0.0 is a server bind address, not a portable connection target.
curl --header "Content-Type: application/json" \
  --request POST \
  --data '{"word1": "man", "word2": "woman"}' \
  http://127.0.0.1:5000/similarity