### Word2Vec with spaCy

In [35]:
import spacy
import numpy as np
import pandas as pd

### Loading the Pipeline

In [3]:
# Name of the spaCy pipeline package to load; the cells below read word
# vectors from the resulting `nlp.vocab`.
MODEL_NAME = "en_core_web_lg"
nlp = spacy.load(MODEL_NAME)

### Explore Word Vectors

In [8]:
# Show the shape of the vector stored for "car". The label used to read
# "Vocab Size", but the value printed is the shape of one word vector, not
# the number of entries in the vocabulary. Re-run the cell to refresh the
# saved output.
print("Vector Shape:", nlp.vocab.vectors['car'].shape)
# The string store maps the text "car" to the integer ID used as its key.
print("Word ID:     ", nlp.vocab.strings['car'])

Vocab Size:  (300,)
Word ID:     17545852598994811774


### Finding Similar Words

In [20]:
# Resolve "car" to its integer ID, fetch the matching vector, and wrap it in
# a one-row 2-D array so it is passed as a single query.
car_key = nlp.vocab.strings['car']
car_vector = nlp.vocab.vectors[car_key]
query = np.asarray([car_vector])
# Ask the vector table for the 5 entries closest to that query row.
most_similar = nlp.vocab.vectors.most_similar(query, n=5)

In [24]:
# First element of the result, first row: the IDs of the neighbours found for
# the single query vector (result layout per spaCy's Vectors.most_similar docs).
neighbour_keys = most_similar[0][0]
for word in neighbour_keys:
    # Translate the integer ID back to its text and print it centred in 20 columns.
    label = nlp.vocab.strings[word]
    print(label.center(20))

        car         
      vehicle       
       truck        
        cars        
     motorbike      


### Dealing with Sentences | Sentence Embedding

In [28]:
# Run both sentences through the pipeline, then compare the two documents.
# NOTE(review): 'lkie' looks like a typo for 'like'; it is left unchanged so
# the saved similarity score below stays valid — confirm intent.
doc1, doc2 = map(nlp, ('I lkie cricket', 'I lkie football'))
doc1.similarity(doc2)

0.9456318530472728

### OOV or Out Of Vocabulary

In [37]:
# "Woow" is a deliberately misspelled word used to show how a token without
# a vector is reported.
tokens = nlp("Woow the weather is great")
# One row per token: its text, whether it has a vector, and whether spaCy
# flags it as out-of-vocabulary.
pd.DataFrame({
    'Token': [tok.text for tok in tokens],
    'Vector': [tok.has_vector for tok in tokens],
    'OOV': [tok.is_oov for tok in tokens],
})

Unnamed: 0,Token,Vector,OOV
0,Woow,False,True
1,the,True,False
2,weather,True,False
3,is,True,False
4,great,True,False
