## Working with Word Vectors and Sentiment Analysis!

In [None]:
import spacy
# Load the "en_core_web_lg" spaCy pipeline once; later cells read word vectors via nlp.vocab and nlp(text).
nlp = spacy.load("en_core_web_lg")

In [2]:
# Fetch the .vector of the vocab entry for each of the three analogy words.
king, man, woman = (nlp.vocab[term].vector for term in ("king", "man", "woman"))


#### Using cosine similarity to measure how close two word vectors are

In [4]:
from scipy import spatial
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    ``spatial.distance.cosine`` yields the cosine *distance*, so the value is
    subtracted from 1 to turn it back into a similarity.

    Args:
        vec1: First vector (1-D array-like).
        vec2: Second vector (1-D array-like).

    Returns:
        ``1 - spatial.distance.cosine(vec1, vec2)``; larger means the two
        vectors point in more similar directions.
    """
    return 1 - spatial.distance.cosine(vec1, vec2)

In [5]:
# Analogy vector: start from "king", subtract "man", add "woman".
new_vector = king - man + woman

In [6]:
# Score every lowercase, alphabetic vocab entry that has a vector against new_vector.
# NOTE(review): only entries yielded by iterating nlp.vocab are considered; how many
# that is may depend on the installed spaCy/model version — confirm on a fresh kernel.
similarities = [
    (lexeme, cosine_similarity(lexeme.vector, new_vector))
    for lexeme in nlp.vocab
    if lexeme.has_vector and lexeme.is_lower and lexeme.is_alpha
]

# Highest similarity first (sort on the negated score).
similarities.sort(key=lambda pair: -pair[1])

# Show the text of the ten best matches.
print([similarities[rank][0].text for rank in range(10)])


['king', 'queen', 'prince', 'kings', 'princess', 'royal', 'throne', 'queens', 'monarch', 'kingdom']


#### Perform a - b + c vector arithmetic and print the ten most similar words

In [7]:
def vector_math(a, b, c, top_n=10):
    """Print the words whose vectors are closest to ``a - b + c``.

    Each input string is passed through ``nlp`` and its ``.vector`` is used
    for the arithmetic. Every entry of ``nlp.vocab`` that has a vector and is
    lowercase and alphabetic is then scored against the result with
    ``cosine_similarity``.

    Args:
        a: Text whose vector is the starting point.
        b: Text whose vector is subtracted.
        c: Text whose vector is added.
        top_n: Maximum number of words to print (default 10, matching the
            previous hard-coded value).

    Returns:
        None. The matching words are printed as a list, best match first.
    """
    target = nlp(a).vector - nlp(b).vector + nlp(c).vector

    scored = [
        (lexeme, cosine_similarity(lexeme.vector, target))
        for lexeme in nlp.vocab
        if lexeme.has_vector and lexeme.is_lower and lexeme.is_alpha
    ]
    # Highest similarity first (sort on the negated score).
    scored.sort(key=lambda pair: -pair[1])

    # Slice rather than index 0..9 so that fewer than top_n candidates
    # prints what is available instead of raising IndexError.
    print([lexeme.text for lexeme, _ in scored[:max(top_n, 0)]])
   

In [18]:
# Run the king - man + woman analogy through the helper.
vector_math('king','man','woman')

['king', 'queen', 'prince', 'kings', 'princess', 'royal', 'throne', 'queens', 'monarch', 'kingdom']


#### Compute SentimentIntensityAnalyzer polarity scores for any review that we write

In [20]:
import nltk
# Download the 'vader_lexicon' NLTK package (progress is printed below the cell).
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# One analyzer instance, reused by the sentiment cells that follow.
sid = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/adarshpachori/nltk_data...


In [21]:
# Sample review text scored by the sentiment cells below.
review = 'I am so happy playing on my nintendo DS. YAYYYY!!'

In [22]:
# Display the polarity scores for the sample review (the output has 'neg', 'neu', 'pos' and 'compound' keys).
sid.polarity_scores(review)

{'neg': 0.0, 'neu': 0.513, 'pos': 0.487, 'compound': 0.7687}

#### Categorizing the compound polarity score as positive, neutral, or negative

In [25]:
def review_rating(string, threshold=0.0):
    """Label a piece of text as "Positive", "Neutral" or "Negative".

    The label is derived from the ``'compound'`` entry of
    ``sid.polarity_scores(string)``.

    Args:
        string: The review text to score.
        threshold: Half-width of the neutral band. A compound score whose
            magnitude is strictly below this value is reported as "Neutral".
            The default of 0.0 keeps the original rule, where only a score
            of exactly 0 is neutral.

    Returns:
        "Positive", "Neutral" or "Negative". None is returned only when the
        score is neither equal to, above, nor below zero (e.g. NaN), which
        matches the original fall-through.
    """
    score = sid.polarity_scores(string)['compound']
    if score == 0 or abs(score) < threshold:
        return "Neutral"
    if score > 0:
        return "Positive"
    if score < 0:
        return "Negative"
    # Reached only for a score that is unordered with respect to 0.
    return None

In [26]:
# Label the sample review from its compound score.
review_rating(review)

'Positive'