### Multinomial Event Model

In [21]:
# Toy sentiment corpus written as (review text, label) pairs and then split
# into parallel lists: X holds the texts, Y the labels.
# Label convention: 1 - Positive and 0 - Negative
X, Y = map(list, zip(*[
    ("This was an awesome awesome movie", 1),
    ("Great movie! I linked it a lot", 1),
    ("Happy Ending! awesome acting by the hero", 1),
    ("Loved it! truly great", 1),
    ("bad not upto the mark", 0),
    ("could have been better", 0),
    ("Surely a Disappointing movie", 0),
]))

In [22]:
# Unlabelled reviews kept aside for prediction only.
test_x = [
    "I was happy & happy I loved the acting in the movie",
    "the movie I saw was bad",
]

### 1. Cleaning

In [23]:
import clean_review as cr

In [24]:
# Run every training and test review through the same cleaning routine so
# both corpora end up in the same normalised token form.
X_clean = list(map(cr.getStemmedReview, X))
X_test_clean = list(map(cr.getStemmedReview, test_x))

In [25]:
# Display the cleaned training and test reviews together (rendered as a tuple).
X_clean, X_test_clean

(['awesom awesom movi',
  'great movi link lot',
  'happi end awesom act hero',
  'love truli great',
  'bad upto mark',
  'could better',
  'sure disappoint movi'],
 ['happi happi love act movi', 'movi saw bad'])

### 2. Vectorization

In [26]:
from sklearn.feature_extraction.text import CountVectorizer

In [27]:
# Bag-of-words vectorizer; no arguments are passed, so library defaults apply.
cv = CountVectorizer()

In [28]:
# Learn the vocabulary from the cleaned training reviews and build the
# document-term count matrix (one row per review, one column per term),
# converted to a dense array.
x_vec = cv.fit_transform(X_clean).toarray()

In [29]:
# Training count matrix: rows follow X_clean, columns follow the learned vocabulary.
x_vec

array([[0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0],
       [1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
       [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]],
      dtype=int64)

In [30]:
# Vocabulary learned by the vectorizer, in the column order of x_vec.
# CountVectorizer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2, so prefer get_feature_names_out() when the installed version
# provides it and fall back to the old method otherwise. list(...) keeps the
# plain-list display regardless of which method is used.
list(
    cv.get_feature_names_out()
    if hasattr(cv, "get_feature_names_out")
    else cv.get_feature_names()
)

['act',
 'awesom',
 'bad',
 'better',
 'could',
 'disappoint',
 'end',
 'great',
 'happi',
 'hero',
 'link',
 'lot',
 'love',
 'mark',
 'movi',
 'sure',
 'truli',
 'upto']

In [31]:
### Vectorization on test set
# transform() only (no refit): the test reviews are mapped onto the vocabulary
# learned from the training reviews, so a term that never appeared in training
# (e.g. 'saw') gets no column and is not counted.
xt_vec = cv.transform(X_test_clean).toarray()
print(xt_vec)

[[1 0 0 0 0 0 0 0 2 0 0 0 1 0 1 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]]


### 3. Multinomial Naive Bayes

In [32]:
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

In [33]:
# Multinomial event model classifier, fed with per-document term counts below.
# No arguments are passed, so library defaults apply.
mnb = MultinomialNB()
print(mnb)

MultinomialNB()


In [35]:
# Training: fit the multinomial model on the training count matrix and labels.
mnb.fit(x_vec, Y)

In [38]:
# Predicted label for each test review (1 = positive, 0 = negative).
mnb.predict(xt_vec)

array([1, 0])

In [39]:
# Per-review class probabilities; one row per test review, columns are
# P(class 0), P(class 1).
mnb.predict_proba(xt_vec)

array([[0.09332629, 0.90667371],
       [0.61699717, 0.38300283]])

### 4. Multivariate Bernoulli Event Model Naive Bayes

In [41]:
# Multivariate Bernoulli event model: each term is treated as present/absent.
# NOTE(review): x_vec holds raw counts; BernoulliNB's documented default
# binarize=0.0 thresholds them to 0/1, so repeated words count once here.
bnb = BernoulliNB()

In [45]:
# Fit on the same training matrix and labels used for the multinomial model.
bnb.fit(x_vec, Y)

In [46]:
# Per-review class probabilities from the Bernoulli model; columns are
# P(class 0), P(class 1).
bnb.predict_proba(xt_vec)

array([[0.07647628, 0.92352372],
       [0.68830318, 0.31169682]])

In [47]:
# Predicted label for each test review (1 = positive, 0 = negative).
bnb.predict(xt_vec)

array([1, 0])

In [48]:
# Mean accuracy on the training data itself (x_vec, Y are what the model was
# fitted on), so this is a sanity check, not an estimate of generalisation.
bnb.score(x_vec, Y)

1.0