### Multinomial Event Model

In [2]:
## Toy training corpus as (review text, sentiment label) pairs;
## label 1 marks the positive reviews, 0 the negative ones.
labelled_reviews = [
    ("This was an awesome movie", 1),
    ("Great movie! I liked it a lot", 1),
    ("Happy Ending! Awesome acting by the hero", 1),
    ("Loved it! Truly great", 1),
    ("Bad. Not upto the mark", 0),
    ("Could have been better", 0),
    ("Surely a disappointing movie", 0),
]

## Split the pairs back into the parallel lists used by the rest of the notebook.
x = [review for review, _ in labelled_reviews]
y = [label for _, label in labelled_reviews]

In [6]:
## Unseen reviews used only for prediction (never for fitting).
x_test = [
    "I was happy & happy and I loved the acting in the movie",
    "The movie I saw was bad",
]

### 1. Cleaning

In [7]:
import clean_text as ct

In [8]:
## Run every review through the project's clean_text helper.
## NOTE(review): judging by the printed result it lower-cases, drops stop
## words and stems the tokens — confirm against clean_text.getCleanReview.
x_clean = list(map(ct.getCleanReview, x))
xt_clean = list(map(ct.getCleanReview, x_test))

In [9]:
## Show the cleaned training reviews, then the cleaned test reviews, one list per line.
print(x_clean, xt_clean, sep="\n")

['awesom movi', 'great movi like lot', 'happi end awesom act hero', 'love truli great', 'bad upto mark', 'could better', 'sure disappoint movi']
['happi happi love act movi', 'movi saw bad']


### 2. Vectorization

In [10]:
from sklearn.feature_extraction.text import CountVectorizer

In [11]:
## Bag-of-words encoder with default settings.
cv = CountVectorizer()

## Learn the vocabulary from the cleaned training reviews and encode them in
## one step; .toarray() turns the result into a dense array so it prints in full.
x_vec = cv.fit_transform(x_clean).toarray()
print(x_vec)

[[0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0]
 [1 1 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1]
 [0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0]]


In [13]:
## Print the vocabulary learned by the vectorizer.
## `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2,
## so prefer `get_feature_names_out()` and fall back only on older releases.
if hasattr(cv, "get_feature_names_out"):
    feature_names = cv.get_feature_names_out()
else:
    feature_names = cv.get_feature_names()

## The newer API returns a NumPy array of strings; convert to plain `str`
## so the printed output stays a regular Python list of words.
print([str(name) for name in feature_names])

['act', 'awesom', 'bad', 'better', 'could', 'disappoint', 'end', 'great', 'happi', 'hero', 'like', 'lot', 'love', 'mark', 'movi', 'sure', 'truli', 'upto']


In [14]:
## Vectorization on the test set
## Only transform() is called here, so the vocabulary fitted on the training
## reviews is reused; test words outside it (e.g. 'saw') get no column.

xt_vec = cv.transform(xt_clean).toarray()
print(xt_vec)

[[1 0 0 0 0 0 0 0 2 0 0 0 1 0 1 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]]


### 3. Multinomial Naive Bayes

In [21]:
from sklearn.naive_bayes import MultinomialNB, BernoulliNB

In [17]:
## Multinomial event model: features are word counts per review.
## Constructed with library defaults.
mnb = MultinomialNB()

In [18]:
## Training
## Fit on the count vectors of the training reviews and their labels.
mnb.fit(x_vec,y)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [19]:
## Predictions
## Predicted class label for each test review, in the order of xt_vec's rows.

mnb.predict(xt_vec)

array([1, 0])

In [28]:
## Class probabilities for each test review: one row per review,
## first column is class 0, second column is class 1.
mnb.predict_proba(xt_vec)

array([[0.08109667, 0.91890333],
       [0.60235294, 0.39764706]])

In [29]:
## Accuracy on (x_vec, y) — the same data the model was fitted on, so this is
## training accuracy and says nothing about generalisation.
mnb.score(x_vec,y)

1.0

### 4. Multivariate Bernoulli Event Model Naive Bayes

In [22]:
## Bernoulli event model: features are treated as word present / absent.
## Constructed with library defaults.
bnb = BernoulliNB()

In [23]:
## Fit on the same count vectors used for the multinomial model.
## With the default binarize=0.0 (see the repr below), any count above zero
## is thresholded to "present" by the estimator.
bnb.fit(x_vec,y)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [24]:
## Class probabilities for each test review: one row per review,
## first column is class 0, second column is class 1.
bnb.predict_proba(xt_vec)

array([[0.07647628, 0.92352372],
       [0.68830318, 0.31169682]])

In [26]:
## Predicted class label for each test review, in the order of xt_vec's rows.
bnb.predict(xt_vec)

array([1, 0])

In [27]:
## Accuracy on (x_vec, y) — the same data the model was fitted on, so this is
## training accuracy and says nothing about generalisation.
bnb.score(x_vec,y)

1.0