### Multinomial Event Model

In [18]:
# Toy training corpus: seven short movie reviews, positive ones first.
x = [
    "This was awesome an awesome movie",
    "Great movie! I liked it a lot",
    "Happy Ending! awesome acting by the hero",
    "loved it! truly great",
    "bad not upto the mark",
    "could have been better",
    "Surely a Disappointing movie",
]

# One label per review in `x`, in the same order:
# 1 = positive class (first four reviews), 0 = negative class (last three).
y = [1] * 4 + [0] * 3

In [84]:
# Two unseen reviews kept aside as the test set.
x_test = [
    "I was happy & happy and I loved the acting in the movie",
    "The movie I saw was bad",
]

### 1. Cleaning

In [85]:
import clean_text as ct #importing the code that we have already written

In [86]:
# Clean every review with the shared helper (list comprehensions keep this to
# one line each); train and test text go through exactly the same cleaning.
x_clean = [ct.getCleanReview(review) for review in x]
xt_clean = [ct.getCleanReview(review) for review in x_test]



In [87]:
# Show the cleaned training reviews, then the cleaned test reviews, one list per line.
print(x_clean, xt_clean, sep="\n")

['awesom awesom movi', 'great movi like lot', 'happi end awesom act hero', 'love truli great', 'bad upto mark', 'could better', 'sure disappoint movi']
['happi happi love act movi', 'movi saw bad']


### 2. Vectorization

In [88]:
from sklearn.feature_extraction.text import CountVectorizer

In [89]:
# Bag-of-words counts over both single terms and adjacent term pairs.
cv = CountVectorizer(ngram_range=(1, 2))

# fit_transform learns the vocabulary from the training reviews and returns
# their count matrix in one step; toarray() densifies it so it prints in full.
x_vec = cv.fit_transform(x_clean).toarray()

# The matrix itself, followed by its shape: (number of reviews, number of features).
print(x_vec, x_vec.shape, sep="\n")

[[0 0 2 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0]
 [1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0]
 [0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1]
 [0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0]]
(7, 34)


In [90]:
# List the vocabulary learned by the vectorizer, in column order of x_vec.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2,
# so prefer get_feature_names_out() and fall back only on older installs.
if hasattr(cv, "get_feature_names_out"):
    # Returns an ndarray; convert to a plain list so it prints the same way
    # the old API did.
    feature_names = cv.get_feature_names_out().tolist()
else:
    feature_names = cv.get_feature_names()
print(feature_names)

['act', 'act hero', 'awesom', 'awesom act', 'awesom awesom', 'awesom movi', 'bad', 'bad upto', 'better', 'could', 'could better', 'disappoint', 'disappoint movi', 'end', 'end awesom', 'great', 'great movi', 'happi', 'happi end', 'hero', 'like', 'like lot', 'lot', 'love', 'love truli', 'mark', 'movi', 'movi like', 'sure', 'sure disappoint', 'truli', 'truli great', 'upto', 'upto mark']


In [91]:
## Vectorization on the test set
# Only transform() is called here: the vocabulary must come from the training
# data, so fit()/fit_transform() is never run on the test reviews.
xt_vec = cv.transform(xt_clean).toarray()
print(xt_vec)
# (number of test reviews, number of features learned from the training set)
print(xt_vec.shape)

[[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]]
(2, 34)


### 3. Multinomial Naive Bayes

In [92]:
#creating our model and training it
from sklearn.naive_bayes import MultinomialNB,BernoulliNB, GaussianNB

In [93]:
# Instantiate the classifier (MultinomialNB is a class) with its default arguments.
mnb = MultinomialNB()
# Print the estimator to show the settings it was created with.
print(mnb)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)


In [94]:
# Train the model on the training count vectors and their labels.
mnb.fit(x_vec,y)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [96]:
# Predictions on test data: one class label per test review.
# Here the first review is predicted positive (1) and the second negative (0).
mnb.predict(xt_vec)

array([1, 0])

In [107]:
# Posterior probability of each class for every test review (one row per review).
mnb.predict_proba(xt_vec)

array([[0.09580319, 0.90419681],
       [0.61972801, 0.38027199]])

In [108]:
# Accuracy on the training data itself (the same x_vec / y the model was fitted on),
# so this is not a measure of how well the model generalises.
mnb.score(x_vec,y)

1.0

### 4. Multivariate Bernoulli Event Model Naive Bayes


In [99]:
# binarize is the threshold applied to each feature: values greater than 0.0
# are treated as 1, and values less than or equal to 0.0 become 0.
bnb = BernoulliNB(binarize=0.0)

In [100]:
# Print the estimator to show the settings it was created with.
print(bnb)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)


In [101]:
# Train the Bernoulli model on the same training count vectors and labels.
bnb.fit(x_vec,y)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [102]:
# The Bernoulli model does not use how often a term occurs; it only considers
# whether each feature is present or absent in the review.
bnb.predict_proba(xt_vec)


array([[0.10638608, 0.89361392],
       [0.76046221, 0.23953779]])

In [104]:
# Predicted class label for each test review.
bnb.predict(xt_vec)

array([1, 0])

In [105]:
# Accuracy on the training data itself (the same x_vec / y the model was fitted on),
# so this is not a measure of how well the model generalises.
bnb.score(x_vec,y)

1.0