# Sentiment Analysis baseline model

### Import dependencies

In [1]:
import os
import time

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

### Build training dataset and testing dataset

In [2]:
# NOTE: machine-specific absolute path. It must point at a directory that
# contains one sub-directory per entry in `classes`.
data_dir = "/home/jalaj/PycharmProjects/Sentiment_Analysis/data"
classes = ['positiveReviews', 'negativeReviews']

# Read every review file; the sub-directory name doubles as the class label.
train_data = []
train_labels = []
test_data = []
test_labels = []
for curr_class in classes:
    dirname = os.path.join(data_dir, curr_class)
    # os.listdir() returns entries in arbitrary order. Sorting keeps the sample
    # order (and therefore the review shown by test_data[10]) reproducible.
    for fname in sorted(os.listdir(dirname)):
        with open(os.path.join(dirname, fname), 'r') as f:
            content = f.read()
        # Hold-out split: files whose name starts with '12' form the test set,
        # everything else is used for training.
        if fname.startswith('12'):
            test_data.append(content)
            test_labels.append(curr_class)
        else:
            train_data.append(content)
            train_labels.append(curr_class)
                


### Generate feature vectors using TfidfVectorizer

In [3]:
# Build TF-IDF features. The vectorizer is fitted on the training reviews only
# and then applied, unchanged, to the test reviews.
vectorizer = TfidfVectorizer(
    min_df=5,           # ignore terms found in fewer than 5 documents
    max_df=0.8,         # ignore terms found in more than 80% of documents
    sublinear_tf=True,  # use 1 + log(tf) instead of the raw term frequency
    use_idf=True,       # apply inverse-document-frequency reweighting
)
train_vectors = vectorizer.fit_transform(train_data)
test_vectors = vectorizer.transform(test_data)


### Perform training using different ML algorithms

In [4]:
# Train four classifiers on the same TF-IDF training matrix and predict the
# labels of the held-out test reviews with each of them.
# fit() returns the fitted estimator, so construction and training are chained.

# 1) Multinomial Naive Bayes
clf = MultinomialNB().fit(train_vectors, train_labels)
prediction = clf.predict(test_vectors)

# 2) SVM with default settings (SVC's default kernel is RBF)
classifier_rbf = svm.SVC().fit(train_vectors, train_labels)
prediction_rbf = classifier_rbf.predict(test_vectors)

# 3) SVM with an explicit linear kernel
classifier_linear = svm.SVC(kernel='linear').fit(train_vectors, train_labels)
prediction_linear = classifier_linear.predict(test_vectors)

# 4) LinearSVC (a separate linear SVM implementation from SVC(kernel='linear'))
classifier_liblinear = svm.LinearSVC().fit(train_vectors, train_labels)
prediction_liblinear = classifier_liblinear.predict(test_vectors)

### Test the result of MultinomialNB

In [5]:
# Report MultinomialNB performance on the held-out reviews.
# Every print is a function call with a single string argument, so the cell
# runs unchanged on both Python 2 and Python 3.
print("\nResults for NaiveBayes (MultinomialNB) ")
print(classification_report(test_labels, prediction))
print("\nAccuracy score of Multinomial naive bayes algorithm -----> " + str(accuracy_score(test_labels, prediction)))


print("\n\n\n")

# Show one example review together with the label MultinomialNB assigned to it.
print("Reviews Prediction")
print("\nPredicted label is------> " + prediction[10])
print("\nMovie Review is ------> \n" + test_data[10])


Results for NaiveBayes (MultinomialNB) 
                 precision    recall  f1-score   support

negativeReviews       0.79      0.87      0.82       611
positiveReviews       0.85      0.77      0.81       611

    avg / total       0.82      0.82      0.82      1222


Accuracy score of Multinomial naive bayes algorithm -----> 0.815875613748




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
- After their sons are sentenced to life in prison, Adelle (Debbie Reynolds) and Helen (Shirley Winters) begin receiving threatening phone calls because someone fells their sons got off easy. The pair decides to move to California to escape the publicity of the trial and to start a new life. They start a dance school that is soon very successful. One of the students has a rich unmarried father with whom Adelle quickly falls in love. In the meantime, Helen is busy raising rabbits and becoming a little too infatuated with an evangelist on the radio. It's on

### Test the result of SVM with rbf kernel

In [6]:
# Report performance of the RBF-kernel SVM on the held-out reviews.
# Every print is a function call with a single string argument, so the cell
# runs unchanged on both Python 2 and Python 3.
print("\nResults for SVM algorithm with rbf kernel")
print(classification_report(test_labels, prediction_rbf))
print("\nAccuracy score of SVM algorithm with rbf kernel-----> " + str(accuracy_score(test_labels, prediction_rbf)))


print("\n\n\n")

# Show one example review with the label predicted by THIS model
# (prediction_rbf, not the Naive Bayes `prediction` array).
print("Reviews Prediction")
print("\nPredicted label is------> " + prediction_rbf[10])
print("\nMovie Review is ------> \n" + test_data[10])


Results for SVM algorithm with rbf kernel
                 precision    recall  f1-score   support

negativeReviews       0.98      0.31      0.48       611
positiveReviews       0.59      1.00      0.74       611

    avg / total       0.79      0.65      0.61      1222


Accuracy score of SVM algorithm with rbf kernel-----> 0.654664484452




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
- After their sons are sentenced to life in prison, Adelle (Debbie Reynolds) and Helen (Shirley Winters) begin receiving threatening phone calls because someone fells their sons got off easy. The pair decides to move to California to escape the publicity of the trial and to start a new life. They start a dance school that is soon very successful. One of the students has a rich unmarried father with whom Adelle quickly falls in love. In the meantime, Helen is busy raising rabbits and becoming a little too infatuated with an evangelist on the radio. It's only 

### Test the result of SVM with linear kernel

In [7]:
# Report performance of the linear-kernel SVM on the held-out reviews.
# Every print is a function call with a single string argument, so the cell
# runs unchanged on both Python 2 and Python 3.
print("\nResults for SVM algorithm with linear kernel")
print(classification_report(test_labels, prediction_linear))
print("\nAccuracy score of SVM algorithm with linear kernal-----> " + str(accuracy_score(test_labels, prediction_linear)))

print("\n\n\n")

# Show one example review with the label predicted by THIS model
# (prediction_linear, not the Naive Bayes `prediction` array).
print("Reviews Prediction")
print("\nPredicted label is------> " + prediction_linear[10])
print("\nMovie Review is ------> \n" + test_data[10])


Results for SVM algorithm with linear kernel
                 precision    recall  f1-score   support

negativeReviews       0.82      0.86      0.84       611
positiveReviews       0.85      0.81      0.83       611

    avg / total       0.84      0.84      0.84      1222


Accuracy score of SVM algorithm with linear kernal-----> 0.836333878887




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
- After their sons are sentenced to life in prison, Adelle (Debbie Reynolds) and Helen (Shirley Winters) begin receiving threatening phone calls because someone fells their sons got off easy. The pair decides to move to California to escape the publicity of the trial and to start a new life. They start a dance school that is soon very successful. One of the students has a rich unmarried father with whom Adelle quickly falls in love. In the meantime, Helen is busy raising rabbits and becoming a little too infatuated with an evangelist on the radio. It's

### Test the result of SVM with LinearSVC

In [8]:
# Report performance of the LinearSVC model on the held-out reviews.
# Every print is a function call with a single string argument, so the cell
# runs unchanged on both Python 2 and Python 3.
print("\nResults for SVM algorithm with liblinear kernel")
print(classification_report(test_labels, prediction_liblinear))
print("\nAccuracy score of SVM algorithm with liblinear kernal-----> " + str(accuracy_score(test_labels, prediction_liblinear)))


print("\n\n\n")

# Show one example review with the label predicted by THIS model
# (prediction_liblinear, not the Naive Bayes `prediction` array).
print("Reviews Prediction")
print("\nPredicted label is------> " + prediction_liblinear[10])
print("\nMovie Review is ------> \n" + test_data[10])


Results for SVM algorithm with liblinear kernel
                 precision    recall  f1-score   support

negativeReviews       0.82      0.86      0.84       611
positiveReviews       0.85      0.81      0.83       611

    avg / total       0.84      0.84      0.84      1222


Accuracy score of SVM algorithm with liblinear kernal-----> 0.836333878887




Reviews Prediction

Predicted label is------> positiveReviews

Movie Review is ------> 
- After their sons are sentenced to life in prison, Adelle (Debbie Reynolds) and Helen (Shirley Winters) begin receiving threatening phone calls because someone fells their sons got off easy. The pair decides to move to California to escape the publicity of the trial and to start a new life. They start a dance school that is soon very successful. One of the students has a rich unmarried father with whom Adelle quickly falls in love. In the meantime, Helen is busy raising rabbits and becoming a little too infatuated with an evangelist on the radio