# Subjectivity Detection & Polarity Classification Test

## Subjectivity Detection Test

In [1]:
import numpy
import nltk
nltk.download('subjectivity')
from nltk.corpus import subjectivity

%run SubjectivityDetection.ipynb

# Tokenized sentences from the NLTK subjectivity corpus, one list per category.
sents_subj = subjectivity.sents(categories='subj')
sents_obj = subjectivity.sents(categories='obj')

# One subjectivity detector per vectorizer configuration:
# plain counts, English stop-word removal, 1- to 5-grams, and both combined.
multinomialNB_model = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer())
multinomialNB_model_SW = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer(stop_words='english'))
multinomialNB_model_NGRange = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer(ngram_range=(1, 5)))
multinomialNB_model_SW_NGRange = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer(stop_words='english', ngram_range=(1, 5)))

# Re-join the tokens into plain strings, subjective sentences first.
# Labels follow the same order: 0 = subjective, 1 = objective.
corpus = [" ".join(tokens) for tokens in sents_subj] + [" ".join(tokens) for tokens in sents_obj]
labels = numpy.array([0] * len(sents_subj) + [1] * len(sents_obj))

# (headline, model) pairs, reported in the order the models were trained.
cv_reports = [
    ('Cross-validation score Multinomial Naive Bayes model without stop words control:',
     multinomialNB_model),
    ('Cross-validation score Multinomial Naive Bayes model with stop words control:',
     multinomialNB_model_SW),
    ('Cross-validation score Multinomial Naive Bayes model without stop words control and ngram_range = (1,5):',
     multinomialNB_model_NGRange),
    ('Cross-validation score Multinomial Naive Bayes model with stop words control and ngram_range = (1,5):',
     multinomialNB_model_SW_NGRange),
]

for headline, detector in cv_reports:
    print(headline)
    # Third argument is presumably the number of folds -- confirm in SubjectivityDetection.ipynb.
    print(detector.cross_validation_score(corpus, labels, 10))
    print('----------------------------------------------------------------------')

[nltk_data] Downloading package subjectivity to
[nltk_data]     C:\Users\Elena\AppData\Roaming\nltk_data...
[nltk_data]   Package subjectivity is already up-to-date!


Cross-validation score Multinomial Naive Bayes model without stop words control:
0.921
----------------------------------------------------------------------
Cross-validation score Multinomial Naive Bayes model with stop words control:
0.909
----------------------------------------------------------------------
Cross-validation score Multinomial Naive Bayes model without stop words control and ngram_range = (1,5):
0.924
----------------------------------------------------------------------
Cross-validation score Multinomial Naive Bayes model with stop words control and ngram_range = (1,5):
0.909
----------------------------------------------------------------------


## Polarity Classification Test

In [2]:
import pandas as pd
import numpy
import nltk
nltk.download('movie_reviews')
nltk.download('subjectivity')
from nltk.corpus import movie_reviews
from nltk.corpus import subjectivity

%run SubjectivityDetection.ipynb
%run PolarityClassification.ipynb

def prepare_row_table(neg, pos, sd_model, sd_description):
    """Train four polarity classifiers on top of one subjectivity detector and
    return their cross-validation scores as one row of the results table.

    The four classifiers differ only in their ``CountVectorizer`` settings:
    default, English stop words, ``ngram_range=(1, 5)``, and both combined.
    The score returned by ``get_pol_class_trained`` is printed for each one.

    Parameters
    ----------
    neg, pos :
        Negative and positive reviews. Each review is passed as-is to
        ``get_pol_class_trained`` and ``remove_obj_sents``.
    sd_model :
        Trained subjectivity detector, forwarded to ``get_pol_class_trained``
        and ``remove_obj_sents``.
    sd_description : str
        Human-readable name of ``sd_model``; only used in the printed messages.

    Returns
    -------
    list
        Four values returned by ``cross_validation_score``, in the order:
        default, stop words, n-grams (1,5), stop words + n-grams (1,5).
    """
    # (message prefix, CountVectorizer keyword arguments) for each classifier.
    # The messages state (1,5) so that they match the ngram_range actually used.
    configurations = [
        ('Test score simple model; ', {}),
        ('Test score model with stop word control; ', {'stop_words': 'english'}),
        ('Test score model with ngrams = (1,5); ', {'ngram_range': (1, 5)}),
        ('Test score model with stop word and ngrams = (1,5); ',
         {'stop_words': 'english', 'ngram_range': (1, 5)}),
    ]

    pol_class_models = []
    for message, vectorizer_kwargs in configurations:
        model, score = get_pol_class_trained(neg, pos, sd_model, CountVectorizer(**vectorizer_kwargs))
        print(message , sd_description , ': ' , score)
        pol_class_models.append(model)
    print('------------------------------------------------------------------------------------------')

    # Filter every review through the subjectivity detector before scoring.
    # NOTE(review): remove_obj_sents presumably returns the words of the
    # sentences judged subjective -- confirm in PolarityClassification.ipynb.
    train_neg_subj = [remove_obj_sents(review, sd_model) for review in neg]
    train_pos_subj = [remove_obj_sents(review, sd_model) for review in pos]

    # Positive reviews come first in the corpus; labels: 0 = positive, 1 = negative.
    corpus = [" ".join(words) for words in train_pos_subj] + [" ".join(words) for words in train_neg_subj]
    labelsPolarity = numpy.array([0] * len(train_pos_subj) + [1] * len(train_neg_subj))

    # Third argument is presumably the number of folds -- confirm against the model class.
    return [model.cross_validation_score(corpus, labelsPolarity, 10) for model in pol_class_models]



# Tokenized sentences from the NLTK subjectivity corpus, used to train the detectors.
sents_subj = subjectivity.sents(categories='subj')
sents_obj = subjectivity.sents(categories='obj')

# Four subjectivity detectors, one per vectorizer configuration.
SD_model = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer())
SD_model_SW = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer(stop_words='english'))
SD_model_NGRange = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer(ngram_range=(1, 5)))
SD_model_SW_NGRange = get_subj_det_trained(
    sents_subj, sents_obj, CountVectorizer(stop_words='english', ngram_range=(1, 5)))

# Materialize the movie reviews (read through the corpus reader's paras() view) as lists.
neg = list(movie_reviews.paras(categories='neg'))
pos = list(movie_reviews.paras(categories='pos'))

# One table row per subjectivity detector; each row holds the four
# polarity-classifier cross-validation scores from prepare_row_table.
detector_variants = [
    (SD_model, 'Base Subjectivity Detector'),
    (SD_model_SW, 'Subjectivity Detector with stop word control'),
    (SD_model_NGRange, 'Subjectivity Detector with ngrams = (1,5)'),
    (SD_model_SW_NGRange, 'Subjectivity Detector with stop word control and ngrams = (1,5)'),
]
tableData = [
    prepare_row_table(neg, pos, detector, description)
    for detector, description in detector_variants
]

# Rows = subjectivity detector (SD) variant, columns = polarity classifier (PC) variant.
table = pd.DataFrame(
    tableData,
    index=['SD','SD stop words','SD ngrams = (1,5)','SD stop words & ngrams = (1,5)'],
    columns=['PC','PC stop words','PC ngrams = (1,5)','PC stop words & ngrams = (1,5)'],
)

print('Cross-validation scores')
display(table)
    

[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Elena\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package subjectivity to
[nltk_data]     C:\Users\Elena\AppData\Roaming\nltk_data...
[nltk_data]   Package subjectivity is already up-to-date!


Test score simple model;  Base Subjectivity Detector :  0.825
Test score model with stop word control;  Base Subjectivity Detector :  0.8075
Test score model with ngrams = (1,3);  Base Subjectivity Detector :  0.875
Test score model with stop word and ngrams = (1,3);  Base Subjectivity Detector :  0.815
------------------------------------------------------------------------------------------
Test score simple model;  Subjectivity Detector with stop word control :  0.8275
Test score model with stop word control;  Subjectivity Detector with stop word control :  0.8125
Test score model with ngrams = (1,3);  Subjectivity Detector with stop word control :  0.875
Test score model with stop word and ngrams = (1,3);  Subjectivity Detector with stop word control :  0.825
------------------------------------------------------------------------------------------
Test score simple model;  Subjectivity Detector with ngrams = (1,5) :  0.84
Test score model with stop word control;  Subjectivity Dete

Unnamed: 0,PC,PC stop words,"PC ngrams = (1,5)","PC stop words & ngrams = (1,5)"
SD,0.841,0.832,0.877,0.822
SD stop words,0.842,0.836,0.876,0.83
"SD ngrams = (1,5)",0.854,0.85,0.875,0.841
"SD stop words & ngrams = (1,5)",0.842,0.834,0.875,0.832
