# Polarity Classification

In [1]:
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

class PolarityClassificationModel:
    """Polarity classifier: a text vectorizer feeding a multinomial Naive Bayes.

    The class works on raw text documents; vectorisation is handled
    internally.  Label ``0`` is reported as ``'positive'`` by
    :meth:`predict`; every other label is reported as ``'negative'``.
    """

    def __init__(self, vectorizer=None):
        """Create an unfitted model.

        Args:
            vectorizer: Object exposing ``fit(corpus)`` and
                ``transform(corpus)``.  When omitted (``None``), a new
                ``CountVectorizer`` is created for this instance.
        """
        self.model = MultinomialNB()
        # Build the default here rather than in the signature: a default
        # written as ``CountVectorizer()`` is evaluated once and would be
        # shared (and re-fitted) by every instance created without an
        # explicit vectorizer.
        self.vectorizer = CountVectorizer() if vectorizer is None else vectorizer

    def fit(self, corpus, labels):
        """Fit the vectorizer on ``corpus`` and train the classifier.

        Args:
            corpus: Iterable of text documents.
            labels: Class labels aligned with ``corpus``.
        """
        self.vectorizer.fit(corpus)
        vectors = self.vectorizer.transform(corpus)

        self.model.fit(vectors, labels)

    def predict(self, corpus):
        """Return ``'positive'``/``'negative'`` for each document in ``corpus``.

        A predicted label equal to ``0`` maps to ``'positive'``; anything
        else maps to ``'negative'``.
        """
        vectors = self.vectorizer.transform(corpus)
        return ['positive' if x == 0 else 'negative' for x in self.model.predict(vectors)]

    def score(self, corpus, labels):
        """Return the classifier's ``score`` on ``corpus`` against ``labels``."""
        vectors = self.vectorizer.transform(corpus)
        return self.model.score(vectors, labels)

    def cross_validation_score(self, corpus, labels, k=10):
        """Return the mean micro-averaged F1 over ``k`` stratified folds.

        Only ``transform`` is called on the vectorizer here, so it must have
        been fitted beforehand (e.g. through :meth:`fit`).

        Args:
            corpus: Iterable of text documents.
            labels: Class labels aligned with ``corpus``.
            k: Number of stratified folds.

        Returns:
            The average of the per-fold ``f1_micro`` scores, rounded to
            three decimal places.
        """
        vectors = self.vectorizer.transform(corpus)
        scores = cross_validate(
            self.model,
            vectors,
            labels,
            cv=StratifiedKFold(n_splits=k),
            scoring=['f1_micro'],
        )
        fold_scores = scores['test_f1_micro']
        average = sum(fold_scores) / len(fold_scores)
        return round(average, 3)

In [2]:
import numpy
import random

%run SubjectivityDetection.ipynb


def remove_obj_sents(review, SubjDet_model):
    """Keep only the sentences of ``review`` predicted as subjective.

    Args:
        review: Sequence of sentences, each an iterable of token strings.
        SubjDet_model: Object whose ``predict`` takes the list of
            space-joined sentences and returns one label per sentence.

    Returns:
        The space-joined sentences whose predicted label equals ``'subj'``,
        in their original order.
    """
    sentences = [" ".join(tokens) for tokens in review]
    verdicts = SubjDet_model.predict(sentences)

    kept = []
    for position, sentence in enumerate(sentences):
        if verdicts[position] == 'subj':
            kept.append(sentence)
    return kept
              

def get_pol_class_trained(neg, pos, SubjDet_model, vectorizer=None):
    """Train a polarity model on subjective sentences and score it on a hold-out.

    The first 80% of ``pos`` and of ``neg`` are used for training and the
    remainder for testing.  Every review is first reduced to the sentences
    that ``SubjDet_model`` labels ``'subj'`` (see ``remove_obj_sents``) and
    then flattened into one space-separated document.  Positive reviews get
    label ``0`` and negative reviews label ``1``.

    Args:
        neg: Sliceable sequence of negative reviews; each review is a
            sequence of tokenised sentences.
        pos: Sliceable sequence of positive reviews, same structure.
        SubjDet_model: Subjectivity detector passed to ``remove_obj_sents``.
        vectorizer: Vectorizer handed to ``PolarityClassificationModel``.
            When omitted (``None``), a new ``CountVectorizer`` is created
            for this call.

    Returns:
        A ``(model, score)`` tuple: the fitted ``PolarityClassificationModel``
        and the value of its ``score`` method on the held-out reviews.
    """
    # Resolve the default per call: a ``CountVectorizer()`` written in the
    # signature would be a single object fitted again by every call.
    if vectorizer is None:
        vectorizer = CountVectorizer()

    def to_document(review):
        # One review -> one string made of its subjective sentences only.
        return " ".join(remove_obj_sents(review, SubjDet_model))

    n_train_neg = int(len(neg) * 0.8)
    n_train_pos = int(len(pos) * 0.8)
    train_neg, test_neg = neg[:n_train_neg], neg[n_train_neg:]
    train_pos, test_pos = pos[:n_train_pos], pos[n_train_pos:]

    # Positives first, then negatives, so documents line up with the labels.
    train_docs = [to_document(r) for r in train_pos] + [to_document(r) for r in train_neg]
    train_labels = numpy.array([0] * len(train_pos) + [1] * len(train_neg))

    model = PolarityClassificationModel(vectorizer)
    model.fit(train_docs, train_labels)

    test_docs = [to_document(r) for r in test_pos] + [to_document(r) for r in test_neg]
    test_labels = numpy.array([0] * len(test_pos) + [1] * len(test_neg))
    return model, model.score(test_docs, test_labels)
              