# Subjectivity Detection

# In [1]:
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate

class SubjectivityDetectionModel:
    """Bag-of-words subjectivity classifier built on multinomial Naive Bayes.

    Texts are turned into feature vectors by ``self.vectorizer`` and
    classified by ``self.model``. ``predict`` reports class ``0`` as
    ``'subj'`` and any other class as ``'obj'``.
    """

    def __init__(self, vectorizer=None):
        """Create an untrained model.

        Args:
            vectorizer: Object exposing ``fit(corpus)`` and
                ``transform(corpus)``. When omitted (``None``), a new
                ``CountVectorizer`` is created for this instance.
        """
        # A ``CountVectorizer()`` default in the signature is evaluated once,
        # at definition time, and would be shared by every instance: fitting
        # one model would then replace the vocabulary used by all the others.
        if vectorizer is None:
            vectorizer = CountVectorizer()
        self.model = MultinomialNB()
        self.vectorizer = vectorizer

    def fit(self, corpus, labels):
        """Fit the vectorizer on ``corpus``, then train the classifier.

        Args:
            corpus: Iterable of raw text documents.
            labels: Class label for each document, in the same order.
        """
        self.vectorizer.fit(corpus)
        vectors = self.vectorizer.transform(corpus)

        self.model.fit(vectors, labels)

    def predict(self, corpus):
        """Return a list with ``'subj'`` or ``'obj'`` for each document.

        A predicted class equal to ``0`` maps to ``'subj'``; every other
        predicted class maps to ``'obj'``.
        """
        vectors = self.vectorizer.transform(corpus)
        return ['subj' if x == 0 else 'obj' for x in self.model.predict(vectors)]

    def score(self, corpus, labels):
        """Return the classifier's own ``score`` on ``corpus`` vs ``labels``."""
        vectors = self.vectorizer.transform(corpus)
        return self.model.score(vectors, labels)

    def cross_validation_score(self, corpus, labels, k=10):
        """Return the mean micro-averaged F1 over ``k`` stratified folds.

        The vectorizer is only used to ``transform`` here, not refitted, so
        it must already have been fitted (for example through ``fit``).

        Args:
            corpus: Iterable of raw text documents.
            labels: Class label for each document, in the same order.
            k: Number of folds passed to ``StratifiedKFold``.

        Returns:
            The average of the per-fold ``f1_micro`` scores, rounded to three
            decimal places.
        """
        # NOTE(review): the vocabulary comes from the already-fitted
        # vectorizer; if that was fitted on this same corpus, held-out folds
        # have influenced the feature space. Confirm this is acceptable.
        vectors = self.vectorizer.transform(corpus)
        scores = cross_validate(
            self.model,
            vectors,
            labels,
            cv=StratifiedKFold(n_splits=k),
            scoring=['f1_micro'],
        )
        fold_scores = scores['test_f1_micro']
        average = sum(fold_scores) / len(fold_scores)
        return round(average, 3)

# In [2]:
import numpy

def get_subj_det_trained(sents_subj, sents_obj, vectorizer=None):
    """Build and train a ``SubjectivityDetectionModel`` from token lists.

    Args:
        sents_subj: Sequence of sentences labelled ``0``; each sentence is an
            iterable of string tokens.
        sents_obj: Sequence of sentences labelled ``1``; same format.
        vectorizer: Vectorizer handed to the model. When omitted (``None``),
            a new ``CountVectorizer`` is created for this call.

    Returns:
        The fitted ``SubjectivityDetectionModel``.
    """
    # A ``CountVectorizer()`` default in the signature would be created once
    # and reused by every call, so each new training run would refit the
    # vectorizer that previously returned models are still holding.
    if vectorizer is None:
        vectorizer = CountVectorizer()

    # Each sentence arrives tokenised; the vectorizer expects plain strings.
    subj_texts = [" ".join(sent) for sent in sents_subj]
    obj_texts = [" ".join(sent) for sent in sents_obj]
    corpus = subj_texts + obj_texts
    labels = numpy.array([0] * len(subj_texts) + [1] * len(obj_texts))

    model = SubjectivityDetectionModel(vectorizer)
    model.fit(corpus, labels)

    return model
