In [54]:
from copy import deepcopy
from collections import Counter
from math import log

In [79]:
class NaiveBayes:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.vocab = None
        self.class_vocabs = None
        self.size_vocab = 0
        self.empty_prediction_dict = None
        self.class_priors = None
    
    def train(self):
        """X is list of list of words. y is the class labels corresponding to each word list"""
        self.vocab = Counter()
        self.class_priors = {classname: count/len(y) for classname, count in Counter(self.y).items()}
        self.class_vocabs = {classname:Counter() for classname in set(self.y)}
        for idx, wordlist in enumerate(self.X):
            word_counts = Counter(wordlist)
            self.class_vocabs[self.y[idx]] += word_counts
            self.vocab += word_counts
        self.size_vocab = len(self.vocab)
        self.empty_prediction_dict = {key:0 for key in self.class_vocabs.keys()}
        
    def predict(self, test_documents):
        """list of dictionaries of lists of word counts. Each requires a prediction"""
        predictions = []
        for idx, document in enumerate(test_documents):
            predictions.append(deepcopy(self.empty_prediction_dict))
            for classname in self.empty_prediction_dict.keys():
                total_class_word_count = sum(self.class_vocabs[classname].values())
                for word in document:
                    if word in self.class_vocabs[classname]:
                        word_freq = self.class_vocabs[classname][word] + 1
                    else:
                        word_freq = 1
                    predictions[idx][classname] += log(word_freq / total_class_word_count + self.size_vocab)
                predictions[idx][classname] += log(self.class_priors[classname])
        ml_predictions = [] #maximum likelihood predictions
        for prediction in predictions:
            ml_predictions.append(max(prediction, key=prediction.get))
        return ml_predictions, predictions

In [80]:
X = [['lion', 'tiger', 'jungle', 'wonderful'], ['habitat', 'lion', 'savanah'], ['jungle', 'tiger', 'lion'],
     ['liars', 'fools', 'morons', 'debate'], ['liars', 'liars', 'liars', 'suits'], ['debate', 'laws', 'fools'],
     ['stars', 'light', 'rocket', 'stars', 'lightyears'], ['rocket', 'rocket', 'wonderful', 'liars'], ['sun', 'jungle', 'stars']]
y = ['nature', 'nature', 'nature', 'politics', 'politics', 'politics', 'space', 'space', 'space']
test = [['liars', 'fools', 'green chairs', 'argue', 'morons'], ['sun', 'lightyears', 'jungle', 'rocket', 'rocket'], 
        ['jungle', 'tiger'], ['lion', 'lion', 'wonderful', 'teeth']]

In [81]:
model = NaiveBayes(X, y)
model.train()
class_predictions, all_predictions = model.predict(test)
print(class_predictions)
print()
print(all_predictions)

{'nature': 0.3333333333333333, 'politics': 0.3333333333333333, 'space': 0.3333333333333333}
['politics', 'space', 'nature', 'nature']

[{'nature': 13.096780028874962, 'politics': 13.131061732247845, 'space': 13.0967705477351}, {'nature': 13.108408066870082, 'politics': 13.094121161489783, 'space': 13.135559128163269}, {'nature': 4.602800714339357, 'politics': 4.578481091395047, 'space': 4.582460559683878}, {'nature': 10.298315971100878, 'politics': 10.255574471458205, 'space': 10.258667218384696}]


In [17]:
#counters are the way to create a sum count of names
testcounter = ['a', 'a', 'a', 'c', 'd', 'c', 'b']
othertest = ['e', 'a', 'v', 'b', 'b']
lettercount = Counter(testcounter)
secondlettercount = Counter(othertest)

In [30]:
#getting max from dictionary options
print(max(total, key=total.get))
print(max(total.keys(), key=total.get))
print(max(total.keys(), key=(lambda key: total[key])))
print(max(total.keys(), key=(lambda elephant: total[elephant])))

a
a
