# Sentiment Analysis and Logistic Regression

## Installation
Required installations:
* Python 3
* Pip

## Imports

In [86]:
import math

## Task 1.1 Sentiment Analysis

Given the following short movie reviews and a document D = fast, couple, shoot,
fly. Write a python implementation (from scratch!) that compute the most likely
class for D. Assume a naive Bayes classifier and use add-1 smoothing for the likelihoods. Each review is labeled with a genre, either comedy or action.

In [87]:
class Sentiment_Naive_Bayes_Classifier:
    
    # Initialize dicitonaries and values to
    # keep track of probabilities
    def __init__(self):
        self.label_probabilities = {}
        self.word_as_label_probabilities = {}
        self.vocabulary = []
        self.total_words = 0
        self.total_reviews = 0
    
    # method that increments every occurence of
    # an entry into a given dictionary
    def add_probability(self, p, dictionary):
        if p in dictionary:
            dictionary[p] += 1
        else:
            dictionary[p] = 1
    
    def fit(self, data, labels):
        for words in data:
            self.vocabulary.extend(words)
        
        # Removes duplicates from the vocabulary
        self.vocabulary = set(self.vocabulary)
        self.total_words = len(self.vocabulary)
        self.total_reviews = len(data)
        
        for words, label in zip(data, labels):
            for word in set(words):
                self.add_probability(
                    (word, label), self.word_as_label_probabilities)
            
            self.add_probability(label, self.label_probabilities)
            
    # calculates probability that a singe word results in a label
    # with add-one smoothing
    def label_laplace_probability(self, word, label, p_label):
        word_label_frequency = (self.word_as_label_probabilities[(word, label)]
                           if (word, label) in self.word_as_label_probabilities else 0)
        return (word_label_frequency + 1) / (p_label + self.total_words)
        
    
    def predict(self, d):
        predicted_probabilities = {}
        for label in self.label_probabilities:
            probability = self.label_probabilities[label]
            p_label = probability / self.total_reviews
            for word in d:
                if word in self.vocabulary:
                    # multiplies the probability that each word from the vocabulary
                    # in the 
                    p_label *= self.label_laplace_probability(word, label, probability)
                    
            predicted_probabilities[label] = p_label
            
        predicted_label = max(predicted_probabilities, key=predicted_probabilities.get)
        
        return (predicted_label, predicted_probabilities[predicted_label])
        

In [93]:
d = "fast couple shoot fly".split()

data = [("fun couple love love".split(), "comedy"),
        ("fast furious shoot".split(), "action"),
        ("couple fly fast fun fun".split(), "comedy"),
        ("furious shoot shoot fun".split(), "action"),
        ("fly fast shoot love".split(), "action")]

snbc = Sentiment_Naive_Bayes_Classifier()
snbc.fit([review for review, label in data], [label for review, label in data])
print(snbc.predict(d))

dd = "predictable with no fun".split()

datadata = [("just plain boring".split(), "-"),
        ("entirely predictable and lacks energy".split(), "-"),
        ("no surprises and very few laughs".split(), "-"),
        ("very powerful".split(), "+"),
        ("the most fun film of the summer".split(), "+")]

snbc = Sentiment_Naive_Bayes_Classifier()
snbc.fit([review for review, label in datadata], [label for review, label in datadata])
print(snbc.predict(dd))


('action', 0.00144)
('-', 0.00019725486972959643)


## Task 1.2 Logistic Regression

* Create an implementation (from scratch!) of the Stochastic Gradient Descent Algorithm in Figure 5.5 (Page 86) of the reference book “Speech and Language Processing”.

* Note: There are some implementations online. You can use them as a reference but you should try to modify the implementation and show in your report that you understand every step. Also, if you use an implementation online as a reference include in your report which reference you used. Include in your report an example run in the style of the Example in Section 5.4.3 (Page 87) of the reference book.

In [94]:
class Logistic_Regressor:
    
    def __init__(self):
        self.w = [0]
        self.b = 0
        
    def update_weights_bias(self, new_w, new_b):
        self.w = new_w
        self.b = new_b
    
    def predict(self, x):
        product = []
        for x1, w1 in zip(x, self.w):
            product.append(x1 * w1)
        return 1 / (1 + math.exp(-(sum(product) + self.b)))
    
    def cross_entropy(self, y_pred, y):
        return -y * math.log(y_pred) + (1 - y) * math.log(y_pred)
    
    def gradient(self, x, y):
        y_pred = self.predict(x)
        loss = y_pred - y
        gradients = [x[i] * loss for i in range(len(x))]
        return gradients, loss
    
    def fit(self, data, labels, alpha):
        for x, y in zip(data, labels):
            gradients, loss = self.gradient(x, y)
            new_w = [self.w[i] - alpha * gradients[i] for i in range(len(self.w))]
            new_b = self.b - alpha + loss
            self.update_weights_bias(new_w, new_b)
            

In [95]:
# Example values
y = 1
x = [3, 2]
alpha = 0.1

lg = Logistic_Regressor()

print(lg.predict(x))

for i in range(50):
    lg.fit([x], [y], alpha)
    if (i + 1) % 10 == 0:
        print(lg.predict(x))
        print("weights:", lg.w, "bias:", lg.b)

0.5
0.15917487669809918
weights: [1.9931407442399747] bias: -7.6438024807999145
0.027101855634289837
weights: [4.74203158657605] bias: -17.8067719552535
0.003805322952055705
weights: [7.702625501856081] bias: -28.675418339520288
0.0005176348921757234
weights: [10.69716791001136] bias: -39.65722636670457
7.01029450476619e-05
weights: [13.696426941934813] bias: -50.654756473116095
