# Sebtiment Analysis deep dive. 6 mini projects.

### Lesson: Curate a dataset

In [1]:
# Helper print function
def pretty_print_review_and_label(i):
    print(labels[i] + "\t:\t" + reviews[i][:80] + "...")
    
# Load reviews and labels
g = open('reviews.txt', 'r')
reviews = list(map(lambda x: x[:-1], g.readlines()))
g.close()

g = open('labels.txt', 'r')
labels = list(map(lambda x: x[:-1].upper(), g.readlines()))
g.close()

In [2]:
len(reviews)

25000

In [3]:
reviews[0]

'bromwell high is a cartoon comedy . it ran at the same time as some other programs about school life  such as  teachers  . my   years in the teaching profession lead me to believe that bromwell high  s satire is much closer to reality than is  teachers  . the scramble to survive financially  the insightful students who can see right through their pathetic teachers  pomp  the pettiness of the whole situation  all remind me of the schools i knew and their students . when i saw the episode in which a student repeatedly tried to burn down the school  i immediately recalled . . . . . . . . . at . . . . . . . . . . high . a classic line inspector i  m here to sack one of your teachers . student welcome to bromwell high . i expect that many adults of my age think that bromwell high is far fetched . what a pity that it isn  t   '

In [4]:
labels[0]

'POSITIVE'

### Lesson: Develop a predictive theory 

In [5]:
print("labels.txt \t : \t reviews.txt\n")
pretty_print_review_and_label(2137)
pretty_print_review_and_label(12816)
pretty_print_review_and_label(6267)
pretty_print_review_and_label(21934)
pretty_print_review_and_label(5297)
pretty_print_review_and_label(4998)

labels.txt 	 : 	 reviews.txt

NEGATIVE	:	this movie is terrible but it has some good effects .  ...
POSITIVE	:	adrian pasdar is excellent is this film . he makes a fascinating woman .  ...
NEGATIVE	:	comment this movie is impossible . is terrible  very improbable  bad interpretat...
POSITIVE	:	excellent episode movie ala pulp fiction .  days   suicides . it doesnt get more...
NEGATIVE	:	if you haven  t seen this  it  s terrible . it is pure trash . i saw this about ...
POSITIVE	:	this schiffer guy is a real genius  the movie is of excellent quality and both e...


## Project 1. Theory validation.

In [6]:
from collections import Counter
import numpy as np

In [7]:
# Create the counter objects: Positive words, negative words and all words
positive_counts = Counter()
negative_counts = Counter()
total_counts = Counter()

In [8]:
# TODO: Loop over all the words in all the reviews and increment the counts in the appropriate counter objects
for i in range(len(reviews)):
  
  if labels[i] == "POSITIVE":
    for word in reviews[i].split(' '):
      positive_counts[word] += 1
  else:
    for word in reviews[i].split(' '):
      negative_counts[word] += 1
      
  for word in reviews[i].split(' '):
    total_counts[word] += 1

In [9]:
total_counts.most_common()

[('', 1111930),
 ('the', 336713),
 ('.', 327192),
 ('and', 164107),
 ('a', 163009),
 ('of', 145864),
 ('to', 135720),
 ('is', 107328),
 ('br', 101872),
 ('it', 96352),
 ('in', 93968),
 ('i', 87623),
 ('this', 76000),
 ('that', 73245),
 ('s', 65361),
 ('was', 48208),
 ('as', 46933),
 ('for', 44343),
 ('with', 44125),
 ('movie', 44039),
 ('but', 42603),
 ('film', 40155),
 ('you', 34230),
 ('on', 34200),
 ('t', 34081),
 ('not', 30626),
 ('he', 30138),
 ('are', 29430),
 ('his', 29374),
 ('have', 27731),
 ('be', 26957),
 ('one', 26789),
 ('all', 23978),
 ('at', 23513),
 ('they', 22906),
 ('by', 22546),
 ('an', 21560),
 ('who', 21433),
 ('so', 20617),
 ('from', 20498),
 ('like', 20276),
 ('there', 18832),
 ('her', 18421),
 ('or', 18004),
 ('just', 17771),
 ('about', 17374),
 ('out', 17113),
 ('if', 16803),
 ('has', 16790),
 ('what', 16159),
 ('some', 15747),
 ('good', 15143),
 ('can', 14654),
 ('more', 14251),
 ('she', 14223),
 ('when', 14182),
 ('very', 14069),
 ('up', 13291),
 ('time', 127

In [10]:
# Most common negae words
negative_counts.most_common()

[('', 561462),
 ('.', 167538),
 ('the', 163389),
 ('a', 79321),
 ('and', 74385),
 ('of', 69009),
 ('to', 68974),
 ('br', 52637),
 ('is', 50083),
 ('it', 48327),
 ('i', 46880),
 ('in', 43753),
 ('this', 40920),
 ('that', 37615),
 ('s', 31546),
 ('was', 26291),
 ('movie', 24965),
 ('for', 21927),
 ('but', 21781),
 ('with', 20878),
 ('as', 20625),
 ('t', 20361),
 ('film', 19218),
 ('you', 17549),
 ('on', 17192),
 ('not', 16354),
 ('have', 15144),
 ('are', 14623),
 ('be', 14541),
 ('he', 13856),
 ('one', 13134),
 ('they', 13011),
 ('at', 12279),
 ('his', 12147),
 ('all', 12036),
 ('so', 11463),
 ('like', 11238),
 ('there', 10775),
 ('just', 10619),
 ('by', 10549),
 ('or', 10272),
 ('an', 10266),
 ('who', 9969),
 ('from', 9731),
 ('if', 9518),
 ('about', 9061),
 ('out', 8979),
 ('what', 8422),
 ('some', 8306),
 ('no', 8143),
 ('her', 7947),
 ('even', 7687),
 ('can', 7653),
 ('has', 7604),
 ('good', 7423),
 ('bad', 7401),
 ('would', 7036),
 ('up', 6970),
 ('only', 6781),
 ('more', 6730),
 ('

#### Positive to negative word ratios

In [11]:
pos_neg_ratios = Counter()

for word in positive_counts.keys():
  pos_neg_ratios[word] = positive_counts[word] / float(negative_counts[word] + 1)
  
pos_neg_ratios.most_common()

[('edie', 109.0),
 ('antwone', 88.0),
 ('din', 82.0),
 ('gunga', 66.0),
 ('goldsworthy', 65.0),
 ('gypo', 60.0),
 ('yokai', 60.0),
 ('paulie', 59.0),
 ('visconti', 51.0),
 ('flavia', 51.0),
 ('blandings', 48.0),
 ('kells', 48.0),
 ('brashear', 47.0),
 ('gino', 46.0),
 ('deathtrap', 45.0),
 ('harilal', 41.0),
 ('panahi', 41.0),
 ('ossessione', 39.0),
 ('tsui', 38.0),
 ('caruso', 38.0),
 ('sabu', 37.0),
 ('ahmad', 37.0),
 ('khouri', 36.0),
 ('dominick', 36.0),
 ('aweigh', 35.0),
 ('mj', 35.0),
 ('mcintire', 34.0),
 ('kriemhild', 34.0),
 ('blackie', 33.0),
 ('daisies', 33.0),
 ('newcombe', 33.0),
 ('kei', 32.0),
 ('trelkovsky', 32.0),
 ('jaffar', 31.0),
 ('hilliard', 31.0),
 ('gundam', 30.666666666666668),
 ('bathsheba', 30.0),
 ('pazu', 30.0),
 ('sheeta', 30.0),
 ('krell', 30.0),
 ('offside', 30.0),
 ('venoms', 29.0),
 ('fineman', 29.0),
 ('paine', 28.0),
 ('pimlico', 28.0),
 ('ranma', 28.0),
 ('ronny', 28.0),
 ('abhay', 27.0),
 ('iturbi', 26.5),
 ('kipling', 26.0),
 ('pym', 26.0),
 ('ga

In [12]:
pos_neg_ratios

Counter({'bromwell': 8.0,
         'high': 1.0262417994376758,
         'is': 1.1429797939461703,
         'a': 1.0550414765134515,
         'cartoon': 0.8383838383838383,
         'comedy': 1.1574750830564784,
         '.': 0.952936331242278,
         'it': 0.9937303426585002,
         'ran': 1.0427350427350428,
         'at': 0.9148208469055374,
         'the': 1.0607993145235326,
         'same': 0.94716618635927,
         'time': 1.0491143317230274,
         'as': 1.27547755260351,
         'some': 0.8957505718069099,
         'other': 1.1281932187645147,
         'programs': 1.3928571428571428,
         'about': 0.9173471639814611,
         'school': 0.8928164196123147,
         'life': 1.7279835390946503,
         '': 0.9804172314115088,
         'such': 1.0176817288801572,
         'teachers': 0.813953488372093,
         'my': 1.0784574468085106,
         'years': 1.5670454545454546,
         'in': 1.1476664990629428,
         'teaching': 0.9302325581395349,
         'profession

In [15]:
print("Pos-to-neg ratio for 'the' = {}".format(pos_neg_ratios["the"]))
print("Pos-to-neg ratio for 'amazing' = {}".format(pos_neg_ratios["amazing"]))
print("Pos-to-neg ratio for 'terrible' = {}".format(pos_neg_ratios["terrible"]))

Pos-to-neg ratio for 'the' = 1.0607993145235326
Pos-to-neg ratio for 'amazing' = 4.022813688212928
Pos-to-neg ratio for 'terrible' = 0.17744252873563218


### Project 2. Creating input/output data

In [47]:
# Create vocabulary
vocab = set(total_counts.keys())
vocab_size = len(vocab)
vocab_size

74074

In [48]:
layer_0 = np.zeros((1, vocab_size))
layer_0.shape

(1, 74074)

In [49]:
word2index = {}
for i, word in enumerate(vocab):
  word2index[word] = i

In [54]:
def update_input_layer(review):
    global layer_0
    layer_0 *= 0
    for word in review.split(' '):
        layer_0[0][word2index[word]] += 1
        
update_input_layer(reviews[0])
layer_0

array([[18.,  0.,  0., ...,  0.,  0.,  0.]])

In [44]:
layer_0[0]

array([0., 0., 0., ..., 0., 0., 0.])

In [61]:
def get_target_for_label(label):
  if label == 'POSITIVE':
    return 1
  else:
    return 0

### Project 3. Building a neural network.

In [118]:
import time
import sys
import numpy as np

class SentimentNetwork:
    def __init__(self, reviews, labels, hidden_nodes = 10, learning_rate = 0.1):
        np.random.seed(1)
        self.pre_process_data(reviews, labels)
        self.init_network(len(self.review_vocab), hidden_nodes, 1, learning_rate)

    def pre_process_data(self, reviews, labels):
        review_vocab = set()
        for review in reviews:
          for word in review.split(' '):
            review_vocab.add(word)
        self.review_vocab = list(review_vocab)
    
        label_vocab = set()
        for label in labels:
          label_vocab.add(label)
        self.label_vocab = list(label_vocab)
        
        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.review_vocab):
          self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
          self.word2index[label] = i
          
    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes
        self.learning_rate = learning_rate
        self.weights_0_1 = np.zeros((self.input_nodes, self.hidden_nodes))
        self.weights_1_2 = np.random.normal(0, self.output_nodes**-0.5, (self.hidden_nodes, self.output_nodes))
        self.layer_0 = np.zeros((1,input_nodes))
        
    def update_input_layer(self, review):
        self.layer_0 *= 0
        for word in review.split(' '):
            if word in self.word2index.keys():
              self.layer_0[0][self.word2index[word]] = 1
                
    def get_target_for_label(self,label):
        if label == 'POSITIVE':
            return 1
        else:
            return 0
          
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_output_2_derivative(self, output):
        return output * (1 - output)
      
    def train(self, training_reviews, training_labels):
        
        assert(len(training_reviews) == len(training_labels))
        correct_so_far = 0
        start = time.time()

        for i in range(len(training_reviews)):
            
            cur_review = training_reviews[i]
            cur_label = training_labels[i]
            
            self.update_input_layer(cur_review)
            layer_1 = self.layer_0.dot(self.weights_0_1)
            layer_2 = layer_1.dot(self.weights_1_2)
            layer_2 = self.sigmoid(layer_2)

            layer_2_error = layer_2 - self.get_target_for_label(cur_label)
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)
            
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T)
            layer_1_delta = layer_1_error
            
            self.weights_1_2 -= layer_1.T.dot(layer_2_delta) * self.learning_rate
            self.weights_0_1 -= self.layer_0.T.dot(layer_1_delta) * self.learning_rate

            if np.abs(layer_2_error) < 0.5:
              correct_so_far += 1
              
            reviews_per_second = i / float(time.time() - start)
                        
            elapsed_time = float(time.time() - start)
            reviews_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews)))[:4] \
                             + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
                             + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) \
                             + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            if(i % 2500 == 0):
                print("")
                
    def test(self, testing_reviews, testing_labels):
      
        correct = 0
        start = time.time()

        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            if(pred == testing_labels[i]):
                correct += 1
            
            elapsed_time = float(time.time() - start)
            reviews_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4] \
                             + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
                             + " #Correct:" + str(correct) + " #Tested:" + str(i+1) \
                             + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
            
    def run(self, review):
        review = review.lower()
        self.update_input_layer(review.lower())
        layer_1 = self.layer_0.dot(self.weights_0_1)
        layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))
        
        if layer_2[0] > 0.5:
          return "POSITIVE"
        else:
          return "NEGATIVE"

In [119]:
mlp = SentimentNetwork(reviews[:-1000],labels[:-1000], learning_rate=0.001)
mlp.test(reviews[-1000:],labels[-1000:])

Progress:0.0% Speed(reviews/sec):0.0 #Correct:0 #Tested:1 Testing Accuracy:0.0%Progress:0.1% Speed(reviews/sec):336.9 #Correct:1 #Tested:2 Testing Accuracy:50.0%Progress:0.2% Speed(reviews/sec):504.3 #Correct:1 #Tested:3 Testing Accuracy:33.3%Progress:0.3% Speed(reviews/sec):756.5 #Correct:2 #Tested:4 Testing Accuracy:50.0%Progress:0.4% Speed(reviews/sec):805.8 #Correct:2 #Tested:5 Testing Accuracy:40.0%Progress:0.5% Speed(reviews/sec):1007. #Correct:3 #Tested:6 Testing Accuracy:50.0%Progress:0.6% Speed(reviews/sec):1006. #Correct:3 #Tested:7 Testing Accuracy:42.8%Progress:0.7% Speed(reviews/sec):1005. #Correct:4 #Tested:8 Testing Accuracy:50.0%Progress:0.8% Speed(reviews/sec):1004. #Correct:4 #Tested:9 Testing Accuracy:44.4%Progress:0.9% Speed(reviews/sec):1004. #Correct:5 #Tested:10 Testing Accuracy:50.0%Progress:1.0% Speed(reviews/sec):1115. #Correct:5 #Tested:11 Testing Accuracy:45.4%Progress:1.1% Speed(reviews/sec):1104. #Correct:6 #Tested:12 Testing Accuracy:50.0%Pro

Progress:55.5% Speed(reviews/sec):2694. #Correct:278 #Tested:556 Testing Accuracy:50.0%Progress:55.6% Speed(reviews/sec):2686. #Correct:278 #Tested:557 Testing Accuracy:49.9%Progress:55.7% Speed(reviews/sec):2691. #Correct:279 #Tested:558 Testing Accuracy:50.0%Progress:55.8% Speed(reviews/sec):2683. #Correct:279 #Tested:559 Testing Accuracy:49.9%Progress:55.9% Speed(reviews/sec):2687. #Correct:280 #Tested:560 Testing Accuracy:50.0%Progress:56.0% Speed(reviews/sec):2679. #Correct:280 #Tested:561 Testing Accuracy:49.9%Progress:56.1% Speed(reviews/sec):2684. #Correct:281 #Tested:562 Testing Accuracy:50.0%Progress:56.2% Speed(reviews/sec):2689. #Correct:281 #Tested:563 Testing Accuracy:49.9%Progress:56.3% Speed(reviews/sec):2681. #Correct:282 #Tested:564 Testing Accuracy:50.0%Progress:56.4% Speed(reviews/sec):2686. #Correct:282 #Tested:565 Testing Accuracy:49.9%Progress:56.5% Speed(reviews/sec):2690. #Correct:283 #Tested:566 Testing Accuracy:50.0%Progress:56.6% Speed(reviews/se

In [120]:
mlp.train(reviews[:-1000],labels[:-1000])

Progress:0.0% Speed(reviews/sec):0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%
Progress:3.63% Speed(reviews/sec):224.5 #Correct:635 #Trained:874 Training Accuracy:72.6%

KeyboardInterrupt: 

In [108]:
def update_input_layer(review):
    
    global layer_0
    
    # clear out previous state, reset the layer to be all 0s
    layer_0 *= 0
    for word in review.split(" "):
        layer_0[0][word2index[word]] += 1

update_input_layer(reviews[0])
layer_0

array([[18.,  0.,  0., ...,  0.,  0.,  0.]])

In [109]:
review_counter = Counter()

for word in reviews[0].split(" "):
    review_counter[word] += 1
    
review_counter.most_common()

[('.', 27),
 ('', 18),
 ('the', 9),
 ('to', 6),
 ('high', 5),
 ('i', 5),
 ('bromwell', 4),
 ('is', 4),
 ('a', 4),
 ('teachers', 4),
 ('that', 4),
 ('of', 4),
 ('it', 2),
 ('at', 2),
 ('as', 2),
 ('school', 2),
 ('my', 2),
 ('in', 2),
 ('me', 2),
 ('students', 2),
 ('their', 2),
 ('student', 2),
 ('cartoon', 1),
 ('comedy', 1),
 ('ran', 1),
 ('same', 1),
 ('time', 1),
 ('some', 1),
 ('other', 1),
 ('programs', 1),
 ('about', 1),
 ('life', 1),
 ('such', 1),
 ('years', 1),
 ('teaching', 1),
 ('profession', 1),
 ('lead', 1),
 ('believe', 1),
 ('s', 1),
 ('satire', 1),
 ('much', 1),
 ('closer', 1),
 ('reality', 1),
 ('than', 1),
 ('scramble', 1),
 ('survive', 1),
 ('financially', 1),
 ('insightful', 1),
 ('who', 1),
 ('can', 1),
 ('see', 1),
 ('right', 1),
 ('through', 1),
 ('pathetic', 1),
 ('pomp', 1),
 ('pettiness', 1),
 ('whole', 1),
 ('situation', 1),
 ('all', 1),
 ('remind', 1),
 ('schools', 1),
 ('knew', 1),
 ('and', 1),
 ('when', 1),
 ('saw', 1),
 ('episode', 1),
 ('which', 1),
 ('r

In [150]:
import time
import sys
import numpy as np

class SentimentNetwork:
    def __init__(self, reviews, labels, hidden_nodes = 10, learning_rate = 0.1):
        np.random.seed(1)
        self.pre_process_data(reviews, labels)
        self.init_network(len(self.review_vocab), hidden_nodes, 1, learning_rate)

    def pre_process_data(self, reviews, labels):
        review_vocab = set()
        for review in reviews:
          for word in review.split(' '):
            review_vocab.add(word)
        self.review_vocab = list(review_vocab)
    
        label_vocab = set()
        for label in labels:
          label_vocab.add(label)
        self.label_vocab = list(label_vocab)
        
        self.review_vocab_size = len(self.review_vocab)
        self.label_vocab_size = len(self.label_vocab)
        
        self.word2index = {}
        for i, word in enumerate(self.review_vocab):
          self.word2index[word] = i
        
        self.label2index = {}
        for i, label in enumerate(self.label_vocab):
          self.word2index[label] = i
          
          
          
    def init_network(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        self.input_nodes = input_nodes
        self.hidden_nodes = hidden_nodes
        self.output_nodes = output_nodes
        self.learning_rate = learning_rate
        self.weights_0_1 = np.zeros((self.input_nodes, self.hidden_nodes))
        self.weights_1_2 = np.random.normal(0, self.output_nodes**-0.5, (self.hidden_nodes, self.output_nodes))
        self.layer_0 = np.zeros((1,input_nodes))
        
        
        
    def update_input_layer(self, review):
        self.layer_0 *= 0
        self.indices = set()
        for word in review.split(' '):
            if word in self.word2index.keys():
              self.layer_0[0][self.word2index[word]] = 1
              self.indices.add(self.word2index[word])
        self.indices = list(self.indices)
        
        
                
    def get_target_for_label(self,label):
        if label == 'POSITIVE':
            return 1
        else:
            return 0
          
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_output_2_derivative(self, output):
        return output * (1 - output)
      
    def train(self, training_reviews_raw, training_labels):
      
      
        training_reviews = list()
        for review in training_reviews_raw:
          indices = set()
          for word in review.split(' '):
            if word in self.word2index.keys():
              indices.add(self.word2index[word])
          training_reviews.append(list(indices))
        
        assert(len(training_reviews) == len(training_labels))
        correct_so_far = 0
        start = time.time()
        
        layer_1 = self.layer_0.dot(self.weights_0_1)
        
        for i in range(len(training_reviews)):
            
            cur_review = training_reviews[i]
            cur_label = training_labels[i]
            
            self.update_input_layer(cur_review)
            
            
            layer_1 += (self.weights_0_1[i])
              
            layer_2 = layer_1.dot(self.weights_1_2)
            layer_2 = self.sigmoid(layer_2)

            layer_2_error = layer_2 - self.get_target_for_label(cur_label)
            layer_2_delta = layer_2_error * self.sigmoid_output_2_derivative(layer_2)
            
            layer_1_error = layer_2_delta.dot(self.weights_1_2.T)
            layer_1_delta = layer_1_error
            
            self.weights_1_2 -= layer_1.T.dot(layer_2_delta) * self.learning_rate
            self.weights_0_1 -= self.layer_0.T.dot(layer_1_delta) * self.learning_rate

            if np.abs(layer_2_error) < 0.5:
              correct_so_far += 1
              
            reviews_per_second = i / float(time.time() - start)
                        
            elapsed_time = float(time.time() - start)
            reviews_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(training_reviews)))[:4] \
                             + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
                             + " #Correct:" + str(correct_so_far) + " #Trained:" + str(i+1) \
                             + " Training Accuracy:" + str(correct_so_far * 100 / float(i+1))[:4] + "%")
            if(i % 2500 == 0):
                print("")
                
    def test(self, testing_reviews, testing_labels):
      
        correct = 0
        start = time.time()

        for i in range(len(testing_reviews)):
            pred = self.run(testing_reviews[i])
            if(pred == testing_labels[i]):
                correct += 1
            
            elapsed_time = float(time.time() - start)
            reviews_per_second = i / elapsed_time if elapsed_time > 0 else 0
            
            sys.stdout.write("\rProgress:" + str(100 * i/float(len(testing_reviews)))[:4] \
                             + "% Speed(reviews/sec):" + str(reviews_per_second)[0:5] \
                             + " #Correct:" + str(correct) + " #Tested:" + str(i+1) \
                             + " Testing Accuracy:" + str(correct * 100 / float(i+1))[:4] + "%")
            
    def run(self, review):
        review = review.lower()
        self.update_input_layer(review.lower())
        layer_1 = self.layer_0.dot(self.weights_0_1)
        layer_2 = self.sigmoid(layer_1.dot(self.weights_1_2))
        
        if layer_2[0] > 0.5:
          return "POSITIVE"
        else:
          return "NEGATIVE"

In [151]:
mlp = SentimentNetwork(reviews[:-1000],labels[:-1000], learning_rate=0.001)
mlp.test(reviews[-1000:],labels[-1000:])

Progress:0.0% Speed(reviews/sec):0.0 #Correct:0 #Tested:1 Testing Accuracy:0.0%Progress:0.1% Speed(reviews/sec):328.3 #Correct:1 #Tested:2 Testing Accuracy:50.0%Progress:0.2% Speed(reviews/sec):499.7 #Correct:1 #Tested:3 Testing Accuracy:33.3%Progress:0.3% Speed(reviews/sec):749.6 #Correct:2 #Tested:4 Testing Accuracy:50.0%Progress:0.4% Speed(reviews/sec):799.7 #Correct:2 #Tested:5 Testing Accuracy:40.0%Progress:0.5% Speed(reviews/sec):832.2 #Correct:3 #Tested:6 Testing Accuracy:50.0%Progress:0.6% Speed(reviews/sec):998.6 #Correct:3 #Tested:7 Testing Accuracy:42.8%Progress:0.7% Speed(reviews/sec):998.3 #Correct:4 #Tested:8 Testing Accuracy:50.0%Progress:0.8% Speed(reviews/sec):999.8 #Correct:4 #Tested:9 Testing Accuracy:44.4%Progress:0.9% Speed(reviews/sec):999.9 #Correct:5 #Tested:10 Testing Accuracy:50.0%Progress:1.0% Speed(reviews/sec):999.9 #Correct:5 #Tested:11 Testing Accuracy:45.4%Progress:1.1% Speed(reviews/sec):1099. #Correct:6 #Tested:12 Testing Accuracy:50.0%Pro

Progress:58.0% Speed(reviews/sec):2748. #Correct:290 #Tested:581 Testing Accuracy:49.9%Progress:58.1% Speed(reviews/sec):2740. #Correct:291 #Tested:582 Testing Accuracy:50.0%Progress:58.2% Speed(reviews/sec):2745. #Correct:291 #Tested:583 Testing Accuracy:49.9%Progress:58.3% Speed(reviews/sec):2749. #Correct:292 #Tested:584 Testing Accuracy:50.0%Progress:58.4% Speed(reviews/sec):2741. #Correct:292 #Tested:585 Testing Accuracy:49.9%Progress:58.5% Speed(reviews/sec):2746. #Correct:293 #Tested:586 Testing Accuracy:50.0%Progress:58.6% Speed(reviews/sec):2751. #Correct:293 #Tested:587 Testing Accuracy:49.9%Progress:58.7% Speed(reviews/sec):2755. #Correct:294 #Tested:588 Testing Accuracy:50.0%Progress:58.8% Speed(reviews/sec):2747. #Correct:294 #Tested:589 Testing Accuracy:49.9%Progress:58.9% Speed(reviews/sec):2752. #Correct:295 #Tested:590 Testing Accuracy:50.0%Progress:59.0% Speed(reviews/sec):2756. #Correct:295 #Tested:591 Testing Accuracy:49.9%Progress:59.1% Speed(reviews/se

In [152]:
mlp.train(reviews[:-1000],labels[:-1000])

Progress:0.0% Speed(reviews/sec):0.0 #Correct:0 #Trained:1 Training Accuracy:0.0%
Progress:10.4% Speed(reviews/sec):282.8 #Correct:1208 #Trained:2501 Training Accuracy:48.3%
Progress:20.8% Speed(reviews/sec):280.9 #Correct:2459 #Trained:5001 Training Accuracy:49.1%
Progress:31.2% Speed(reviews/sec):279.0 #Correct:3709 #Trained:7501 Training Accuracy:49.4%
Progress:32.9% Speed(reviews/sec):278.8 #Correct:3911 #Trained:7906 Training Accuracy:49.4%

KeyboardInterrupt: 