# Amazon Reviews Analyzer

In [14]:
def getEmptyPosDictionary(posWords):
    """Build the positive-word tally with every count starting at zero.

    Each distinct entry of ``posWords`` becomes a key mapped to 0. Repeated
    entries collapse into one key, kept at the position of its first
    occurrence.
    """
    return dict.fromkeys(posWords, 0)

In [15]:
def getEmptyNegDictionary(negWords):
    """Build the negative-word tally with every count starting at zero.

    Every distinct entry of ``negWords`` is mapped to 0; duplicates in the
    input produce a single key.
    """
    return {entry: 0 for entry in negWords}

In [16]:
def countWords(reviewsFile,posWordsDict,negWordsDict):
    """Tally occurrences of known positive/negative words in a reviews file.

    The file is read in full and split on whitespace; each token is looked up
    verbatim (no case folding or punctuation stripping). A token found in
    ``posWordsDict`` increments its positive count; otherwise, a token found
    in ``negWordsDict`` increments its negative count. A token present in both
    dictionaries is therefore counted as positive only.

    Args:
        reviewsFile: Path of the text file holding the reviews.
        posWordsDict: Mapping of positive word -> running count (mutated in place).
        negWordsDict: Mapping of negative word -> running count (mutated in place).

    Returns:
        ``[posWordsDict, negWordsDict]`` -- the same two dict objects, updated.
    """
    #3. Open the reviews + keep track of the pos/neg word counts in each dictionary
    # `with` closes the handle as soon as the text is read, even if read() raises;
    # the previous version left the file open.
    with open(reviewsFile, "r") as reviewsFileHandler:
        allReviews = reviewsFileHandler.read()

    for word in allReviews.split():
        if word in posWordsDict:
            posWordsDict[word] += 1
        elif word in negWordsDict:
            negWordsDict[word] += 1
    return [posWordsDict, negWordsDict]

In [17]:
def getPosNegCount(posWordsDict,negWordsDict):
    """Add up the per-word counts into overall positive and negative totals.

    Only counts greater than zero contribute to a total.

    Returns:
        A two-element list ``[totalPositive, totalNegative]``.
    """
    def _sumOfUsedWords(countsByWord):
        # Total of the strictly positive counts stored in one tally dict.
        return sum(timesUsed for timesUsed in countsByWord.values() if timesUsed > 0)

    return [_sumOfUsedWords(posWordsDict), _sumOfUsedWords(negWordsDict)]

In [18]:
# You can also take this a step further and put each function in its own cell!
#Just make sure that any function that is called is defined *ABOVE* (before) where it is called
def totalReviewSentiment(positiveWordsFile, negativeWordsFile, reviewsFile):
    """Report whether a whole file of reviews uses more positive or negative words.

    Reads the two word lists (one word per line), counts how often each listed
    word appears in the reviews file, prints both totals, and returns a verdict.

    Args:
        positiveWordsFile: Path of the positive word list, one word per line.
        negativeWordsFile: Path of the negative word list, one word per line.
        reviewsFile: Path of the text file holding the reviews.

    Returns:
        "The Reviews Are Mostly Positive" when the positive total is strictly
        greater than the negative total, otherwise
        "The Reviews Are Mostly Negative".
    """
    #1. Open the files and get the words in a list
    # `with` closes each word-list file once it has been read; the previous
    # version left both handles open.
    with open(positiveWordsFile,"r") as posFileHandler:
        posWords = posFileHandler.read().splitlines()

    with open(negativeWordsFile,"r") as negFileHandler:
        negWords = negFileHandler.read().splitlines()

    #2. Create the dictionaries with their default values
    posWordsDict = getEmptyPosDictionary(posWords)
    negWordsDict = getEmptyNegDictionary(negWords)

    #3. Open the reviews + keep track of the pos/neg word counts in each dictionary
    [posWordsDict,negWordsDict] = countWords(reviewsFile,posWordsDict,negWordsDict)

    #4. Calculate the final pos/neg word counts and return
    [totalPosWords,totalNegWords] = getPosNegCount(posWordsDict,negWordsDict)

    print("Total positive words used: " + str(totalPosWords))
    print("Total negative words used: " + str(totalNegWords))

    # NOTE: an exact tie falls into the `else` branch and is reported as negative.
    if totalPosWords > totalNegWords:
        return("The Reviews Are Mostly Positive")
    else:
        return("The Reviews Are Mostly Negative")
    

### Step 2: What's the sentiment of a single review?
For this task, you will be creating a function that analyzes the sentiment of a single Amazon review. This function will take in a string *review*, and calculate the sentiment for that review. It will also take in *posWordsDict* and *negWordsDict*, which are dictionaries keyed by the positive words and the negative words, respectively.

In order to calculate the sentiment, we are going to first calculate just the positive sentiment. This can be calculated by dividing the number of positive words by the total number of sentiment words (positive words plus negative words) in a review.

Example #1: If I had 3 positive words and 2 negative words in a sentence, then my **positive sentiment** would be positive words / total words --> 3/5 --> 0.6

Example #2: If I had 12 negative words and 6 positive words in a sentence, then my **positive sentiment** would be positive words / total words --> 6/18 --> 0.3333333333333333

Now that we have our positive sentiment calculated... how do we calculate the negative sentiment?
Well, it's just the complement of the positive sentiment: 1 minus the positive sentiment!

Example #1: negative words / total words --> 2/5 --> 0.4 == 1 - 0.6(positive sentiment)

Example #2: negative words / total words --> 12/18 --> 0.66666666 == 1 - 0.3333333(positive sentiment)

How can we tell if a review is more positive or more negative?

Well, if we just calculate the positive sentiment - then we can check if that number is above 0.5 or below 0.5.

positiveSentiment > 0.5 == review that is more positive than negative
positiveSentiment < 0.5 == review that is more negative than positive

For your function, you will return a string that states whether this review was more positive or more negative. If it was more positive, print "This review has a positive sentiment". If the review is more negative, print "This review has a negative sentiment".

In [19]:
def reviewSentiment(review,posWordsDict,negWordsDict):
    """Print whether a single review leans positive, negative, or neutral.

    The review is split on whitespace and each token is looked up verbatim
    (no case folding or punctuation stripping). The positive sentiment ratio
    is ``positive / (positive + negative)``; it is printed, followed by a
    one-line verdict.

    Args:
        review: Text of one review.
        posWordsDict: Container of positive words (only membership is used).
        negWordsDict: Container of negative words (only membership is used).

    Returns:
        None. The ratio and the verdict are written to standard output.
    """
    posWords = 0
    negWords = 0

    #loop through the words from the review
    for word in review.split():
        if word in posWordsDict:
            posWords += 1
        # Separate `if`, not `elif`: a token present in both dictionaries is
        # counted on both sides.
        if word in negWordsDict:
            negWords += 1

    #calculate the positive sentiment ratio of the review
    sentimentWords = posWords + negWords
    if sentimentWords == 0:
        # No positive or negative words at all (e.g. an empty review). The
        # ratio would be 0/0, which used to raise ZeroDivisionError; use the
        # neutral midpoint so the review is reported as neutral instead.
        positiveSentiment = 0.5
    else:
        positiveSentiment = posWords / sentimentWords

    #report whether this review is more positive or negative
    print(positiveSentiment)
    if positiveSentiment > 0.5:
        print("This review has a positive sentiment")
    elif positiveSentiment < 0.5:
        print("This review has a negative sentiment")
    else:
        print("This review was entirely neutral")

In [20]:
# Run the whole-corpus analysis: prints both word totals; the returned verdict
# string is shown as the cell's result.
totalReviewSentiment("positivewords.txt","negativewords.txt","reviews.txt")

Total positive words used: 613312
Total negative words used: 344299


'The Reviews Are Mostly Positive'

In [21]:
# Same whole-corpus run as the previous cell (identical arguments).
totalReviewSentiment("positivewords.txt","negativewords.txt","reviews.txt")

Total positive words used: 613312
Total negative words used: 344299


'The Reviews Are Mostly Positive'

In [22]:
#Test the function using code that we already wrote:
# Load both word lists (one word per line). `with` closes each file as soon as
# it has been read; the previous version left both handles open.
with open("positivewords.txt","r") as posFileHandler:
    posContent = posFileHandler.read()
posWords = posContent.splitlines()

with open("negativewords.txt","r") as negFileHandler:
    negContent = negFileHandler.read()
negWords = negContent.splitlines()

# Zero-initialised lookup dictionaries passed to reviewSentiment in the cells below.
posWordsDict = getEmptyPosDictionary(posWords)
negWordsDict = getEmptyNegDictionary(negWords)


In [23]:
# Positive example: the recorded output below is a ratio of 1.0 and a positive
# verdict (presumably "enjoyed" is the only word found in either word list).
review = "I really enjoyed this product!!!"
reviewSentiment(review,posWordsDict,negWordsDict)

1.0
This review has a positive sentiment


In [24]:
# Negative example: the recorded output below is a ratio of 0.0 and a negative
# verdict (presumably "hated" is the only word found in either word list).
review = "I really hated this product!!!"
reviewSentiment(review,posWordsDict,negWordsDict)

0.0
This review has a negative sentiment


In [25]:
# Mixed example: the recorded output below is a ratio of 0.5 and a neutral
# verdict (presumably one positive and one negative word were matched).
review = "I really hated and loved this product but ultimately it was okay!!!"
reviewSentiment(review,posWordsDict,negWordsDict)

0.5
This review was entirely neutral
