# Scott Breitbach
## 03-April-2021
## DSC550, Week 4

# 4.2: Sentiment Analysis

In [1]:
import pandas as pd
# import numpy as np

## 1) Load the data file DailyComments.csv from the Week 4 Data Files into a data frame.

In [2]:
# Load the daily comments. The path is relative to the notebook's working
# directory; the file's unlabeled first column is read in as "Unnamed: 0".
commentsDF = pd.read_csv('week-4/DailyComments.csv')

## 2) Identify a scheme to categorize each comment as positive or negative. You can devise your own scheme or find a commonly used scheme to perform this sentiment analysis. However you decide to do this, make sure to explain the scheme you decide to use.

### Manual Sentiment Coding

In [3]:
# Hand-assigned sentiment labels, one per comment in row order (Monday through
# Sunday). These serve as the reference labels the automated scorers are
# compared against.
myScoring = ['Neutral', 'Positive', 'Positive', 'Neutral', 'Negative', 'Neutral', 'Positive']
commentsDF['mySentiment'] = myScoring
commentsDF

Unnamed: 0,Day of Week,comments,mySentiment
0,Monday,"Hello, how are you?",Neutral
1,Tuesday,Today is a good day!,Positive
2,Wednesday,It's my birthday so it's a really special day!,Positive
3,Thursday,Today is neither a good day or a bad day!,Neutral
4,Friday,I'm having a bad day.,Negative
5,Saturday,There' s nothing special happening today.,Neutral
6,Sunday,Today is a SUPER good day!,Positive


### Sentiment Analysis by Encoding Selected Words as Positive or Negative

In [4]:
# Work on a copy: manualEncoding adds its columns to the frame it is given,
# and commentsDF is reused as the starting point for the other methods.
encodingDF = commentsDF.copy()

In [5]:
def manualEncoding(df):
    """Score each comment by counting hand-picked sentiment words.

    Adds four columns to ``df`` in place and returns the same frame:

    * ``positive1``      -- occurrences of "good"
    * ``positive2``      -- occurrences of "special"
    * ``negative``       -- occurrences of "bad"
    * ``encoderScoring`` -- ``positive1 + positive2 - negative``

    Matching is whole-word and case-insensitive, so "Good" and "BAD" are
    counted while "goodbye" and "badge" are not.

    Args:
        df: DataFrame with a string column named ``comments``.

    Returns:
        The same DataFrame object, with the four columns added.
    """
    import re  # local import so the cell does not rely on a file-level one

    def countWord(word):
        # Series.str.count interprets its argument as a regular expression;
        # \b pins the match to whole words and IGNORECASE catches
        # capitalised variants.
        return df['comments'].str.count(rf'\b{word}\b', flags=re.IGNORECASE)

    df['positive1'] = countWord('good')
    df['positive2'] = countWord('special')
    df['negative'] = countWord('bad')
    df['encoderScoring'] = df['positive1'] + df['positive2'] - df['negative']
    return df

In [6]:
# Adds the word-count and score columns to encodingDF in place; the returned
# frame is what the cell displays.
manualEncoding(encodingDF)

Unnamed: 0,Day of Week,comments,mySentiment,positive1,positive2,negative,encoderScoring
0,Monday,"Hello, how are you?",Neutral,0,0,0,0
1,Tuesday,Today is a good day!,Positive,1,0,0,1
2,Wednesday,It's my birthday so it's a really special day!,Positive,0,1,0,1
3,Thursday,Today is neither a good day or a bad day!,Neutral,1,0,1,0
4,Friday,I'm having a bad day.,Negative,0,0,1,-1
5,Saturday,There' s nothing special happening today.,Neutral,0,1,0,1
6,Sunday,Today is a SUPER good day!,Positive,1,0,0,1


### Sentiment Analysis using Vader

In [7]:
# import nltk
# nltk.download('vader_lexicon')

In [8]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [9]:
# Module-level analyzer instance; vaderScoring below reads this name directly.
# NOTE(review): presumably needs the 'vader_lexicon' resource from the
# commented-out download cell above -- confirm on a fresh environment.
analyzer = SentimentIntensityAnalyzer()

In [10]:
# Separate copy so the VADER score columns do not end up on commentsDF.
vaderDF = commentsDF.copy()

In [11]:
def vaderScoring(df):
    """Attach VADER polarity scores for every comment to ``df``.

    Each comment is passed to the module-level ``analyzer`` once, and the four
    entries of the returned score dict are written to ``df`` in place:

    * ``negative``     <- ``'neg'``
    * ``neutral``      <- ``'neu'``
    * ``positive``     <- ``'pos'``
    * ``vaderScoring`` <- ``'compound'``

    Args:
        df: DataFrame with a ``comments`` column of text.

    Returns:
        The same DataFrame object, with the four columns added.
    """
    # Score each comment a single time and reuse the result for all four
    # columns instead of re-running the analyzer once per column.
    scores = [analyzer.polarity_scores(text) for text in df['comments']]

    df['negative'] = [s['neg'] for s in scores]
    df['neutral'] = [s['neu'] for s in scores]
    df['positive'] = [s['pos'] for s in scores]
    df['vaderScoring'] = [s['compound'] for s in scores]
    return df

In [12]:
# Adds the VADER score columns to vaderDF in place; the returned frame is what
# the cell displays.
vaderScoring(vaderDF)

Unnamed: 0,Day of Week,comments,mySentiment,negative,neutral,positive,vaderScoring
0,Monday,"Hello, how are you?",Neutral,0.0,1.0,0.0,0.0
1,Tuesday,Today is a good day!,Positive,0.0,0.484,0.516,0.4926
2,Wednesday,It's my birthday so it's a really special day!,Positive,0.0,0.664,0.336,0.5497
3,Thursday,Today is neither a good day or a bad day!,Neutral,0.508,0.492,0.0,-0.735
4,Friday,I'm having a bad day.,Negative,0.538,0.462,0.0,-0.5423
5,Saturday,There' s nothing special happening today.,Neutral,0.361,0.639,0.0,-0.3089
6,Sunday,Today is a SUPER good day!,Positive,0.0,0.277,0.723,0.8327


### Sentiment Analysis using TextBlob

In [13]:
from textblob import TextBlob

In [14]:
# Separate copy so the TextBlob score column does not end up on commentsDF.
textBlobDF = commentsDF.copy()

In [15]:
# Store TextBlob's polarity value for each comment in a new column, then
# display the frame.
textBlobDF['textBlobScoring'] = textBlobDF['comments'].apply(lambda x: TextBlob(x).polarity)
textBlobDF

Unnamed: 0,Day of Week,comments,mySentiment,textBlobScoring
0,Monday,"Hello, how are you?",Neutral,0.0
1,Tuesday,Today is a good day!,Positive,0.875
2,Wednesday,It's my birthday so it's a really special day!,Positive,0.446429
3,Thursday,Today is neither a good day or a bad day!,Neutral,-0.0875
4,Friday,I'm having a bad day.,Negative,-0.7
5,Saturday,There' s nothing special happening today.,Neutral,0.357143
6,Sunday,Today is a SUPER good day!,Positive,0.604167


## 3) Implement your sentiment analysis with code and display the results. Note: DailyComments.csv is a purposely small file, so you will be able to clearly see why the results are what they are.

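Inspecting the results of each sentiment analysis, a threshold of ±0.4 appears sufficient to give the most accurate results across the board: scores at or above +0.4 are labeled Positive, scores at or below -0.4 are labeled Negative, and everything in between is labeled Neutral.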

In [16]:
import numpy as np

In [17]:
def overallSentiment(value, threshold=0.4):
    """Convert a numeric sentiment score into a categorical label.

    Args:
        value: Numeric sentiment score.
        threshold: Cut-off applied symmetrically around zero. Defaults to
            0.4, the value chosen for the DailyComments data; expected to be
            non-negative.

    Returns:
        'Positive' when ``value >= threshold``, 'Negative' when
        ``value <= -threshold``, otherwise 'Neutral'. A NaN score fails both
        comparisons and is therefore labeled 'Neutral'.
    """
    if value >= threshold:
        return 'Positive'
    if value <= -threshold:
        return 'Negative'
    return 'Neutral'
    
def sentimentCol(df, column):
    """Label every row of ``df`` from the scores held in ``column``.

    Each value in ``df[column]`` is passed through ``overallSentiment`` and
    the resulting labels are written to a ``Sentiment`` column on ``df``
    itself (created, or overwritten if already present).

    Args:
        df: DataFrame containing the score column.
        column: Name of the column holding the numeric scores.

    Returns:
        The same DataFrame object, now carrying the ``Sentiment`` column.
    """
    labels = df[column].apply(overallSentiment)
    df['Sentiment'] = labels
    return df

def accuracy(df):
    """Return how often the predicted label agrees with the manual one.

    Compares the ``mySentiment`` and ``Sentiment`` columns row by row.

    Args:
        df: DataFrame containing both ``mySentiment`` and ``Sentiment``.

    Returns:
        The share of matching rows as a whole-number percentage (a numpy
        float such as ``86.0``).
    """
    matches = np.where(df['mySentiment'] == df['Sentiment'], 1, 0)
    # Scale to percent *before* rounding. Rounding the fraction first and then
    # multiplying leaks float error into the result
    # (e.g. 0.57 * 100 == 56.99999999999999).
    return np.round(np.mean(matches) * 100, 0)

### Manual Word Encoding Results

In [18]:
# Label each row from its encoderScoring value (adds a 'Sentiment' column in
# place), report agreement with the manual labels, then display the frame.
sentimentCol(encodingDF, 'encoderScoring')
print(f'Overall accuracy: {accuracy(encodingDF)}%')
encodingDF

Overall accuracy: 86.0%


Unnamed: 0,Day of Week,comments,mySentiment,positive1,positive2,negative,encoderScoring,Sentiment
0,Monday,"Hello, how are you?",Neutral,0,0,0,0,Neutral
1,Tuesday,Today is a good day!,Positive,1,0,0,1,Positive
2,Wednesday,It's my birthday so it's a really special day!,Positive,0,1,0,1,Positive
3,Thursday,Today is neither a good day or a bad day!,Neutral,1,0,1,0,Neutral
4,Friday,I'm having a bad day.,Negative,0,0,1,-1,Negative
5,Saturday,There' s nothing special happening today.,Neutral,0,1,0,1,Positive
6,Sunday,Today is a SUPER good day!,Positive,1,0,0,1,Positive


### Vader Analysis Results

In [19]:
# Label each row from its vaderScoring value (adds a 'Sentiment' column in
# place), report agreement with the manual labels, then display the frame.
sentimentCol(vaderDF, 'vaderScoring')
print(f'Overall accuracy: {accuracy(vaderDF)}%')
vaderDF

Overall accuracy: 86.0%


Unnamed: 0,Day of Week,comments,mySentiment,negative,neutral,positive,vaderScoring,Sentiment
0,Monday,"Hello, how are you?",Neutral,0.0,1.0,0.0,0.0,Neutral
1,Tuesday,Today is a good day!,Positive,0.0,0.484,0.516,0.4926,Positive
2,Wednesday,It's my birthday so it's a really special day!,Positive,0.0,0.664,0.336,0.5497,Positive
3,Thursday,Today is neither a good day or a bad day!,Neutral,0.508,0.492,0.0,-0.735,Negative
4,Friday,I'm having a bad day.,Negative,0.538,0.462,0.0,-0.5423,Negative
5,Saturday,There' s nothing special happening today.,Neutral,0.361,0.639,0.0,-0.3089,Neutral
6,Sunday,Today is a SUPER good day!,Positive,0.0,0.277,0.723,0.8327,Positive


### TextBlob Analysis Results

In [20]:
# Label each row from its textBlobScoring value (adds a 'Sentiment' column in
# place), report agreement with the manual labels, then display the frame.
sentimentCol(textBlobDF, 'textBlobScoring')
print(f'Overall accuracy: {accuracy(textBlobDF)}%')
textBlobDF

Overall accuracy: 100.0%


Unnamed: 0,Day of Week,comments,mySentiment,textBlobScoring,Sentiment
0,Monday,"Hello, how are you?",Neutral,0.0,Neutral
1,Tuesday,Today is a good day!,Positive,0.875,Positive
2,Wednesday,It's my birthday so it's a really special day!,Positive,0.446429,Positive
3,Thursday,Today is neither a good day or a bad day!,Neutral,-0.0875,Neutral
4,Friday,I'm having a bad day.,Negative,-0.7,Negative
5,Saturday,There' s nothing special happening today.,Neutral,0.357143,Neutral
6,Sunday,Today is a SUPER good day!,Positive,0.604167,Positive


## 4) For up to 5% extra credit, find another set of comments, e.g., some tweets, and perform the same sentiment analysis.