In [1]:
#Sentiment analysis in NLP is a technique to determine the emotional tone behind the body of the text
# 1. Rule Based Systems (Core Rules | Data Mining Based Systems)
# 2. Transformer Based Systems


# In NLP, sentiment is usually classified into three categories:
# 1. Positive Sentiment
# 2. Negative Sentiment
# 3. Neutral Sentiment

In [2]:
#Basic Rule Based System | Domain Based approach (Using language for understanding the sentence's sentiment)

#Step1: Create dictionaries containing positive and negative words | LEXICON

# Sentiment lexicon: one list of lower-case cue words per polarity.
positiveWords = [
    "good",
    "happy",
    "excellent",
    "great",
    "positive",
    "fortunate",
]
negativeWords = [
    "bad",
    "sad",
    "poor",
    "negative",
    "unfortunate",
    "terrible",
]

In [3]:
#Simple Analyser function to analyse the sentiment of the given text data

def ruleBasedSimpleTextSentimentAnalyser(textData, positiveLexicon=None, negativeLexicon=None):
  """Classify text as "Positive", "Negative" or "Neutral" by counting lexicon hits.

  The text is lower-cased and split on whitespace. Punctuation surrounding
  each token is stripped, so "great!" or "bad," still match their lexicon
  entries. Whichever polarity has more matching words wins; a tie (including
  no matches at all) is "Neutral".

  Parameters:
    textData: the sentence or document to analyse. None or an empty string
      is treated as having no sentiment words.
    positiveLexicon: optional iterable of lower-case positive words; defaults
      to the notebook-level positiveWords list.
    negativeLexicon: optional iterable of lower-case negative words; defaults
      to the notebook-level negativeWords list.

  Returns:
    One of the strings "Positive", "Negative" or "Neutral".
  """
  import string  # function-scope import keeps the cell runnable on its own

  # Nothing to count: avoids calling .lower() on None.
  if not textData:
    return "Neutral"

  # Fall back to the notebook-level lexicons; sets give O(1) membership tests.
  positiveLexicon = set(positiveWords if positiveLexicon is None else positiveLexicon)
  negativeLexicon = set(negativeWords if negativeLexicon is None else negativeLexicon)

  positiveCount = 0
  negativeCount = 0

  for rawWord in textData.lower().split():
    # Strip leading/trailing punctuation only; inner characters such as the
    # hyphen in "well-made" are left untouched.
    word = rawWord.strip(string.punctuation)
    if word in positiveLexicon:
      positiveCount += 1
    elif word in negativeLexicon:
      negativeCount += 1

  #Determine sentiment
  if positiveCount > negativeCount:
    return "Positive"
  elif negativeCount > positiveCount:
    return "Negative"
  else:
    return "Neutral"

In [4]:
# Quick check on a sentence containing the positive lexicon word "great".
ruleBasedSimpleTextSentimentAnalyser(
    textData="This product is great and works perfectly"
)

'Positive'

In [5]:
# Classify a sentence typed in by the user. The bare call on the last line
# makes the notebook display the returned label.
inputData = input("Enter text: ")
ruleBasedSimpleTextSentimentAnalyser(textData=inputData)

'Positive'

In [6]:
#VADER (Valence Aware Dictionary and sEntiment Reasoner)
#
# VADER uses a rule-based approach with a predefined lexicon that maps words to their sentiment intensity scores.
# It takes the following into account:
# 1. Words
# 2. Exclamation points
# 3. Emphasis through capital letters
# 4. Negation words
# 5. Degree modifiers that raise or lower the intensity ---- extremely happy, super excited

In [7]:
#VADER Scoring System
# VADER assigns each word in the text a score between -4 and +4
# Positive words have scores close to +4
# Negative words have scores close to -4
# Neutral words have scores close to 0

In [8]:
#Metrics in VADER system
# 1. Positive Score : Proportion of the text with positive sentiment
# 2. Negative Score : Proportion of the text with negative sentiment
# 3. Neutral Score : Proportion of the text with neutral sentiment
# 4. Compound Score : Overall sentiment score (ranges from -1 (most negative) to +1 (most positive))

In [9]:
import vaderSentiment
import pandas as pd
# Location of the Yelp reviews CSV. The original machine-specific path remains
# the default so existing runs are unaffected; set the YELP_CSV_PATH
# environment variable to load the file from somewhere else.
import os

YELP_CSV_PATH = os.environ.get("YELP_CSV_PATH", "C:/Users/micha/yelp.csv")
data = pd.read_csv(YELP_CSV_PATH)

In [10]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Build a VADER analyser for ad-hoc scoring in this notebook.
analyser = SentimentIntensityAnalyzer()

# Score a negated phrase; the bare expression makes the notebook display the
# resulting dict with the 'neg', 'neu', 'pos' and 'compound' entries.
analyser.polarity_scores("Not bad at all")

{'neg': 0.0, 'neu': 0.513, 'pos': 0.487, 'compound': 0.431}

In [11]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def senimentClassifierUsingVader(textData, analyser=None):
  """Label one piece of text as "Positive", "Negative" or "Neutral" using VADER.

  The label is derived from the 'compound' entry of polarity_scores():
  >= 0.05 is "Positive", <= -0.05 is "Negative", anything in between is
  "Neutral".

  Parameters:
    textData: a single text (one review/sentence), not a whole column.
    analyser: optional object exposing polarity_scores(text) that returns a
      mapping with a 'compound' key. When omitted, one
      SentimentIntensityAnalyzer is created on first use and reused on later
      calls, so applying this function row by row does not rebuild the
      analyser for every row.

  Returns:
    One of the strings "Positive", "Negative" or "Neutral".
  """
  if analyser is None:
    # Lazily create and remember a single shared analyser on the function.
    analyser = getattr(senimentClassifierUsingVader, "_sharedAnalyser", None)
    if analyser is None:
      analyser = SentimentIntensityAnalyzer()
      senimentClassifierUsingVader._sharedAnalyser = analyser

  compoundScore = analyser.polarity_scores(textData)['compound']

  if compoundScore >= 0.05:
    return "Positive"
  elif compoundScore <= -0.05:
    return "Negative"
  else:
    return "Neutral"

In [12]:
# Label every review individually: Series.apply calls the classifier once per
# review string. Passing the whole column in a single call would treat all
# reviews as one text and could only ever produce one label.
data['sentiment'] = data['text'].apply(senimentClassifierUsingVader)

# head is a method; it has to be called to display the first rows.
data.head()

KeyboardInterrupt: 

In [None]:
#https://github.com/cjhutto/vaderSentiment/tree/master

In [None]:
# Select by column name and keep both arrays 1-D. The previous
# data.iloc[:, [4]] produced an (n, 1) array, so each "document" reaching the
# vectorizer's analyzer was a one-element row instead of the review string.
# NOTE(review): assumes positional columns 4 and 10 were 'text' and the
# 'sentiment' column added above -- confirm against the CSV layout.
features = data['text'].values
label = data['sentiment'].values

In [None]:
import string
import nltk
from nltk.corpus import stopwords
# Fetch the NLTK 'stopwords' corpus that stopwords.words('english') reads from.
nltk.download('stopwords')
def textPreprocessing(document, stopWords=None):
  """Turn one document into a list of lower-case, stop-word-free tokens.

  Steps: remove every character found in string.punctuation, lower-case the
  text, split on any run of whitespace (spaces, tabs, newlines) and drop
  stop words.

  Parameters:
    document: the text to tokenise. A non-string sequence (for example a
      one-element row taken from a 2-D array) is joined with spaces first.
    stopWords: optional collection of lower-case words to discard. When
      omitted, the NLTK English stop-word list is loaded once and reused on
      later calls instead of being re-evaluated for every token.

  Returns:
    A list of token strings; empty when nothing survives the filtering.
  """
  if stopWords is None:
    # Load the stop-word list a single time and remember it on the function.
    stopWords = getattr(textPreprocessing, "_englishStopWords", None)
    if stopWords is None:
      stopWords = frozenset(stopwords.words('english'))
      textPreprocessing._englishStopWords = stopWords

  if not isinstance(document, str):
    # Iterating a row would yield whole strings rather than characters, so
    # flatten it to plain text before filtering.
    document = " ".join(str(part) for part in document)

  #Remove Punctuations
  processedData = ''.join([char for char in document if char not in string.punctuation])
  #Separate words from document and normalize it; split() with no argument
  #collapses repeated whitespace, so no empty tokens are produced
  wordsInLowerCase = processedData.lower().split()
  #Generate Vocab
  return [word for word in wordsInLowerCase if word not in stopWords]

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Build the bag-of-words matrix. fit_transform learns the vocabulary and
# encodes the documents in one pass, so the custom analyzer runs once per
# document instead of twice (fit followed by transform).
# NOTE(review): the vocabulary is learned from all rows before the split, so
# test documents influence the feature space.
wordVector = CountVectorizer(analyzer=textPreprocessing)
bagOfWords = wordVector.fit_transform(features)
# fit() returns the vectorizer itself; the name is kept for later cells.
finalWordVectorVocab = wordVector

X_train,X_test,y_train,y_test = train_test_split(bagOfWords,
                                                 label,
                                                 test_size=0.2,
                                                 random_state=6)

# The default of 100 solver iterations often stops before convergence on
# unscaled word counts; allow more so the reported scores come from a
# converged model.
modelLogisticRegression = LogisticRegression(max_iter=1000)
modelLogisticRegression.fit(X_train,y_train)

trainScore = modelLogisticRegression.score(X_train,y_train)
testScore = modelLogisticRegression.score(X_test,y_test)
print("Train Score is {} and Test Score is {}".format(trainScore, testScore))

In [None]:
# yelp dataset (Day7 assignment folder)
# Using VADER, create a new column named sentiment, find the sentiment of each text/review and store it in that column
# Create an ML model for performing sentiment analysis