In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from textblob import TextBlob

In [None]:
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 

In [None]:
# Read the IMDB reviews CSV from the working directory and preview its first rows.
dataset = pd.read_csv('IMDB Dataset.csv')
dataset.head()

In [None]:
# Rename the raw 'review' column to 'text' and keep the first SAMPLE_SIZE rows as the
# working sample. Slicing from the front states the sample size directly instead of
# assuming the file has exactly 50,000 rows, and .copy() makes X an independent frame
# so columns added to it later never alias (or warn about) `dataset`.
SAMPLE_SIZE = 1000
dataset = dataset.rename(columns={'review': 'text'})
X = dataset.iloc[:SAMPLE_SIZE, :].copy()
X

In [None]:
import re
import nltk

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Normalise every review into a lower-cased, stop-word-free, Porter-stemmed string.
# NOTE(review): stopwords.words('english') needs the NLTK 'stopwords' corpus; no
# nltk.download('stopwords') call is visible in this notebook -- confirm it is installed.

# Built once: the stemmer and the stop-word set do not change between reviews.
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))

# (compiled pattern, replacement) pairs, applied in this order before tokenising.
cleanup_steps = [
    (re.compile(r'#[a-zA-Z0-9]+'), ' '),         # '#' followed by alphanumerics
    (re.compile(r'@[a-zA-Z0-9]+'), ' '),         # '@' followed by alphanumerics
    (re.compile(r'&[a-zA-Z0-9]+'), ' '),         # '&' followed by alphanumerics
    (re.compile('[0-9]+'), ' '),                 # runs of digits
    (re.compile(r'RT[\s]+'), ' '),               # literal 'RT' followed by whitespace
    (re.compile(r"https?:\/\/\S+|www\S+"), ""),  # URLs
    (re.compile(r'\\[a-zA-Z0-9]+'), ' '),        # literal backslash followed by alphanumerics
    # Negated class: every remaining non-alphanumeric run becomes one space.
    # (An anchored '^[a-zA-Z0-9]+' would instead strip the first word of each review.)
    (re.compile(r'[^a-zA-Z0-9]+'), ' '),
]

corpus = []
# Iterate over the column itself so the loop follows len(X) and does not depend on
# X carrying a default 0..n-1 index.
for review in X['text']:
    for pattern, replacement in cleanup_steps:
        review = pattern.sub(replacement, review)
    tokens = review.lower().split()
    review = ' '.join(ps.stem(word) for word in tokens if word not in english_stopwords)
    corpus.append(review)

In [None]:
# Spot-check the first cleaned review.
corpus[0]

In [None]:
# Store the cleaned review strings as a new 'cleaned' column and display the frame.
X['cleaned'] = corpus
X

In [None]:
# Download the tagger model used by the nltk.pos_tag call later in the notebook.
import nltk
nltk.download('averaged_perceptron_tagger')

In [None]:
# Download the WordNet corpus used by the lemmatizer and the synset lookups.
import nltk
nltk.download('wordnet')

In [None]:
# Download the SentiWordNet corpus that supplies the positive/negative/objective scores.
import nltk
nltk.download('sentiwordnet')

In [None]:
import nltk
import ssl
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

In [None]:
# Lemmatizer instance used by get_sentiment().
lemmatizer = WordNetLemmatizer()
# Zero the score counters.
pos = neg = obj = count = 0

# POS-tag every cleaned review. The token list is passed straight to the tagger
# rather than stored in a variable called `list`, which would shadow the built-in
# for the rest of the notebook session.
# NOTE(review): `corpus` holds Porter-stemmed tokens, so the tagger sees stems rather
# than the original words -- confirm this is intended.
postagging = [nltk.pos_tag(word_tokenize(review)) for review in corpus]

X['pos_tags'] = postagging

def penn_to_wn(tag):
    """Translate a POS tag into the matching WordNet part-of-speech constant.

    The decision is made on the tag's first letter: 'J' -> wn.ADJ, 'N' -> wn.NOUN,
    'R' -> wn.ADV, 'V' -> wn.VERB. Any other tag yields None.
    """
    # Attribute names are resolved lazily so `wn` is only touched on a match.
    prefix_to_attr = (('J', 'ADJ'), ('N', 'NOUN'), ('R', 'ADV'), ('V', 'VERB'))
    for prefix, attr_name in prefix_to_attr:
        if tag.startswith(prefix):
            return getattr(wn, attr_name)
    return None


# Returns the synset name plus positive, negative and objective scores for a token,
# or an empty list when the token cannot be scored.
def get_sentiment(word, tag):
    """Look up SentiWordNet scores for one POS-tagged token.

    Parameters
    ----------
    word : str
        The token to score.
    tag : str
        The token's POS tag, as paired with it by nltk.pos_tag.

    Returns
    -------
    list
        [synset_name, pos_score, neg_score, obj_score] for the first synset of the
        token's lemma, or [] when the tag is not a noun/adjective/adverb, the lemma
        is empty, or WordNet has no synset for it.
    """
    wn_tag = penn_to_wn(tag)

    # Only nouns, adjectives and adverbs are scored; verbs and unmapped tags are skipped.
    if wn_tag not in (wn.NOUN, wn.ADJ, wn.ADV):
        return []

    # Lemmatization
    lemma = lemmatizer.lemmatize(word, pos=wn_tag)
    if not lemma:
        return []

    # A synset groups synonymous words that express the same concept; a word may
    # belong to one synset or to several. The lookup uses the lemma computed above
    # so the lemmatization step actually influences the result.
    synsets = wn.synsets(lemma, pos=wn_tag)
    if not synsets:
        return []

    # Take the first sense, the most common
    synset = synsets[0]
    swn_synset = swn.senti_synset(synset.name())

    return [synset.name(), swn_synset.pos_score(), swn_synset.neg_score(), swn_synset.obj_score()]
    
    ###################################################################################
# Net SentiWordNet score per review: the sum of positive scores minus the sum of
# negative scores over every token that get_sentiment() could score.
senti_score = []

for tagged_tokens in X['pos_tags']:
    token_scores = (get_sentiment(word, tag) for (word, tag) in tagged_tokens)
    # get_sentiment() returns [] for tokens it cannot score; drop those explicitly
    # instead of letting a bare `except` hide the resulting IndexError.
    scored = [entry for entry in token_scores if entry]
    review_pos = sum(entry[1] for entry in scored)  # index 1 holds the positive score
    review_neg = sum(entry[2] for entry in scored)  # index 2 holds the negative score
    senti_score.append(review_pos - review_neg)

X['senti_score'] = senti_score


# Call head() so the preview is rendered rather than the bound-method repr.
X.head()

In [None]:
# Label each review from the sign of its SentiWordNet score; a score of exactly 0
# counts as 'Positive'. The comparison is vectorised, so it always yields one label
# per row and does not rely on X having a default 0..n-1 index.
overall = np.where(X['senti_score'] >= 0, 'Positive', 'Negative')

X['Overall Sentiment'] = overall

In [None]:
# Preview the frame, now including the 'Overall Sentiment' column.
X.head()

In [None]:
# Draw the bars first and label the returned Axes afterwards: pandas' bar plot can
# set the x-label itself from the index name of value_counts(), which would
# overwrite a label applied before plotting.
ax = X['Overall Sentiment'].value_counts().plot(kind="bar")
ax.set_title("Sentiment Analysis Using SentiWordnet")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Counts")
plt.show()

In [None]:
def subjectivity(review):
    """Return the subjectivity component of TextBlob's sentiment for `review`."""
    return TextBlob(review).sentiment.subjectivity


def polarity(review):
    """Return the polarity component of TextBlob's sentiment for `review`."""
    return TextBlob(review).sentiment.polarity


# Analyse each cleaned review once and read both scores from the same result,
# instead of constructing two TextBlob objects per review.
blob_sentiments = [TextBlob(review).sentiment for review in corpus[:len(X)]]

Polarity = np.array([sentiment.polarity for sentiment in blob_sentiments])
Subjectivity = np.array([sentiment.subjectivity for sentiment in blob_sentiments])

In [None]:
# Convert each TextBlob polarity into a label; a polarity of exactly 0 is 'Positive'.
Sentiment = []
for score in Polarity[:len(X)]:
    if score < 0:
        Sentiment.append('Negative')
    elif score >= 0:
        Sentiment.append('Positive')

X["Senti"] = Sentiment
X.head()

In [None]:
# Plot first, then label the returned Axes: pandas' bar plot can set the x-label
# itself from the index name of value_counts(), overwriting a label set beforehand.
ax = X['Senti'].value_counts().plot(kind="bar")
ax.set_title("Sentiment Analysis Using TextBlob")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Counts")
plt.show()

In [None]:
# Download the lexicon that SentimentIntensityAnalyzer loads.
import nltk
nltk.download('vader_lexicon')

In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Score every cleaned review with VADER. Each entry of `cader` is the dict returned
# by polarity_scores(); the whole list is stored in X and echoed as the cell output.
vader = SentimentIntensityAnalyzer()
cader = [vader.polarity_scores(corpus[idx]) for idx in range(len(X))]

X["Vader_Sentiment"] = cader
cader





In [None]:

# Turn each VADER 'compound' score into a label; exactly 0 counts as "Positive".
Vader_Senti = []

for scores in cader[:len(X)]:
    compound = scores['compound']
    if compound < 0:
        Vader_Senti.append("Negative")
    elif compound >= 0:
        Vader_Senti.append("Positive")

X["Vader Sentiment"] = Vader_Senti
X


In [None]:
# Plot first, then label the returned Axes: pandas' bar plot can set the x-label
# itself from the index name of value_counts(), overwriting a label set beforehand.
ax = X["Vader Sentiment"].value_counts().plot(kind="bar")
ax.set_title("Sentiment Analysis Using VADER")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Counts")
plt.show()

In [None]:
# Preview the frame with the SentiWordNet, TextBlob and VADER label columns.
X.head()

In [None]:
# Distribution of the ground-truth labels. Plot first, then label the returned Axes:
# pandas' bar plot can set the x-label itself from the index name of value_counts(),
# overwriting a label set beforehand.
ax = X["sentiment"].value_counts().plot(kind="bar")
ax.set_title("Original")
ax.set_xlabel("Sentiment")
ax.set_ylabel("Counts")
plt.show()

In [None]:
# Encode the ground-truth labels as 1 ('positive') / 0 ('negative').
# The whole column is assigned at once: element-wise chained assignment of the form
# X['sentiment'][i] = value is not guaranteed by pandas to write back into X.
# Values that are not one of the two labels pass through unchanged, so re-running
# the cell leaves already-encoded rows as they are.
truth_codes = {'positive': 1, 'negative': 0}
X['sentiment'] = X['sentiment'].map(lambda label: truth_codes.get(label, label))

In [None]:
# Encode the three predicted-label columns as 1 ('Positive') / 0 ('Negative').
# Each column is assigned as a whole: element-wise chained assignment of the form
# X[col][i] = value is not guaranteed by pandas to write back into X.
# Values that are not one of the two labels pass through unchanged, so re-running
# the cell leaves already-encoded rows as they are.
prediction_codes = {'Positive': 1, 'Negative': 0}
for column in ('Overall Sentiment', 'Senti', 'Vader Sentiment'):
    X[column] = X[column].map(lambda label: prediction_codes.get(label, label))

In [None]:
# Display the frame after the label columns were converted to 1/0.
X

In [None]:
from sklearn.metrics import classification_report

# Cast the encoded label columns to float so ground truth and predictions share
# one numeric dtype.
Ori = X['sentiment'].astype(float)
TxtB = X['Senti'].astype(float)
SentiWrd = X['Overall Sentiment'].astype(float)
Vad = X['Vader Sentiment'].astype(float)

# One report per lexicon, each compared against the ground-truth labels in `Ori`.
for heading, predicted in (("TextBlob", TxtB), ("SentiWordNet", SentiWrd), ("Vader", Vad)):
    print("              " + heading + " Classification Report")
    print(classification_report(Ori, predicted))

In [None]:
# Inspect the ground-truth label series.
Ori

In [None]:
# Inspect the VADER prediction series.
Vad