In [1]:
import os
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.metrics import accuracy_score, classification_report
# Fetch the 'vader_lexicon' resource through NLTK's downloader before the analyzer is built
# (presumably the lexicon SentimentIntensityAnalyzer reads its word scores from - confirm in the NLTK docs).
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\16046\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
# --- Configuration -----------------------------------------------------------
# Cut-offs applied to VADER's compound score: above POSITIVE_THRESHOLD counts as
# 'positive', below NEGATIVE_THRESHOLD as 'negative', anything in between as 'neutral'.
POSITIVE_THRESHOLD = 0.5
NEGATIVE_THRESHOLD = -0.5
# Maximum number of review files read from each folder.
MAX_REVIEWS_PER_CLASS = 100


def load_reviews(folder, limit=MAX_REVIEWS_PER_CLASS):
    """Return the text of up to `limit` review files found in `folder`.

    os.listdir() returns entries in arbitrary order, so the names are sorted
    first: the same files are then picked on every run and positional indices
    into the returned list stay stable. Entries that are not regular files
    (for example a '.ipynb_checkpoints' directory) are skipped instead of
    crashing open().

    Args:
        folder: Path of the directory holding one review per file.
        limit: Maximum number of files to read.

    Returns:
        A list of review texts, ordered by file name.

    Raises:
        OSError: If `folder` cannot be listed or a file cannot be read.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    file_names = sorted(
        name for name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, name))
    )
    texts = []
    for name in file_names[:limit]:
        with open(os.path.join(folder, name), 'r', encoding='utf-8') as f:
            texts.append(f.read())
    return texts


def classify_review(review, analyzer):
    """Map a review to 'positive', 'negative' or 'neutral' from its compound score.

    Args:
        review: The review text to score.
        analyzer: Object exposing polarity_scores(text) that returns a dict
            with a 'compound' entry (here the VADER SentimentIntensityAnalyzer).

    Returns:
        'positive' if compound > POSITIVE_THRESHOLD, 'negative' if
        compound < NEGATIVE_THRESHOLD, otherwise 'neutral'.
    """
    compound = analyzer.polarity_scores(review)['compound']
    if compound > POSITIVE_THRESHOLD:
        return 'positive'
    if compound < NEGATIVE_THRESHOLD:
        return 'negative'
    return 'neutral'


# Create the sentiment analyzer
sia = SentimentIntensityAnalyzer()

# Load positive reviews
positive_folder = 'PositiveReviews100'
positive_reviews = load_reviews(positive_folder)

# Load negative reviews
negative_folder = 'NegativeReviews100'
negative_reviews = load_reviews(negative_folder)

# Predict a label for every review in each set
positive_sentiments = [classify_review(text, sia) for text in positive_reviews]
negative_sentiments = [classify_review(text, sia) for text in negative_reviews]

# Combine positive and negative reviews and predictions
reviews = positive_reviews + negative_reviews
sentiments = positive_sentiments + negative_sentiments
# Build the ground truth from the number of reviews actually loaded so that it
# always lines up with `sentiments`, even if a folder holds fewer files than the limit.
true_sentiments = (
    ['positive'] * len(positive_reviews)
    + ['negative'] * len(negative_reviews)
)

# Compute accuracy score
accuracy = accuracy_score(true_sentiments, sentiments)
print('Accuracy:', round(accuracy * 100, 2), '%')

# Print classification report.
# NOTE: the ground truth only contains 'positive' and 'negative', so every
# 'neutral' prediction counts as an error, and the 'neutral' row has zero
# support; with zero_division=1 its undefined recall is reported as 1.00,
# which raises the macro-average recall.
print(classification_report(true_sentiments, sentiments, zero_division=1))


Accuracy: 61.5 %
              precision    recall  f1-score   support

    negative       0.69      0.44      0.54       100
     neutral       0.00      1.00      0.00         0
    positive       0.60      0.79      0.68       100

    accuracy                           0.61       200
   macro avg       0.43      0.74      0.41       200
weighted avg       0.64      0.61      0.61       200



In [3]:
# Pick the first misclassified review to inspect: the negative review at index 17.
# The index is a position in negative_reviews, so it depends on the order in which the files were loaded.
ReviewA = negative_reviews[17]

In [4]:
# Display the text of the first misclassified review
ReviewA

'so what do you get when you mix together plot elements from various successful sci-fi films such as close encounters of the third kind , 2001 : a space odyssey , apollo 13 and contact ? \nwell , whatever it is , you\'d sure as hell hope that it would be a thousand times better than this shoddy attempt at such a melange , considering the disastrous results we\'re left with here . \nthis is a film that takes a little bit of everything , but ultimately adds up to a lot of nothing ! \nit\'s like i said . . . this \nmovie sucks . \nplot : a rescue crew of astronauts is sent down to mars in the year 2020 , after an unknown energy force leads to a loss of contact with the previous gang of space aviators to visit the red planet . \ncritique : extremely underwhelming is the best way to describe this movie . \nuneven , would be another . \nthe trailer for this movie actually showed some promise , the buzz around it had been so-so , and even the film itself starts off with a decent first twenty 

In [5]:
# Score the first misclassified review with the analyzer (neg / neu / pos / compound)
SentimentA = sia.polarity_scores(ReviewA)

In [6]:
# Display the scores: the compound score is strongly positive even though the review comes from the negative set
SentimentA

{'neg': 0.107, 'neu': 0.753, 'pos': 0.14, 'compound': 0.9807}

In [7]:
# Pick the second misclassified review to inspect: the positive review at index 9.
# The index is a position in positive_reviews, so it depends on the order in which the files were loaded.
ReviewB = positive_reviews[9]

In [8]:
# Display the text of the second misclassified review
ReviewB

"the american action film has been slowly drowning to death in a sea of asian wire-fu copycats . \nit's not a pretty death , and it's leaving the likes of schwartznager , stallone , and van damme wearing cement galoshes at the bottom of a kung fu sea . \nsometimes , the mix results in a mind-blowing spectacle unlike any other . \nquality action with amazing and exciting stunt work , as in 1999's the matrix , can be a real gem . \nbut too often hollywood gets it wrong , even when they pay off chinese directors . \nflying ninjas and floating karate masters have been replaced by soaring bronx detectives and slow motion kicking scientists . \nmostly it's laughable . \nin hollywood's rush to emulate the success of the matrix , trademark asian stunt choreography has become more of a joke than an art form . \nbut iron monkey , the latest asian import , shows us how to get it right . \niron monkey ( actually a reissue of a 1993 film ) is the story of a 19th chinese vigilante ( rongguang yu ) ,

In [9]:
# Score the second misclassified review with the analyzer (neg / neu / pos / compound)
SentimentB = sia.polarity_scores(ReviewB)

In [14]:
# Display the scores: the compound score is strongly negative even though the review comes from the positive set
SentimentB

{'neg': 0.161, 'neu': 0.729, 'pos': 0.11, 'compound': -0.9813}

In [10]:
# Pick the third misclassified review to inspect: the positive review at index 13.
# The index is a position in positive_reviews, so it depends on the order in which the files were loaded.
ReviewC = positive_reviews[13]

In [11]:
# Display the text of the third misclassified review
ReviewC

"synopsis : in this movie , steven spielberg , one of today's finest directors , attempts to spice up the 1800s story of a long courtroom battle over the fate of prisoner cinque ( djimon hounsou ) - - a young angry man from sierra leone who was kidnapped into slavery - - and his fellow prisoners . \ncinque and friends have landed a ship on the shores of america after escaping spanish slave traders , but since the americans don't speak cinque's language , the black men are hauled into court to determine whether or not they are legally slaves . \ntechnically , since the international slave trade was outlawed at that time , people like cinque couldn't be kidnapped into slavery ; one had to be born a slave to be legally considered a slave . \nlawyers baldwin and adams ( matthew mcconaughey and anthony hopkins ) must prove cinque and the others were captured into slavery , rather than born slaves , in order to get them out of prison as free men . \nthree lengthy court cases are portrayed wi

In [12]:
# Score the third misclassified review with the analyzer (neg / neu / pos / compound)
SentimentC = sia.polarity_scores(ReviewC)

In [13]:
# Display the scores: the compound score is strongly negative even though the review comes from the positive set
SentimentC

{'neg': 0.13, 'neu': 0.779, 'pos': 0.091, 'compound': -0.9159}