### Sentiment analysis for a movie review

In [2]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
# Fetch the NLTK resources the steps below rely on:
#   punkt_tab     -> tokenization
#   stopwords     -> stop-word removal
#   vader_lexicon -> sentiment analysis
for resource in ("punkt_tab", "stopwords", "vader_lexicon"):
    nltk.download(resource)

# Raw review text used by the tokenization, stop-word and sentiment steps
review_text = "The movie was very fantastic ! the plot and acting were top-notch"

# Step 1: Tokenization -- split the review into word and punctuation tokens
word_token = nltk.word_tokenize(review_text)
print(f"tokens :{word_token}")

# Step 2: Stop-word removal
# Held as a set so each membership check below is a hash lookup
stop_word = set(stopwords.words("english"))
print(f"stop word from English :{stop_word}")

# Compare in lowercase so capitalised tokens such as "The" are matched too
filter_token = list(
    filter(lambda token: token.lower() not in stop_word, word_token)
)
print(f"Filter token (after stop word removal) {filter_token}")

# Step 3: TF-IDF weighting
# Small corpus of several reviews; the vectorizer is fitted on all of them
# and the weights of the first one are reported below.
corpus = [
    "The movie was absolutely good! , Plot and acting were top-notch",
    "The movie was okay, but acting would have been better.",
    "I do not like the movie. The movie Plot was to slow"
]
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus)
feature_names = vectorizer.get_feature_names_out()

# Densify the first review's row once and pair it with the vocabulary,
# instead of indexing the sparse matrix one element at a time.
first_review_weights = tfidf_matrix[0].toarray().ravel()
tfidf_dict = dict(zip(feature_names, first_review_weights))
print("TF-IDF Weights for the first review:", tfidf_dict)


# Step 4: Sentiment detection
# Score the raw review with VADER; the 'compound' entry of the returned
# scores is what decides the overall label below.
Sanalyzer = SentimentIntensityAnalyzer()
Sentiment_scores = Sanalyzer.polarity_scores(review_text)
print("Sentiment Scores:", Sentiment_scores)

# Cut-off recommended by the VADER documentation: compound scores within
# +/-0.05 of zero are treated as neutral rather than weakly
# positive/negative.
COMPOUND_THRESHOLD = 0.05

compound_score = Sentiment_scores["compound"]
if compound_score >= COMPOUND_THRESHOLD:
    Sentiment = "Positive"
elif compound_score <= -COMPOUND_THRESHOLD:
    # Capitalised to match the "Positive" / "Neutral" labels
    Sentiment = "Negative"
else:
    Sentiment = "Neutral"

print("Overall Sentiment:", Sentiment)

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\sarth\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sarth\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\sarth\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


tokens :['The', 'movie', 'was', 'very', 'fantastic', '!', 'the', 'plot', 'and', 'acting', 'were', 'top-notch']
stop word from English :{'by', "aren't", 'who', 'y', "you'd", 'wasn', "haven't", 'be', 'she', 'ours', 're', 'ain', 'did', 'our', "shouldn't", "i've", 'such', "you're", 'where', 'at', 'again', 'i', 'as', 'he', 'herself', 'most', 'each', "you've", 'against', 'what', 'didn', 'it', 'its', 'they', 'very', 'hers', 'out', "they're", 'wouldn', 'weren', 'nor', 'are', 'on', 'mustn', 'is', 'm', "won't", 'hasn', 'am', 'once', 'don', 'below', "you'll", 'through', 'but', "she's", 'when', 'itself', "couldn't", 'o', "we'd", "it's", 'was', 'isn', 'not', 'then', 'hadn', 'this', "hadn't", 'too', "she'll", "mustn't", 'shouldn', "should've", 'whom', 'aren', 'only', 'these', "don't", 'you', "wasn't", 'd', 'couldn', 'more', "he's", "doesn't", "he'd", 'ma', 'your', 'that', 'and', 'or', 'few', 'both', "mightn't", 'over', 'them', 'why', 'before', "wouldn't", 'after', 'do', 'because', 'their', 'a', 'tha