In [1]:
# STEP 1: Install & Import Libraries
!pip install nltk scikit-learn pandas seaborn

import pandas as pd
import seaborn as sns
import nltk
from nltk.corpus import movie_reviews
from nltk.corpus import stopwords
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
from sklearn.model_selection import train_test_split

nltk.download('movie_reviews')
nltk.download('punkt')
nltk.download('stopwords')

# STEP 2: Load Dataset
# Using NLTK's Movie Reviews dataset
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# STEP 3: Data Preprocessing
import random
random.shuffle(documents)

# Feature Extraction Function
all_words = nltk.FreqDist(w.lower() for w in movie_reviews.words())
word_features = list(all_words)[:2000]

def extract_features(words):
    words = set(words)
    return {word: (word in words) for word in word_features}

# Prepare Dataset
feature_sets = [(extract_features(d), c) for (d, c) in documents]

# STEP 4: Train-Test Split
train_set, test_set = feature_sets[:1500], feature_sets[1500:]

# STEP 5: Model Training
classifier = NaiveBayesClassifier.train(train_set)

# STEP 6: Evaluation
print(f"Accuracy: {accuracy(classifier, test_set):.2f}")

# Most Informative Features
classifier.show_most_informative_features(10)

# STEP 7: Example Prediction
example_review = "This movie was incredibly good, I loved every moment!"
example_features = extract_features(example_review.split())
print("Predicted Sentiment:", classifier.classify(example_features))




[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Accuracy: 0.81
Most Informative Features
             wonderfully = True              pos : neg    =     11.5 : 1.0
             outstanding = True              pos : neg    =     10.9 : 1.0
                   damon = True              pos : neg    =      6.6 : 1.0
                  wasted = True              neg : pos    =      5.9 : 1.0
                    lame = True              neg : pos    =      5.8 : 1.0
                   awful = True              neg : pos    =      5.7 : 1.0
                  stupid = True              neg : pos    =      5.1 : 1.0
                     era = True              pos : neg    =      5.0 : 1.0
               portrayed = True              pos : neg    =      5.0 : 1.0
                  superb = True              pos : neg    =      5.0 : 1.0
Predicted Sentiment: neg
