In [1]:
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score


In [2]:
# ⬇️ Download the NLTK movie review corpus (reports "up-to-date" if already present)
# nltk.download returns False instead of raising when the fetch fails
# (e.g. no network), so check the result rather than ignoring it.
if not nltk.download('movie_reviews'):
    # A previously downloaded copy is still usable; nltk.data.find raises
    # LookupError right here if the corpus is missing, instead of letting
    # the failure surface later when the corpus is first read.
    nltk.data.find('corpora/movie_reviews')
from nltk.corpus import movie_reviews

[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Unzipping corpora/movie_reviews.zip.


In [3]:
# 📂 Build one (token list, category) pair per review file,
#    iterating category by category over the corpus
docs = [
    (list(movie_reviews.words(review_id)), sentiment)
    for sentiment in movie_reviews.categories()
    for review_id in movie_reviews.fileids(sentiment)
]

In [4]:
# 🧼 Preprocess - the vectorizer consumes plain strings, so glue each
#    review's tokens back together with single spaces; keep the category
#    of each review in a parallel list (same order as `texts`)
texts = [" ".join(tokens) for tokens, _ in docs]
labels = [category for _, category in docs]

In [5]:
# 📊 Hold out 20% of the reviews for testing; the fixed random_state
#    makes the shuffle (and therefore the split) reproducible
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

In [6]:
# 🧠 ML pipeline: raw text -> token-count features -> multinomial Naive Bayes.
#    Bundling both steps means fit/predict accept review strings directly.
model = Pipeline(steps=[
    ("vectorizer", CountVectorizer()),
    ("classifier", MultinomialNB()),
])


In [7]:
# 🚀 Fit the vectorizer and the classifier on the training reviews
model.fit(X_train, y_train)

# ✅ Predict the held-out reviews and report the fraction labelled correctly
predictions = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print(f"🎯 Model Accuracy: {accuracy:.2f}")

🎯 Model Accuracy: 0.81


In [9]:
# 🔍 Classify a single hand-written review
sample_review = "The movie was absolutely disaster!"
# predict works on a collection of documents, so wrap the one review in a list
pred = model.predict([sample_review])
# pred holds one label per input document; show the first (only) one, capitalized
print(f"Sentiment: {pred[0].capitalize()}")


Sentiment: Neg
