In [None]:
!pip install nltk scikit-learn pandas


In [1]:
import nltk
from nltk.corpus import movie_reviews
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Seed for the document shuffle below; change it to get a different row order.
SEED = 42

# Download the necessary NLTK data
nltk.download('movie_reviews')

# Load movie reviews dataset as (list_of_words, category) pairs,
# one pair per review file in each category
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]

# Shuffle the documents with a dedicated, seeded generator so the row order
# (and anything derived from it, such as a seeded train/test split) is the
# same on every run, without touching the global `random` state
random.Random(SEED).shuffle(documents)

# Convert to a DataFrame for easier manipulation
df = pd.DataFrame(documents, columns=['text', 'label'])

# Join each review's word list into a single space-separated string
df['text'] = df['text'].str.join(' ')

# Encode labels (pos: 1, neg: 0)
df['label'] = df['label'].map({'pos': 1, 'neg': 0})


[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\Sparrow\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\movie_reviews.zip.


In [2]:
# Hold out 20% of the rows as a test set (split seeded with random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# Bag-of-words features: the vocabulary is learned from the training texts only,
# then the same fitted vectorizer encodes the held-out texts
vectorizer = CountVectorizer(stop_words='english')
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Fit a multinomial Naive Bayes classifier on the training counts
# (fit() returns the estimator itself, so it can be assigned directly)
model = MultinomialNB().fit(X_train_vec, y_train)

# Score the held-out reviews
y_pred = model.predict(X_test_vec)

# Report the fraction of test reviews classified correctly, as a percentage
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 77.75%


In [5]:
def predict_sentiment(text):
    """Classify a single piece of text as 'Positive' or 'Negative'.

    The text is wrapped in a one-element list, vectorized with the
    notebook-level ``vectorizer`` and scored by the notebook-level ``model``.
    A predicted label equal to 1 yields 'Positive'; any other label yields
    'Negative'.
    """
    features = vectorizer.transform([text])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return 'Positive'
    return 'Negative'

# Try the classifier on one hand-written review and show the predicted label
sample_text = "This movie was fantastic! I loved the story and the characters."
sample_sentiment = predict_sentiment(sample_text)
print(sample_sentiment)


Positive
