In [10]:
import nltk
nltk.download('movie_reviews')
from nltk.corpus import movie_reviews
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Sentiment classification pipeline: load the labelled NLTK movie_reviews
# corpus, vectorise the text with TF-IDF, train a linear SVM, report held-out
# accuracy, and classify two hand-written example reviews.

# Collect every review as a (token list, category) pair, iterating category by
# category and, within each category, file by file.
documents = [
    (list(movie_reviews.words(file_id)), label)
    for label in movie_reviews.categories()
    for file_id in movie_reviews.fileids(label)
]

# The vectorizer expects raw strings, so glue each token list back together
# with single spaces; labels are kept in the same order as the reviews.
reviews = [' '.join(tokens) for tokens, _ in documents]
labels = [label for _, label in documents]

# Hold out a quarter of the corpus for evaluation; the fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    reviews,
    labels,
    test_size=0.25,
    random_state=42,
)

# TF-IDF over unigrams, bigrams and trigrams. Terms seen in fewer than 5
# documents or in more than 80% of documents are discarded.
vectorizer = TfidfVectorizer(
    min_df=5,
    max_df=0.8,
    ngram_range=(1, 3),
)

# Learn the vocabulary and IDF weights from the training split only, then
# reuse that fitted vocabulary for the test split.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Build the linear-kernel SVM and fit it in one step (fit() returns the
# estimator itself).
svm_classifier = SVC(kernel='linear').fit(X_train_tfidf, y_train)

# Score the classifier on the held-out reviews.
y_pred = svm_classifier.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Classify a couple of unseen example sentences with the trained model; they
# must go through the same fitted vectorizer as the training data.
new_reviews = [
    "This movie was excellent!",
    "The acting was awful.",
]
new_reviews_tfidf = vectorizer.transform(new_reviews)
predictions = svm_classifier.predict(new_reviews_tfidf)
for review, prediction in zip(new_reviews, predictions):
    # The classifier predicts the corpus category names; 'pos' is reported as
    # Positive and anything else as Negative.
    if prediction == 'pos':
        sentiment = 'Positive'
    else:
        sentiment = 'Negative'
    print(f"Review: {review} -> Sentiment: {sentiment}")

# This code analyzes IMDb movie reviews to detect whether their tone is positive or negative. It starts by loading the reviews, already labelled as positive or negative, from NLTK's 'movie_reviews' corpus. Each review's tokens are then joined back into a single text string and paired with its sentiment label.
# The data is split into training and test sets (75% / 25%) for model assessment. The text is then converted into numerical feature vectors using TF-IDF, which weights each word or short phrase by its importance within a review.
# The Support Vector Machine (SVM) model is then set up and trained on the transformed training data, and its accuracy is evaluated on the test data.
# Finally, the trained model is used to assess new, unseen reviews. These new reviews are converted into numerical representations using the same fitted TF-IDF vectorizer, allowing the SVM to predict their sentiment (positive or negative) based on its training. This process gauges the model's ability to understand and categorize sentiments in reviews.

[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\TOSHIBA\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


Accuracy: 0.836
Review: This movie was excellent! -> Sentiment: Positive
Review: The acting was awful. -> Sentiment: Negative
