In [42]:
import numpy as np
import pandas as pd

In [43]:
# Read the review CSV into a DataFrame
data = pd.read_csv("IMDB_Dataset.csv")

# Preview the first ten rows as a quick sanity check on the columns and labels
first_rows = data.head(10)
print(first_rows)

                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive
5  Probably my all-time favorite movie, a story o...  positive
6  I sure would like to see a resurrection of a u...  positive
7  This show was an amazing, fresh & innovative i...  negative
8  Encouraged by the positive comments about this...  negative
9  If you like original gut wrenching laughter yo...  positive


In [44]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
train_data, test_data = train_test_split(
    data,
    random_state=42,
    test_size=0.2,
)

# Report the (rows, columns) shape of each partition
for split_label, split_frame in (("Training", train_data), ("Testing", test_data)):
    print(f"{split_label} set shape:", split_frame.shape)

Training set shape: (40000, 2)
Testing set shape: (10000, 2)


In [45]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression

def _report_metrics(model_name, y_true, y_pred):
    """Print and return the evaluation metrics for one classifier.

    Parameters
    ----------
    model_name : str
        Label used as the prefix of every printed line.
    y_true : array-like
        Reference labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, confusion)`` -- the same values that
        are printed.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # average='weighted': per-class scores averaged by class support
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    confusion = confusion_matrix(y_true, y_pred)

    print(f"{model_name} accuracy:", accuracy)
    print(f"{model_name} precision:", precision)
    print(f"{model_name} recall:", recall)
    print(f"{model_name} confusion matrix:\n", confusion)
    return accuracy, precision, recall, confusion


# Bag-of-words features with English stop words removed.
# The vocabulary is learned from the training reviews only and then reused,
# unchanged, to encode the test reviews.
vectorizer = CountVectorizer(stop_words='english')
train_vectors = vectorizer.fit_transform(train_data['review'])
test_vectors = vectorizer.transform(test_data['review'])

y_train = train_data['sentiment']
y_test = test_data['sentiment']

# Train and evaluate the Naïve Bayes model
nb_model = MultinomialNB()
nb_model.fit(train_vectors, y_train)
nb_predictions = nb_model.predict(test_vectors)
nb_accuracy, nb_precision, nb_recall, nb_confusion_matrix = _report_metrics(
    "Naïve Bayes", y_test, nb_predictions
)

# Train and evaluate the Support Vector Machine model.
# max_iter is raised to 50000 to give the solver a larger iteration budget.
# random_state seeds the solver's data shuffling so repeated runs produce the
# same model; 42 matches the seed used for the train/test split. Scores may
# differ slightly from those of an earlier unseeded run.
svm_model = LinearSVC(max_iter=50000, random_state=42)
svm_model.fit(train_vectors, y_train)
svm_predictions = svm_model.predict(test_vectors)
svm_accuracy, svm_precision, svm_recall, svm_confusion_matrix = _report_metrics(
    "Support Vector Machine", y_test, svm_predictions
)



Naïve Bayes accuracy: 0.8565
Naïve Bayes precision: 0.8570981140127285
Naïve Bayes recall: 0.8565
Naïve Bayes confusion matrix:
 [[4342  619]
 [ 816 4223]]
Support Vector Machine accuracy: 0.8621
Support Vector Machine precision: 0.8621011415216031
Support Vector Machine recall: 0.8621
Support Vector Machine confusion matrix:
 [[4262  699]
 [ 680 4359]]


In [46]:
# Reuse the Support Vector Machine already fitted above instead of training a
# second LinearSVC here: refitting with default settings would silently replace
# the max_iter=50000 model (the one whose metrics were reported) with a
# different one, and would repeat the training work for no benefit.

# Encode the new text with the vocabulary learned from the training data
new_tweet = "I just watched a great movie today"
new_tweet_vector = vectorizer.transform([new_tweet])

# predict returns one label per input row; there is a single row here
new_tweet_sentiment = svm_model.predict(new_tweet_vector)

print("The sentiment of this tweet is : " + new_tweet_sentiment[0])

The sentiment of this tweet is : positive


