In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn import metrics
import pandas as pd

# Toy sentiment corpus: one short review per entry.
# texts[i] is paired with labels[i]; 1 = positive, 0 = negative.
texts = [
    "This is a great movie",
    "I hated this film",
    "Amazing acting and direction",
    "Fantastic movie ever",
    "Loved the screenplay",
    "Boring storyline",
    "Movie was the worst",
]
labels = [
    1,  # "This is a great movie"
    0,  # "I hated this film"
    1,  # "Amazing acting and direction"
    1,  # "Fantastic movie ever"
    1,  # "Loved the screenplay"
    0,  # "Boring storyline"
    0,  # "Movie was the worst"
]

In [12]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the shuffle (and therefore the split) repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

# Build the TF-IDF -> Multinomial Naive Bayes pipeline and train it on the
# training portion. Pipeline.fit returns the pipeline itself, so `model`
# is the fitted pipeline.
model = make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(X_train, y_train)

# Predicted labels for the held-out texts
y_pred = model.predict(X_test)


In [13]:
# Evaluate the model on the held-out split
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Display classification report.
# Precision/F1 are undefined for a class the model never predicts; with the
# default setting scikit-learn reports 0.0 and emits a warning per affected
# metric. zero_division=0 reports the same 0.0 values without the warnings.
print("\nClassification Report:")
print(metrics.classification_report(y_test, y_pred, zero_division=0))

# Test with a new example
new_text = ["This is a fantastic movie", "It was a boring film"]
predictions = model.predict(new_text)

# Display results: translate the numeric class into a readable label
# (1 -> "Positive", anything else -> "Negative").
sentiment_names = ["Positive" if pred == 1 else "Negative" for pred in predictions]
results_df = pd.DataFrame({'Text': new_text, 'Predicted Sentiment': sentiment_names})
print(results_df)


Accuracy: 50.00%

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

                        Text Predicted Sentiment
0  This is a fantastic movie            Positive
1       It was a boring film            Negative


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
