In [9]:
# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Step 2: Load IMDb dataset from local file
# NOTE: this is a machine-specific absolute path; change it when running elsewhere.
DATA_PATH = "/home/rguktrkvalley/Music/IMDB Dataset.csv"
df = pd.read_csv(DATA_PATH)

# Step 3: Prepare features and labels
# X holds the raw review text; y holds the sentiment encoded as 1 (positive) / 0 (negative).
X = df['review']
y = df['sentiment'].map({'positive': 1, 'negative': 0})

# Series.map turns every label that is missing from the dict into NaN. Fail
# here with a clear message instead of letting NaN labels reach model fitting.
unmapped_mask = y.isna()
if unmapped_mask.any():
    unexpected_labels = sorted(df.loc[unmapped_mask, 'sentiment'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'sentiment' column: {unexpected_labels}; "
        "expected only 'positive' or 'negative'."
    )

# Step 4: Split dataset
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: TF-IDF vectorization
# The vectorizer is fitted on the training reviews only; the test reviews are
# then transformed with that already-fitted vectorizer.
vectorizer = TfidfVectorizer(
    max_features=10000,
    stop_words='english',
)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 6: Train model
# Fit a logistic-regression classifier on the TF-IDF training matrix,
# allowing the solver up to 1000 iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# Step 7: Evaluate
# Score the held-out reviews and print overall accuracy plus per-class metrics.
y_pred = model.predict(X_test_vec)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
# sep="" stops print() from inserting its default separator space right after
# the "\n", which would shift the report's header row one column out of line
# with the data rows beneath it.
print(
    "📊 Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
    sep="",
)

# Step 8: Predict new samples
# A few hand-written reviews used as a quick sanity check of the trained model.
new_reviews = [
    "This movie was a total masterpiece!",
    "The worst acting I have ever seen.",
    "Average film. Not good, not bad.",
    "What a beautiful story and music!"
]

# Transform the samples with the already-fitted vectorizer, then classify them.
new_vec = vectorizer.transform(new_reviews)
preds = model.predict(new_vec)

print("\n📝 Predictions:")
for text, predicted in zip(new_reviews, preds):
    # Label 1 is reported as "Positive"; any other label as "Negative".
    if predicted == 1:
        verdict = "Positive"
    else:
        verdict = "Negative"
    print(f"{text} ➜ {verdict}")


✅ Accuracy: 0.8904
📊 Report:
               precision    recall  f1-score   support

           0       0.90      0.87      0.89      4961
           1       0.88      0.91      0.89      5039

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000


📝 Predictions:
This movie was a total masterpiece! ➜ Positive
The worst acting I have ever seen. ➜ Negative
Average film. Not good, not bad. ➜ Negative
What a beautiful story and music! ➜ Positive
