In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score


# Absolute path of the labelled review dataset that the rest of the script trains on.
file_path = 'B:\\OneDrive - Amity University\\Desktop\\Orison Tech\\task-3\\Test.csv\\MovieReview.csv'
data = pd.read_csv(file_path)

# Show the first rows so the column layout can be checked by eye.
print(data.head())

# Fail fast with a clear message when either column used below is absent.
# Both 'text' (features) and 'label' (targets) are required; checking only one
# of them would let a missing column surface later as a bare KeyError.
required_columns = ('text', 'label')
missing_columns = [name for name in required_columns if name not in data.columns]
if missing_columns:
    raise ValueError(
        "The CSV must contain 'text' and 'label' columns; "
        f"missing: {missing_columns}"
    )

# Features are the raw review strings; targets are the values of the 'label' column.
X = data['text']
y = data['label']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vectorizer on the training reviews only, then reuse the fitted
# vectorizer to encode the held-out reviews. English stop words are removed and
# the vocabulary is capped at 5000 features.
vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Train a multinomial Naive Bayes classifier on the TF-IDF features
# (fit() returns the estimator itself, so it can be bound in one step).
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Predict labels for the held-out reviews and report how well they match.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report)


import joblib

# Write the fitted classifier and the fitted vectorizer to disk, each under its
# own file name relative to the current working directory.
for fitted_object, output_file in (
    (model, 'movie_review_model.pkl'),
    (vectorizer, 'tfidf_vectorizer.pkl'),
):
    joblib.dump(fitted_object, output_file)

print("Model and vectorizer saved!")


def predict_sentiment(review):
    """Return the predicted label for a single review.

    The review is wrapped in a one-element list, encoded with the module-level
    ``vectorizer`` and classified with the module-level ``model``.

    Args:
        review: The review text to classify.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    features = vectorizer.transform([review])
    return model.predict(features)[0]


# Try the helper on one hand-written review and print the label it returns.
new_review = "The movie was fantastic and very entertaining!"
predicted_label = predict_sentiment(new_review)
print("Prediction for new review:", predicted_label)

                                                text  label
0  I always wrote this series off as being a comp...      0
1  1st watched 12/7/2002 - 3 out of 10(Dir-Steve ...      0
2  This movie was so poorly written and directed ...      0
3  The most interesting thing about Miryang (Secr...      1
4  when i first read about "berlin am meer" i did...      0
Accuracy: 0.851

Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.85      0.85       503
           1       0.85      0.85      0.85       497

    accuracy                           0.85      1000
   macro avg       0.85      0.85      0.85      1000
weighted avg       0.85      0.85      0.85      1000

Model and vectorizer saved!
Prediction for new review: 0
