In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
from nltk.corpus import stopwords
import nltk

# Fetch the NLTK stop-word corpus so stopwords.words('english') can be read below.
nltk.download('stopwords')

# Read the review data; the CSV is decoded as ISO-8859-1.
file_path = "review-details.csv"  # Adjust the file path if necessary
df = pd.read_csv(file_path, encoding='ISO-8859-1')

# Features: the raw review text, with missing entries replaced by ''.
X = df['review_text'].fillna('')

# Targets: the 'verified_purchase' column cast to int (1 = verified, 0 = unverified).
# Swap in another column here to train against a different label.
y = df['verified_purchase'].astype(int)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build TF-IDF features with English stop words removed. The vocabulary is
# learned from the training split only, then reused for the test split.
vectorizer = TfidfVectorizer(stop_words=stopwords.words('english'))
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit a Multinomial Naive Bayes classifier (fit() returns the estimator itself).
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Score the held-out split and print the accuracy and per-class metrics.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print("Accuracy:", accuracy)
print("\nClassification Report:\n", report)

# Function to predict the verified-purchase label for a single review (label 0 is reported as "Fake Review")
def predict_review(input_text):
    """Classify a single review with the module-level vectorizer and model.

    Args:
        input_text: Raw review text to classify.

    Returns:
        "Verified Purchase" when the model predicts label 1, otherwise
        "Fake Review".
    """
    # transform() takes a collection of documents, so wrap the single string.
    features = vectorizer.transform([input_text])

    # One document in, one prediction out: take the first (only) label.
    label = model.predict(features)[0]

    if label == 1:
        return "Verified Purchase"
    return "Fake Review"

# Ask for a review on stdin, classify it, and echo the predicted label.
prompt = "Enter the review text: "
user_input = input(prompt)
result = predict_review(user_input)
print(f"The review is: {result}")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\chitt\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


Accuracy: 0.8761651131824234

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.81      0.85       326
           1       0.86      0.93      0.89       425

    accuracy                           0.88       751
   macro avg       0.88      0.87      0.87       751
weighted avg       0.88      0.88      0.88       751

