In [None]:
import numpy as np
from scipy.sparse import hstack
import joblib

# Locations of the serialized artifacts, relative to the notebook's working
# directory. Kept as named constants so they can be changed in one place.
MODEL_PATH = "models/logistic_regression_no_stars.pkl"
TFIDF_PATH = "models/tfidf_vectorizer.pkl"

# SECURITY: joblib.load is pickle-based, so loading a file can execute
# arbitrary code. Only load artifacts that come from a trusted source.

# Classifier used by the prediction cells (exposes predict / predict_proba).
model = joblib.load(MODEL_PATH)

# Text vectorizer used to turn a cleaned review into TF-IDF features.
# It is read with joblib.load, exactly like the model above.
# NOTE(review): if this file was written with plain pickle rather than
# joblib.dump, confirm that joblib.load still restores it correctly.
tfidf = joblib.load(TFIDF_PATH)


In [8]:
import re
import string

def clean_text(text):
    """Normalize a raw review string before vectorization.

    The steps run in this order: lowercase the text, remove URLs
    (``http...`` / ``www...``), remove ``<...>`` tags, remove punctuation,
    collapse every whitespace run into a single space, and trim both ends.

    Args:
        text: The raw review as a ``str``.

    Returns:
        The normalized string.
    """
    # (pattern, replacement) pairs applied one after another. Order matters:
    # URL and tag removal need the punctuation to still be present.
    substitutions = (
        (r"http\S+|www\S+", ""),
        (r"<.*?>", ""),
        # NOTE(review): inside this character class the "\]" coming from
        # string.punctuation is parsed as an escaped "]", so a literal
        # backslash is NOT removed. Left unchanged on purpose — confirm
        # against the preprocessing used at training time before changing it.
        (f"[{string.punctuation}]", ""),
        (r"\s+", " "),
    )

    result = text.lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result.strip()


In [19]:
def extract_features(raw_review):
    """Build the single-row feature matrix for one review (no star rating).

    The review is cleaned with ``clean_text`` and vectorized with the
    module-level ``tfidf`` object. Four hand-crafted counts are then appended
    as extra columns, in this order: word count, character count, number of
    "!" characters, number of "?" characters.

    Args:
        raw_review: The review text exactly as entered by the user.

    Returns:
        The result of ``scipy.sparse.hstack`` over the TF-IDF row and the four
        engineered columns.
    """
    normalized = clean_text(raw_review)

    # Length features are measured on the cleaned text, whereas "!" and "?"
    # are counted on the raw text because clean_text strips punctuation.
    handcrafted = [
        len(normalized.split()),
        len(normalized),
        raw_review.count("!"),
        raw_review.count("?"),
    ]
    handcrafted_row = np.array([handcrafted])

    # Vectorize the single cleaned document, then append the extra columns
    # to the right of the TF-IDF columns.
    text_matrix = tfidf.transform([normalized])
    return hstack([text_matrix, handcrafted_row])


In [36]:
def predict_review(raw_review):
    """Classify one raw review and report how confident the model is.

    Args:
        raw_review: The review text exactly as entered by the user.

    Returns:
        A dict with two keys:
          * "label": "FAKE REVIEW" when the predicted class equals 1,
            otherwise "GENUINE REVIEW".
          * "confidence": the highest class probability expressed as a
            percentage, capped at 99.9 and rounded to two decimals.
    """
    features = extract_features(raw_review)

    predicted_class = model.predict(features)[0]
    class_probs = model.predict_proba(features)[0]

    # Highest class probability as a percentage; the cap keeps the reported
    # value from ever reaching 100.
    top_percent = float(max(class_probs) * 100)
    capped_percent = round(min(top_percent, 99.9), 2)

    if predicted_class == 1:
        verdict = "FAKE REVIEW"
    else:
        verdict = "GENUINE REVIEW"

    return {"label": verdict, "confidence": capped_percent}

In [34]:
# Sanity check: print the raw class probabilities for a trivial input.
sample_features = extract_features("test review")
print(model.predict_proba(sample_features))

[[0.99160007 0.00839993]]


In [37]:
# Example reviews used to exercise predict_review end to end.
demo_reviews = [
    "Highly recommended! Best restaurant ever!!!",
    "This place was okay. Nothing special?",
    "Worst service ever. I will never come back!!!",
    "Mediocre food, but the ambiance was nice.",
]

# Keep the individual result names bound, one per example, in order.
result, test, test2, test3 = (predict_review(review) for review in demo_reviews)

# Print one result dict per line, in the same order as the examples.
for outcome in (result, test, test2, test3):
    print(outcome)

{'label': 'FAKE REVIEW', 'confidence': 99.9}
{'label': 'GENUINE REVIEW', 'confidence': 88.53}
{'label': 'FAKE REVIEW', 'confidence': 97.59}
{'label': 'GENUINE REVIEW', 'confidence': 98.83}
