<a href="https://colab.research.google.com/github/Thush-ar/fake-product-review-analyzer/blob/main/Fake_product_review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install streamlit pyngrok scikit-learn pandas joblib


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.0-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m58.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.4.0 streamlit-1.50.0


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Page setup. set_page_config is issued before any other Streamlit call in this
# script; it sets the browser-tab title and a centered layout. The title and
# intro text below are rendered at the top of the main page.
st.set_page_config(page_title="Fake Review Detector (SVM)", layout="centered")
st.title("🕵️‍♂️ Fake vs Genuine Product Review Detector")
st.write("Upload a dataset, train the model, and classify reviews as Genuine or Fake.")

# cache_resource (rather than cache_data) is Streamlit's recommended cache for ML
# models: the fitted pipeline is stored once and reused across reruns instead of
# being pickled and copied on every call.
@st.cache_resource
def train_model(df, text_col='text_', label_col='label'):
    """Train a calibrated linear-SVM review classifier and score it on a hold-out split.

    Labels are encoded as 1 = genuine and 0 = fake, which is the convention the
    rest of the app uses when it turns a prediction into "Genuine"/"Fake".

    Args:
        df: DataFrame holding the reviews and their labels.
        text_col: Name of the column containing the review text.
        label_col: Name of the column containing the textual class label
            (case-insensitive; surrounding whitespace is ignored).

    Returns:
        Tuple ``(pipe, acc, report)``: the fitted TF-IDF + calibrated LinearSVC
        pipeline, the accuracy on the 20% validation split, and the text
        classification report for that split.

    Raises:
        KeyError: If ``text_col`` or ``label_col`` is not a column of ``df``.
        ValueError: If a label value is not recognised, or if fewer than two
            classes remain after encoding.
    """
    # Explicit vocabulary instead of the former "'g' in label" substring test.
    # That heuristic encoded 'cg' as genuine, but in the public fake-reviews
    # corpus this schema ('text_', 'label') comes from, 'CG' marks
    # computer-generated (fake) reviews and 'OR' marks original (genuine) ones.
    label_codes = {
        'or': 1, 'original': 1, 'genuine': 1, 'real': 1,
        'cg': 0, 'computer generated': 0, 'computer-generated': 0, 'fake': 0,
    }

    # Drop incomplete rows so missing values are not turned into the string 'nan'.
    rows = df.dropna(subset=[text_col, label_col])
    X = rows[text_col].astype(str)
    y_raw = rows[label_col].astype(str).str.strip().str.lower()
    y = y_raw.map(label_codes)

    unknown = sorted(y_raw[y.isna()].unique())
    if unknown:
        raise ValueError(
            f"Unrecognised label value(s) {unknown}; expected one of {sorted(label_codes)}."
        )
    y = y.astype(int)
    if y.nunique() < 2:
        raise ValueError("The dataset must contain both genuine and fake reviews.")

    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # LinearSVC has no predict_proba; CalibratedClassifierCV wraps it so the
    # pipeline can report class probabilities.
    tfidf = TfidfVectorizer(ngram_range=(1, 2), max_df=0.85, min_df=1)
    svc = LinearSVC(class_weight='balanced', max_iter=5000)
    calibrated = CalibratedClassifierCV(svc)
    pipe = Pipeline([
        ('tfidf', tfidf),
        ('clf', calibrated)
    ])
    pipe.fit(X_train, y_train)

    preds = pipe.predict(X_val)
    acc = accuracy_score(y_val, preds)
    report = classification_report(y_val, preds, zero_division=0)
    return pipe, acc, report

# Upload dataset in sidebar
st.sidebar.header("Upload your dataset")
uploaded_file = st.sidebar.file_uploader("Upload CSV file with 'text_' and 'label' columns", type=["csv"])

if uploaded_file is not None:
    # Report unreadable uploads in the sidebar instead of showing a raw traceback.
    try:
        dataset = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.sidebar.error(f"Could not read the CSV file: {exc}")
        st.stop()

    # Validate the schema up front so the user gets an actionable message.
    missing_cols = [col for col in ('text_', 'label') if col not in dataset.columns]
    if missing_cols:
        st.sidebar.error(f"The CSV is missing required column(s): {', '.join(missing_cols)}")
        st.stop()

    st.sidebar.success(f"Loaded dataset with {len(dataset)} rows.")

    # Training raises ValueError for unusable data (e.g. a single class only).
    try:
        model, acc, report = train_model(dataset, text_col='text_', label_col='label')
    except ValueError as exc:
        st.sidebar.error(f"Training failed: {exc}")
        st.stop()

    st.sidebar.success(f"Validation Accuracy: {acc:.3f}")
    st.sidebar.text_area("Validation report", value=report, height=200)

    st.header("Classify a Review")
    user_text = st.text_area("Enter a product review to classify:", height=160)
    if st.button("Classify"):
        if not user_text or not user_text.strip():
            st.error("Please enter a review text first.")
        else:
            pred_proba = model.predict_proba([user_text])[0]
            # Probabilities are ordered like model.classes_; look the label up
            # there rather than assuming the column index equals the label.
            pred_label = int(model.classes_[pred_proba.argmax()])
            confidence = pred_proba.max()
            label_name = "Genuine" if pred_label == 1 else "Fake"
            st.subheader(f"Prediction: {label_name}")
            st.write(f"Confidence: {confidence:.2%}")
else:
    st.info("Upload a dataset in the sidebar to train the model.")


Writing app.py


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never hardcode the ngrok authtoken in the notebook: it is a credential and ends
# up in version control. Read it from the NGROK_AUTHTOKEN environment variable,
# or prompt for it without echoing. Any token that was previously committed in
# this cell should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
if not ngrok_token.strip():
    raise ValueError("An ngrok authtoken is required to open the tunnel.")
ngrok.set_auth_token(ngrok_token.strip())




In [None]:
# Start a tunnel to the Streamlit port
public_url = ngrok.connect(8501)   # integer, no keyword
print("Streamlit public URL:", public_url)

# Launch Streamlit app
!streamlit run app.py &>/dev/null &


Streamlit public URL: NgrokTunnel: "https://d1a644e0092b.ngrok-free.app" -> "http://localhost:8501"
