<a href="https://colab.research.google.com/github/Thush-ar/fake-product-review-analyzer/blob/main/fake_product_review_analyser_using_SVM_and_ngrok.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
!pip install streamlit joblib
!npm install -g localtunnel
!pip install pyngrok

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K
changed 22 packages in 3s
[1G[0K⠦[1G[0K
[1G[0K⠦[1G[0K3 packages are looking for funding
[1G[0K⠦[1G[0K  run `npm fund` for details


In [28]:
%%writefile app.py
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from scipy.sparse import hstack
import joblib

# Page chrome: browser-tab title and layout are configured first, followed by
# the visible heading and a one-line description of what the app does.
st.set_page_config(
    layout="centered",
    page_title="Fake Review Detector (SVM)",
)
st.title("🕵️‍♂️ Fake vs Genuine Product Review Detector")
st.write(
    "Upload a dataset, train the model, and classify reviews as Genuine or Fake "
    "using both review text and rating."
)

@st.cache_data
def train_model(df, text_col='text_', rating_col='rating', label_col='label'):
    """Train a calibrated linear SVM on review text plus star rating.

    The text is vectorized with TF-IDF (unigrams and bigrams), the rating is
    standardized, and both are concatenated into one sparse feature matrix.
    A ``LinearSVC`` wrapped in ``CalibratedClassifierCV`` is fitted on 80% of
    the rows and evaluated on the remaining 20% (stratified split).

    Side effects: the fitted vectorizer, scaler and model are written to
    ``tfidf.joblib``, ``rating_scaler.joblib`` and ``svm_review_model.joblib``
    in the current working directory. Because the function is wrapped in
    ``st.cache_data``, a repeated call with an identical dataframe returns the
    cached metrics and does NOT rewrite those files.

    Label encoding: a label whose text starts with "c" (case-insensitive,
    e.g. "CG") becomes class 1; every other label becomes class 0.
    NOTE(review): in the public fake-reviews dataset "CG" denotes
    computer-generated (fake) reviews - confirm that code consuming the saved
    model interprets class 1 accordingly.

    Args:
        df: DataFrame holding the training data.
        text_col: Name of the column with the review text.
        rating_col: Name of the column with the numeric rating.
        label_col: Name of the column with the class label.

    Returns:
        Tuple ``(acc, report)``: validation accuracy as a float and the
        text produced by ``classification_report``.

    Raises:
        KeyError: If any of the three columns is missing from ``df``.
        ValueError: If, after dropping unusable rows, fewer than two classes
            remain (the stratified split and the classifier need both).
    """
    missing = [c for c in (text_col, rating_col, label_col) if c not in df.columns]
    if missing:
        raise KeyError(f"Dataset is missing required column(s): {missing}")

    # Work on a copy so the caller's dataframe is never mutated.
    data = df[[text_col, rating_col, label_col]].copy()
    # Non-numeric ratings become NaN and are dropped together with rows that
    # lack text or a label; otherwise NaN would reach the scaler/SVM, and a
    # missing review would silently be trained on as the literal text 'nan'.
    data[rating_col] = pd.to_numeric(data[rating_col], errors='coerce')
    data = data.dropna(subset=[text_col, rating_col, label_col])

    X_text = data[text_col].astype(str)
    X_rating = data[rating_col].to_numpy(dtype=float).reshape(-1, 1)
    # Convert label to numeric automatically: starts with "c" -> 1, else 0.
    y = (
        data[label_col].astype(str).str.strip().str.lower()
        .str.startswith('c').astype(int)
    )

    if y.nunique() < 2:
        raise ValueError(
            "Training needs both classes: labels starting with 'c' and at "
            "least one other label."
        )

    X_train_text, X_val_text, X_train_rating, X_val_rating, y_train, y_val = train_test_split(
        X_text, X_rating, y, test_size=0.2, random_state=42, stratify=y)

    # Fit on the training split only so validation text does not leak into
    # the vocabulary / IDF weights.
    tfidf = TfidfVectorizer(ngram_range=(1, 2), max_df=0.85, min_df=1)
    X_train_tfidf = tfidf.fit_transform(X_train_text)
    X_val_tfidf = tfidf.transform(X_val_text)

    scaler = StandardScaler()
    X_train_rating_scaled = scaler.fit_transform(X_train_rating)
    X_val_rating_scaled = scaler.transform(X_val_rating)

    # CSR is the row-indexable sparse format scikit-learn works with.
    X_train_combined = hstack([X_train_tfidf, X_train_rating_scaled], format='csr')
    X_val_combined = hstack([X_val_tfidf, X_val_rating_scaled], format='csr')

    # LinearSVC has no predict_proba; the calibration wrapper provides it.
    # random_state makes the solver's result repeatable across runs.
    svc = LinearSVC(class_weight='balanced', max_iter=5000, random_state=42)
    calibrated = CalibratedClassifierCV(svc)
    calibrated.fit(X_train_combined, y_train)

    preds = calibrated.predict(X_val_combined)
    acc = accuracy_score(y_val, preds)
    report = classification_report(y_val, preds, zero_division=0)

    # Persist the fitted pieces for the prediction step of the app.
    joblib.dump(tfidf, 'tfidf.joblib')
    joblib.dump(scaler, 'rating_scaler.joblib')
    joblib.dump(calibrated, 'svm_review_model.joblib')

    return acc, report

# Sidebar upload
st.sidebar.header("Upload your dataset")
uploaded_file = st.sidebar.file_uploader("Upload CSV with 'text_', 'rating', 'label'", type=["csv"])

# Columns train_model reads with its default arguments.
REQUIRED_COLUMNS = ('text_', 'rating', 'label')
# train_model encodes labels starting with "c" (e.g. "CG") as class 1. In the
# public fake-reviews dataset "CG" means computer-generated, so class 1 is the
# FAKE class; showing it as "Genuine" would invert every prediction.
FAKE_CLASS = 1

if uploaded_file is None:
    st.info("Upload a dataset in the sidebar to train the model.")
else:
    df = pd.read_csv(uploaded_file)
    st.sidebar.success(f"Loaded dataset with {len(df)} rows.")

    # Validate up front and surface problems in the UI instead of letting the
    # script die with a raw traceback.
    trained = False
    missing_cols = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing_cols:
        st.sidebar.error(f"Dataset is missing required column(s): {', '.join(missing_cols)}")
    else:
        try:
            acc, report = train_model(df)
            trained = True
        except ValueError as exc:
            st.sidebar.error(f"Training failed: {exc}")

    if trained:
        st.sidebar.success(f"Validation Accuracy: {acc:.3f}")
        st.sidebar.text_area("Validation report", value=report, height=200)

        # Load trained components (written to disk by train_model).
        tfidf = joblib.load('tfidf.joblib')
        scaler = joblib.load('rating_scaler.joblib')
        model = joblib.load('svm_review_model.joblib')

        st.header("Classify a Review")
        user_text = st.text_area("Enter a product review to classify:", height=160)

        # Full star options
        star_choices = ["⭐", "⭐⭐", "⭐⭐⭐", "⭐⭐⭐⭐", "⭐⭐⭐⭐⭐"]
        selected_stars = st.radio("Click on stars to choose rating:", star_choices, index=2)

        # Convert stars to numeric rating (1–5)
        user_rating = star_choices.index(selected_stars) + 1
        st.write(f"Selected Rating: {user_rating} {selected_stars}")

        if st.button("Classify"):
            if not user_text.strip():
                st.error("Please enter a review text first.")
            else:
                # Build the feature row the same way as in training:
                # TF-IDF text features followed by the scaled rating.
                X_text = tfidf.transform([user_text])
                rating_scaled = scaler.transform([[user_rating]])
                X_combined = hstack([X_text, rating_scaled], format='csr')
                pred_proba = model.predict_proba(X_combined)[0]
                # Map the argmax position through classes_ rather than
                # assuming the column index equals the class value.
                pred_label = int(model.classes_[pred_proba.argmax()])
                confidence = pred_proba.max()
                label_name = "Fake" if pred_label == FAKE_CLASS else "Genuine"
                st.subheader(f"Prediction: {label_name}")
                st.write(f"Confidence: {confidence:.2%}")


Overwriting app.py


In [29]:
import os
from getpass import getpass

from pyngrok import ngrok

# Note: use your own ngrok key here - sign up at ngrok.com to get one.
# The token is read from the NGROK_AUTHTOKEN environment variable, or asked
# for interactively without echo, so it never ends up saved in the notebook.
# SECURITY: an authtoken was previously hardcoded in this cell and published
# with the notebook; that token should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
if not ngrok_token.strip():
    raise ValueError("An ngrok authtoken is required to open a tunnel.")
ngrok.set_auth_token(ngrok_token.strip())


In [30]:
from pyngrok import ngrok
import os
import time


# Port Streamlit serves on and the tunnel forwards to.
STREAMLIT_PORT = 8501

# Launch Streamlit app in the background first, so the tunnel has a server to
# forward to. `>/dev/null 2>&1` is the portable spelling of bash's `&>`.
get_ipython().system(f'streamlit run app.py --server.port {STREAMLIT_PORT} >/dev/null 2>&1 &')

# Start a tunnel to the Streamlit port
try:
    public_url = ngrok.connect(STREAMLIT_PORT)
except Exception as e:
    # Typically a stale ngrok agent from an earlier run of this cell is still
    # holding the session. pyngrok manages its own ngrok binary, so stop it
    # through the library instead of shelling out to an `ngrok` on PATH.
    print(f"An error occurred: {e}")
    print("Attempting to reconnect after killing ngrok process...")
    ngrok.kill()
    time.sleep(2)  # give the old agent a moment to release the session
    # A second failure is allowed to propagate so the real error is visible.
    public_url = ngrok.connect(STREAMLIT_PORT)

# `public_url` is the tunnel object returned by pyngrok; print its URL, falling
# back to the object itself if it has no `public_url` attribute.
print("Streamlit public URL:", getattr(public_url, "public_url", public_url))

Streamlit public URL: NgrokTunnel: "https://cac17ce9e8fd.ngrok-free.app" -> "http://localhost:8501"
