<a href="https://colab.research.google.com/github/Thush-ar/fake-product-review-analyzer/blob/main/fake_product_review_analyzer_using_randomforest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install streamlit pandas scikit-learn joblib nltk pyngrok

Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pyngrok
  Downloading pyngrok-7.4.0-py3-none-any.whl.metadata (8.1 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.4.0-py3-none-any.whl (25 kB)
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, pydeck, streamlit
Successfully installed pydeck-0.9.1 pyngrok-7.4.0 streamlit-1.50.0


In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import os

# --- Page Configuration ---
# Browser-tab title/icon and wide layout, gathered in one mapping so the
# settings can be read at a glance before being handed to Streamlit.
page_settings = {
    "page_title": "Fake Review Detection",
    "page_icon": "🤖",
    "layout": "wide",
}
st.set_page_config(**page_settings)

# --- NLTK Setup ---
# Fetch the NLTK corpora the app needs; wrapped in st.cache_resource so the
# check is not repeated on every script rerun.
@st.cache_resource
def download_nltk_data():
    """Download the 'stopwords' and 'wordnet' NLTK packages if they are missing.

    Each package is probed first; only a ``LookupError`` from the probe
    triggers ``nltk.download`` for that package.
    """
    availability_probes = (
        (lambda: stopwords.words('english'), 'stopwords'),
        (lambda: nltk.data.find('corpora/wordnet.zip'), 'wordnet'),
    )
    for probe, package_name in availability_probes:
        try:
            probe()
        except LookupError:
            nltk.download(package_name)


download_nltk_data()

# --- Text Preprocessing Function ---
# Results are memoised by Streamlit so identical inputs are not reprocessed.
@st.cache_data
def preprocess_text(text):
    """Normalise a review into a space-separated string of lemmatised tokens.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, English stop words are
    dropped, and each remaining token is lemmatised with WordNet.

    Args:
        text: The raw review text (a string).

    Returns:
        The cleaned tokens joined by single spaces.
    """
    english_stop_words = set(stopwords.words('english'))
    wordnet_lemmatizer = WordNetLemmatizer()

    letters_only = re.sub('[^a-zA-Z]', ' ', text)

    kept_tokens = []
    for token in letters_only.lower().split():
        if token in english_stop_words:
            continue
        kept_tokens.append(wordnet_lemmatizer.lemmatize(token))
    return " ".join(kept_tokens)

# --- Model Training Function ---
# Only the expensive, side-effect-free fit is cached (keyed on the dataset).
# Writing the artifacts to disk is deliberately left outside the cache: a
# cached function's body is skipped on a cache hit, which would otherwise leave
# a stale model on disk after training on a different dataset in between.
@st.cache_resource
def _fit_review_classifier(df):
    """Fit TF-IDF + random forest on *df* and score it on a 20% hold-out.

    Args:
        df: DataFrame with a ``text_`` column (review text) and a ``label``
            column holding 'CG' or 'OR'. It is not modified.

    Returns:
        ``(model, vectorizer, accuracy, report)`` where ``accuracy`` is the
        hold-out accuracy and ``report`` is the text classification report.

    Raises:
        KeyError: If ``text_`` or ``label`` is missing from *df*.
        ValueError: If no row has both text and a recognised label.
    """
    # Work on a copy so the caller's DataFrame is never mutated.
    labelled = df[['text_', 'label']].copy()

    # Map labels to binary (CG = Genuine -> 0, OR = Fake -> 1), matching the
    # upload instructions shown in the UI.
    # NOTE(review): some public fake-review datasets use CG for
    # "computer-generated" and OR for "original" reviews, which would make this
    # mapping inverted -- confirm against the data source.
    labelled['label'] = labelled['label'].map({'CG': 0, 'OR': 1})
    labelled = labelled.dropna(subset=['text_', 'label'])
    if labelled.empty:
        raise ValueError(
            "No usable rows: 'label' must contain 'CG' or 'OR' and 'text_' must not be empty."
        )

    texts = labelled['text_'].astype(str).apply(preprocess_text)
    y = labelled['label'].astype(int)

    # Split the raw texts first, then fit TF-IDF on the training part only, so
    # vocabulary/IDF statistics from the test set cannot leak into the metrics.
    train_texts, test_texts, y_train, y_test = train_test_split(
        texts, y, test_size=0.2, random_state=42
    )
    vectorizer = TfidfVectorizer(max_features=5000)
    X_train = vectorizer.fit_transform(train_texts)
    X_test = vectorizer.transform(test_texts)

    model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # Explicit labels keep target_names aligned even if one class happens to be
    # absent from the hold-out split.
    report = classification_report(
        y_test,
        y_pred,
        labels=[0, 1],
        target_names=['Genuine (CG)', 'Fake (OR)'],
        zero_division=0,
    )
    return model, vectorizer, accuracy, report


def train_model(df):
    """Train the fake-review classifier on *df* and persist it to disk.

    The fitted model and vectorizer are written to ``fake_review_model.pkl``
    and ``vectorizer.pkl`` on every call, so the files always correspond to the
    metrics returned here.

    Args:
        df: DataFrame with ``text_`` and ``label`` ('CG'/'OR') columns. It is
            left unmodified.

    Returns:
        ``(accuracy, report)`` for the 20% hold-out split.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If no usable rows remain after cleaning.
    """
    model, vectorizer, accuracy, report = _fit_review_classifier(df)

    # Save the model and vectorizer (also on a cache hit of the fit above).
    joblib.dump(model, 'fake_review_model.pkl')
    joblib.dump(vectorizer, 'vectorizer.pkl')

    return accuracy, report

# --- Main App Interface ---
st.title("🤖 Fake Review Detection System")
st.markdown("Upload a dataset, train a Random Forest model, and classify new reviews.")

# --- Step 1: Upload Dataset ---
st.header("1. Upload Your Dataset")
st.markdown("The CSV file must contain a `text_` column for the review text and a `label` column with 'CG' for genuine and 'OR' for fake reviews.")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
        st.success("Dataset loaded successfully!")
        st.dataframe(df.head())

        # --- Step 2: Train Model ---
        st.header("2. Train the Classification Model")
        if st.button("Click to Train Model"):
            with st.spinner('Training in progress... This may take a moment.'):
                accuracy, report = train_model(df)
                st.session_state['model_trained'] = True
                st.session_state['accuracy'] = accuracy
                st.session_state['report'] = report

            st.success("Model trained successfully!")

        # st.button() is True only on the rerun triggered by the click, so the
        # metrics are rendered from session state instead of inside the button
        # branch; otherwise they disappear on the next interaction (e.g. when
        # "Classify Review" is pressed).
        if st.session_state.get('model_trained', False):
            st.metric(label="Model Accuracy", value=f"{st.session_state['accuracy']:.4f}")
            st.text("Classification Report:")
            st.code(st.session_state['report'])

    # Top-level UI boundary: report any load/training failure in the page
    # rather than letting the script crash.
    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.warning("Please ensure your CSV has 'text_' and 'label' columns.")

# --- Step 3: Classify New Review ---
# The classification form is only offered once a model has been trained in
# this session; until then the user is pointed back to the training steps.
if not st.session_state.get('model_trained', False):
    st.info("Please upload a dataset and train the model to start classifying reviews.")
else:
    st.header("3. Classify a New Review")
    user_input = st.text_area("Enter a review text to classify:", height=150)
    classify_clicked = st.button("Classify Review")

    if classify_clicked and not user_input.strip():
        st.warning("Please enter a review to classify.")
    elif classify_clicked:
        try:
            # Reload the artifacts that training wrote to disk.
            model = joblib.load('fake_review_model.pkl')
            vectorizer = joblib.load('vectorizer.pkl')

            # Clean the text with preprocess_text, then vectorize it.
            features = vectorizer.transform([preprocess_text(user_input)])

            predicted_label = model.predict(features)[0]
            class_probabilities = model.predict_proba(features)[0]

            # Label 1 is shown as fake, anything else as genuine; the
            # confidence is the probability of the displayed class.
            if predicted_label == 1:
                st.error(f"**Prediction: FAKE Review** (Confidence: {class_probabilities[1]:.2%})")
            else:
                st.success(f"**Prediction: GENUINE Review** (Confidence: {class_probabilities[0]:.2%})")

        except FileNotFoundError:
            st.error("Model files not found. Please train the model first.")
        except Exception as e:
            st.error(f"An error occurred during prediction: {e}")

Writing app.py


In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Stop any ngrok process left over from a previous run of this cell.
ngrok.kill()

# Never hard-code the auth token in the notebook: anything committed here is
# public. Read it from the NGROK_AUTH_TOKEN environment variable, or prompt for
# it interactively (input is not echoed) when the variable is unset.
# NOTE(review): a token was previously committed in this cell -- it should be
# revoked in the ngrok dashboard and replaced.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)



In [None]:
# Start a tunnel to the Streamlit port and print the tunnel object returned by
# pyngrok (its printed form shows the public URL and the local address).
# NOTE(review): app.py is launched below without an explicit port, so this
# presumably relies on Streamlit serving on 8501 -- confirm if the port changes.
public_url = ngrok.connect(8501)   # port passed positionally as an integer
print("Streamlit public URL:", public_url)

# Launch the Streamlit app via an IPython shell escape: it runs in the
# background (trailing &) with its output discarded (&>/dev/null), so the cell
# returns immediately.
!streamlit run app.py &>/dev/null &


Streamlit public URL: NgrokTunnel: "https://09ac5dc77a99.ngrok-free.app" -> "http://localhost:8501"
