In [1]:
!pip install gradio nltk scikit-learn


Collecting gradio
  Downloading gradio-5.25.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

## #Import necessary libraries <br>
**pandas**  	Load and manipulate dataset <br>
**string**	 For removing punctuation <br>
**nltk**	   Stopword removal (e.g., "the", "is") <br>
**sklearn	** For TF-IDF, model training <br>
**joblib**	 To save/load model and vectorizer <br>
**gradio**	 To build the web UI

In [2]:
import pandas as pd
import string
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import gradio as gr
import joblib


In [3]:
# Load dataset
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/fake reviews dataset.csv")
df

Unnamed: 0,category,rating,label,text
0,Home_and_Kitchen_5,5,CG,"Love this! Well made, sturdy, and very comfor..."
1,Home_and_Kitchen_5,5,CG,"love it, a great upgrade from the original. I..."
2,Home_and_Kitchen_5,5,CG,This pillow saved my back. I love the look and...
3,Home_and_Kitchen_5,1,CG,"Missing information on how to use it, but it i..."
4,Home_and_Kitchen_5,5,CG,Very nice set. Good quality. We have had the s...
...,...,...,...,...
40427,Clothing_Shoes_and_Jewelry_5,4,OR,I had read some reviews saying that this bra r...
40428,Clothing_Shoes_and_Jewelry_5,5,CG,I wasn't sure exactly what it would be. It is ...
40429,Clothing_Shoes_and_Jewelry_5,2,OR,"You can wear the hood by itself, wear it with ..."
40430,Clothing_Shoes_and_Jewelry_5,1,CG,I liked nothing about this dress. The only rea...


In [4]:
df = df[['text', 'rating', 'label']]  # select required columns
df.dropna(inplace=True)
df.head()
# label --> CG = fake , OG = Real

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)


Unnamed: 0,text,rating,label
0,"Love this! Well made, sturdy, and very comfor...",5,CG
1,"love it, a great upgrade from the original. I...",5,CG
2,This pillow saved my back. I love the look and...,5,CG
3,"Missing information on how to use it, but it i...",1,CG
4,Very nice set. Good quality. We have had the s...,5,CG


In [5]:
#Preprocessing using NLTK
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [6]:
# Text Preprocessing
def preprocess(text):
    text = text.lower()
    text = ''.join([c for c in text if c not in string.punctuation])
    words = text.split()
    words = [w for w in words if w not in stop_words]
    return ' '.join(words)

df['cleaned'] = df['text'].apply(preprocess)

In [7]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned'])
X

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 1055545 stored elements and shape (40432, 51120)>

In [8]:
y = df['rating']
y

Unnamed: 0,rating
0,5
1,5
2,5
3,1
4,5
...,...
40427,4
40428,5
40429,2
40430,1


In [9]:
# model train
## Linear Regression for Rating Prediction
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

In [10]:
#Save Models and Vectorizer
joblib.dump(model, "rating_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")

['vectorizer.pkl']

In [12]:
# Load model & vectorizer
model = joblib.load("rating_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Predict function
def predict_rating(review_text):
    cleaned = preprocess(review_text)
    vect = vectorizer.transform([cleaned])
    predicted_rating = model.predict(vect)[0]

    # Label and color based on rating
    if predicted_rating >= 3:
        label = f"<span style='color: green; font-weight: bold;'>Real</span>"
    else:
        label = f"<span style='color: red; font-weight: bold;'>Fake</span>"

    return f"Predicted Rating: {predicted_rating:.1f} / 5<br>{label}"

# Gradio Interface
iface = gr.Interface(
    fn=predict_rating,
    inputs=gr.Textbox(lines=5, label="Enter Product Review"),
    outputs=gr.HTML(),
    title="Fake Review Detection",
    description="Enter a product review to predict the star rating and detect if it's real or fake."
)

iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cdb8265d31c0c80851.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


