In [1]:
import pandas as pd

# Toy corpus: ten short product reviews, each paired with a hand-assigned
# label. Records are written row-wise as (review text, label).
labelled_reviews = [
    ("This product is very good and useful", "Genuine"),
    ("Best product ever buy now", "Fake"),
    ("Waste of money totally disappointed", "Genuine"),
    ("Limited offer buy fast 5 stars", "Fake"),
    ("Quality is good and delivery was fast", "Genuine"),
    ("Amazing product must buy now", "Fake"),
    ("Very poor quality not recommended", "Genuine"),
    ("Best deal hurry up buy now", "Fake"),
    ("Product works as expected", "Genuine"),
    ("Free gift offer best product", "Fake"),
]

# Column-oriented view of the same records: one list per DataFrame column.
data = {
    "review_text": [review for review, _ in labelled_reviews],
    "label": [tag for _, tag in labelled_reviews],
}

df = pd.DataFrame(data)
df


Unnamed: 0,review_text,label
0,This product is very good and useful,Genuine
1,Best product ever buy now,Fake
2,Waste of money totally disappointed,Genuine
3,Limited offer buy fast 5 stars,Fake
4,Quality is good and delivery was fast,Genuine
5,Amazing product must buy now,Fake
6,Very poor quality not recommended,Genuine
7,Best deal hurry up buy now,Fake
8,Product works as expected,Genuine
9,Free gift offer best product,Fake


In [2]:
# Persist the frame as reviews.csv in the working directory; index=False
# leaves the row index out of the file.
df.to_csv(path_or_buf="reviews.csv", index=False)


In [3]:
import nltk
# Fetch NLTK's "stopwords" corpus; the text-cleaning cell reads the English
# list from it via nltk.corpus.stopwords.
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
# Encode the text labels as integers for the classifier: Genuine -> 1, Fake -> 0.
# The integer codes also map to themselves, so re-running this cell on an
# already-encoded column is a no-op instead of silently turning every label
# into NaN (Series.map yields NaN for any value missing from the mapping).
label_codes = {'Genuine': 1, 'Fake': 0, 1: 1, 0: 0}
encoded_labels = df['label'].map(label_codes)

# Fail loudly on an unexpected label rather than passing NaN on to the model.
if encoded_labels.isna().any():
    unknown_labels = df.loc[encoded_labels.isna(), 'label'].unique().tolist()
    raise ValueError(f"Unrecognised label value(s): {unknown_labels}")

df['label'] = encoded_labels.astype('int64')
df


Unnamed: 0,review_text,label
0,This product is very good and useful,1
1,Best product ever buy now,0
2,Waste of money totally disappointed,1
3,Limited offer buy fast 5 stars,0
4,Quality is good and delivery was fast,1
5,Amazing product must buy now,0
6,Very poor quality not recommended,1
7,Best deal hurry up buy now,0
8,Product works as expected,1
9,Free gift offer best product,0


In [5]:
import re
from nltk.corpus import stopwords

# English stopwords from NLTK, held in a set for fast membership tests.
# NOTE(review): the list includes negations such as "not" -- the cleaned output
# below turns "Very poor quality not recommended" into "poor quality recommended".
stop_words = set(stopwords.words('english'))

def clean_text(text, ignore_words=None):
    """Normalise a review into lowercase, space-separated alphabetic tokens.

    The text is lowercased, every run of characters outside ``a-z`` (digits,
    punctuation, tabs, newlines, ...) is turned into a single space, the
    result is split on whitespace, stopwords are dropped, and the surviving
    tokens are joined with single spaces.

    Args:
        text: The raw review string.
        ignore_words: Optional collection of lowercase words to drop. When
            omitted, the notebook-level ``stop_words`` set is used.

    Returns:
        The cleaned review as a single string; empty when no token survives.
    """
    if ignore_words is None:
        ignore_words = stop_words
    # Substitute a space rather than the empty string: deleting separators
    # outright would glue neighbouring words together ("good,but" -> "goodbut").
    text = re.sub(r'[^a-z]+', ' ', text.lower())
    kept = [w for w in text.split() if w not in ignore_words]
    return " ".join(kept)

# Run every raw review through clean_text and keep the result next to the
# original text in a new column.
raw_reviews = df['review_text']
df['clean_review'] = raw_reviews.map(clean_text)
df


Unnamed: 0,review_text,label,clean_review
0,This product is very good and useful,1,product good useful
1,Best product ever buy now,0,best product ever buy
2,Waste of money totally disappointed,1,waste money totally disappointed
3,Limited offer buy fast 5 stars,0,limited offer buy fast stars
4,Quality is good and delivery was fast,1,quality good delivery fast
5,Amazing product must buy now,0,amazing product must buy
6,Very poor quality not recommended,1,poor quality recommended
7,Best deal hurry up buy now,0,best deal hurry buy
8,Product works as expected,1,product works expected
9,Free gift offer best product,0,free gift offer best product


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Target vector: the label column of the frame.
y = df['label']

# TF-IDF features built from the cleaned reviews. Note that the vectorizer is
# fitted on every row of df in this cell, i.e. before any train/test split.
cleaned_reviews = df['clean_review']
vectorizer = TfidfVectorizer()
vectorizer.fit(cleaned_reviews)
X = vectorizer.transform(cleaned_reviews)


In [8]:
from sklearn.model_selection import train_test_split

# Split the cleaned text rather than a TF-IDF matrix computed from all rows:
# fitting the vectorizer before splitting lets the held-out reviews shape the
# vocabulary and IDF weights, which leaks test information into training.
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_review'], y, test_size=0.2, random_state=42)

# Learn vocabulary and IDF weights from the training reviews only, then apply
# that fitted transform unchanged to the held-out reviews. Refitting the
# existing `vectorizer` keeps later vectorizer.transform calls consistent
# with the features the model is trained on.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)


In [9]:
from sklearn.linear_model import LogisticRegression

# Train a logistic-regression classifier (library defaults) on the training
# split; fit() returns the estimator itself, so it can be bound directly.
model = LogisticRegression().fit(X_train, y_train)
model


In [10]:
from sklearn.metrics import accuracy_score, classification_report

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
# zero_division=0 reports 0.0 for a class that received no predictions -- the
# same value the default setting prints -- without raising a warning for each
# undefined metric.
print(classification_report(y_test, y_pred, zero_division=0))


Accuracy: 0.5
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
def predict_review(text):
    """Classify one raw review string with the fitted vectorizer and model.

    The text is passed through ``clean_text`` and ``vectorizer.transform``
    before ``model.predict``. A predicted label of 1 is reported as a genuine
    review; any other label is reported as fake.

    Args:
        text: The raw review string to classify.

    Returns:
        A human-readable verdict string.
    """
    features = vectorizer.transform([clean_text(text)])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Genuine Review ✅"
    return "Fake Review ❌"

# Try the classifier on two hand-written example reviews.
sample_reviews = (
    "Best product ever buy now",
    "Quality is good but price is high",
)
for sample in sample_reviews:
    print(predict_review(sample))


Fake Review ❌
Genuine Review ✅
