In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from textblob import TextBlob
from collections import Counter
import datetime

# Sample Data (Replace with actual data)
reviews = [
    {
        "review_text": "Fantastic product!  Fantastic Works like a charm.",
        "account_age": 5,  # In years
        "verified_purchase": True,
        "helpful_votes": 3,
        "timestamp": "2025-01-23 12:30:00"
    },
     {
        "review_text": "Fantastic product! Works like a charm.",
        "account_age": 10,  # In years
        "verified_purchase": True,
        "helpful_votes": 3,
        "timestamp": "2025-01-10 12:30:00"
    },
  
    # Add more reviews
]

# Weights for User Credibility Score (UCS)
W_ACCOUNT_AGE = 0.3
W_VERIFIED_PURCHASE = 0.5
W_HELPFUL_VOTES = 0.2

# Weights for Unusual Pattern Score (UPS)
W_SENTIMENT = 0.4
W_REPEATED_PHRASES = 0.3
W_FREQUENCY = 0.2
W_TIMING = 0.1

# Functions
def calculate_user_credibility_score(review):
    # Normalize account age (assume max age is 10 years)
    account_age_score = min(review["account_age"] / 10, 1)
    verified_purchase_score = 1 if review["verified_purchase"] else 0
    # Normalize helpful votes (assume max helpful votes is 10)
    helpful_votes_score = min(review["helpful_votes"] / 10, 1)

    # Calculate UCS
    ucs = (W_ACCOUNT_AGE * account_age_score +
           W_VERIFIED_PURCHASE * verified_purchase_score +
           W_HELPFUL_VOTES * helpful_votes_score)
    return ucs



In [2]:
for i, review in enumerate(reviews):
    ucs = calculate_user_credibility_score(review)
    print(ucs)


0.71
0.71


In [13]:
result_df

Unnamed: 0,review_text,user_credibility_score,unusual_pattern_score,final_fake_review_score,is_fake
0,Fantastic product! Works like a charm.,0.71,0.4,0.59,True
1,Fantastic product! Works like a charm.,0.71,0.4,0.59,True


In [None]:
def calculate_unusual_pattern_score(reviews, review, index):
    # Sentiment Analysis
    sentiment = TextBlob(review["review_text"]).sentiment.polarity
    sentiment_score = abs(1)  # Higher absolute values are more extreme


    # Repeated Phrases
    phrases = review["review_text"].split()
    common_phrases = [word for word, count in Counter(phrases).items() if count > 1]
    repeated_phrases_score = min(len(common_phrases) / len(phrases), 1)

    # Review Frequency
    timestamps = [datetime.datetime.strptime(r["timestamp"], "%Y-%m-%d %H:%M:%S") for r in reviews]
    review_time = timestamps[index]
    time_differences = [abs((review_time - t).total_seconds()) for t in timestamps if t != review_time]
    frequency_score = min(sum(1 for diff in time_differences if diff < 60) / len(reviews), 1)

    # Timing Anomalies (e.g., reviews posted late at night)
    timing_score = 1 if 0 <= review_time.hour <= 6 else 0

    print(sentiment_score)
    print(repeated_phrases_score)
    print(frequency_score)
    print(timing_score)

    # Calculate UPS
    ups = (W_SENTIMENT * sentiment_score +
           W_REPEATED_PHRASES * repeated_phrases_score +
           W_FREQUENCY * frequency_score +
           W_TIMING * timing_score)
    return ups

# Main Scoring
def calculate_fake_review_score(reviews):
    results = []
    for i, review in enumerate(reviews):
        ucs = calculate_user_credibility_score(review)
        ups = calculate_unusual_pattern_score(reviews, review, i)

        # Combine UCS and UPS into final score
        ALPHA = 0.6  # Weight for UCS
        BETA = 0.4   # Weight for UPS
        frs = ALPHA * ucs + BETA * ups

        results.append({
            "review_text": review["review_text"],
            "user_credibility_score": round(ucs, 2),
            "unusual_pattern_score": round(ups, 2),
            "final_fake_review_score": round(frs, 2),
            "is_fake": frs < 0.7  # Threshold for fake reviews
        })
    return pd.DataFrame(results)

# Run the scoring
result_df = calculate_fake_review_score(reviews)
print(result_df)
