In [None]:
import pandas as pd
# Hard-coded absolute path of the review dataset CSV.
DATA_PATH = "/content/drive/MyDrive/product_review_dataset.csv"

# Read the whole CSV into the frame that the cells below refer to as `df`.
df = pd.read_csv(DATA_PATH)

In [None]:
# -------------------------------
# 1. EXPLORATORY ANALYSIS
# -------------------------------

# Row selectors for the two product groups, built once and reused below.
defective_rows = df['Defective'] == 1
non_defective_rows = df['Defective'] == 0

# Prior P(Defective): mean of the Defective column
# (assumes the column is coded 0/1 -- TODO confirm against the dataset).
p_defective = df['Defective'].mean()

# Mean ReviewRating inside each group.
avg_rating_defective = df.loc[defective_rows, 'ReviewRating'].mean()
avg_rating_non_defective = df.loc[non_defective_rows, 'ReviewRating'].mean()

# Mean of the HighReturn flag inside each group.
return_rate_defective = df.loc[defective_rows, 'HighReturn'].mean()
return_rate_non_defective = df.loc[non_defective_rows, 'HighReturn'].mean()

In [None]:

# -------------------------------
# 2. BAYESIAN INFERENCE
# -------------------------------

# Likelihoods: mean of HighReturn restricted to each Defective group.
high_return = df['HighReturn']
p_highreturn_given_defective = high_return[df['Defective'] == 1].mean()
p_highreturn_given_not_defective = high_return[df['Defective'] == 0].mean()

# Evidence: mean of HighReturn over all rows.
p_highreturn = high_return.mean()

# Bayes' theorem: P(D | H) = P(H | D) * P(D) / P(H).
# p_defective is the prior computed in the exploratory-analysis cell.
posterior_numerator = p_highreturn_given_defective * p_defective
p_defective_given_highreturn = posterior_numerator / p_highreturn

In [None]:
# -------------------------------
# 3. MULTI-FEATURE RISK SCORING
# -------------------------------

# One point per warning sign, so each row's RiskScore is between 0 and 3:
#   - HighReturn equals 1
#   - ReviewRating is 2 or lower
#   - HasComplaint equals True
flagged_high_return = df['HighReturn'].eq(1).astype(int)
flagged_low_rating = df['ReviewRating'].le(2).astype(int)
flagged_complaint = df['HasComplaint'].eq(True).astype(int)

# The score is written back onto df as a new column.
df['RiskScore'] = flagged_high_return + flagged_low_rating + flagged_complaint

# First ten rows after sorting by RiskScore, highest first.
# No explicit tie-break is given, so rows with equal scores keep whatever
# order the default sort produces.
top_risk_products = df.sort_values('RiskScore', ascending=False).iloc[:10]

In [None]:
# -------------------------------
# 4. DECISION SCENARIO
# -------------------------------

# Thresholds that were previously inline magic numbers.
LOW_RATING_THRESHOLD = 2      # a ReviewRating at or below this counts as a warning sign
RECALL_SCORE_THRESHOLD = 2    # a risk score at or above this triggers a recall recommendation


def score_product(record, low_rating_threshold=LOW_RATING_THRESHOLD):
    """Return the 0-3 risk score for a single product record.

    One point is added for each warning sign that is present:
    ``HighReturn == 1``, ``ReviewRating <= low_rating_threshold`` and
    ``HasComplaint == True``.

    Parameters
    ----------
    record : mapping
        Must provide the keys ``'HighReturn'``, ``'ReviewRating'`` and
        ``'HasComplaint'``. Other keys are ignored.
    low_rating_threshold : number, optional
        Highest rating that still counts as a low rating.

    Returns
    -------
    int
        Number of warning signs present (0 to 3).

    Raises
    ------
    KeyError
        If one of the three required keys is missing from ``record``.
    """
    return (
        int(record['HighReturn'] == 1)
        + int(record['ReviewRating'] <= low_rating_threshold)
        # Equality with True is deliberate: it matches the comparison used for
        # the DataFrame RiskScore column, and values such as None or NaN do
        # not count as a complaint (a plain truthiness test would count NaN).
        + int(record['HasComplaint'] == True)
    )


def recommend_recall(risk_score, score_threshold=RECALL_SCORE_THRESHOLD):
    """Return "Yes" when ``risk_score`` reaches ``score_threshold``, else "No"."""
    return "Yes" if risk_score >= score_threshold else "No"


# Example product input
product = {
    "HighReturn": 1,
    "ReviewRating": 1.5,
    "HasComplaint": True,
    "VerifiedPurchase": False  # carried along for reference; not used by the score
}

# Risk score for this product
product_risk_score = score_product(product)

# Recommendation logic
recall_recommendation = recommend_recall(product_risk_score)

In [None]:
# -------------------------------
# 5. DISPLAY RESULTS
# -------------------------------

# Each report section goes out through a single print call; sep="\n" places
# every argument on its own line, so the text matches one print per line.

# Prior, group-wise ratings and group-wise return rates.
print(
    "Exploratory Analysis:",
    f"P(Defective): {p_defective:.4f}",
    f"Avg Review Rating - Defective: {avg_rating_defective:.2f}",
    f"Avg Review Rating - Non-Defective: {avg_rating_non_defective:.2f}",
    f"Return Rate - Defective: {return_rate_defective:.4f}",
    f"Return Rate - Non-Defective: {return_rate_non_defective:.4f}",
    sep="\n",
)

# Likelihoods, evidence and the resulting posterior.
print(
    "\n Bayesian Inference:",
    f"P(HighReturn | Defective): {p_highreturn_given_defective:.4f}",
    f"P(HighReturn | Not Defective): {p_highreturn_given_not_defective:.4f}",
    f"P(HighReturn): {p_highreturn:.4f}",
    f"P(Defective | HighReturn): {p_defective_given_highreturn:.4f}",
    sep="\n",
)

# Columns shown for the highest-scoring rows.
report_columns = [
    'Defective',
    'HighReturn',
    'ReviewRating',
    'HasComplaint',
    'VerifiedPurchase',
    'RiskScore',
]
print("\n Top 10 Highest-Risk Products:")
print(top_risk_products[report_columns])

# Outcome for the single example product.
print(
    "\n Decision Scenario:",
    f"Product Risk Score: {product_risk_score}",
    f"Recommend Recall? {recall_recommendation}",
    sep="\n",
)

Exploratory Analysis:
P(Defective): 0.0961
Avg Review Rating - Defective: 2.48
Avg Review Rating - Non-Defective: 4.18
Return Rate - Defective: 0.6961
Return Rate - Non-Defective: 0.1026

 Bayesian Inference:
P(HighReturn | Defective): 0.6961
P(HighReturn | Not Defective): 0.1026
P(HighReturn): 0.1596
P(Defective | HighReturn): 0.4192

 Top 10 Highest-Risk Products:
      Defective  HighReturn  ReviewRating  HasComplaint  VerifiedPurchase  \
9983          1           1           1.5          True              True   
7942          1           1           1.1          True              True   
7924          1           1           1.6          True              True   
8392          1           1           1.8          True              True   
7817          1           1           2.0          True              True   
7796          1           1           1.9          True              True   
2954          1           1           1.9          True              True   
2927          1