In [2]:
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Baseline grey-market classifier: TF-IDF over the product title plus
# standardised numeric listing features, fed to a class-weighted logistic
# regression. The fitted pipeline is evaluated on a stratified 20% hold-out
# and then persisted with joblib.

# Column roles are named once so the feature frame and the transformer below
# cannot drift apart.
text_feature = "Product_Title"
numeric_features = ["Selling_Price", "MRP", "Discount_Pct", "Review_Count"]

df = pd.read_csv("Final-snapdeal-dataset.csv")
X = df[[text_feature] + numeric_features]
y = df["Is_Grey_Market"]

# Unigrams and bigrams, English stop words removed, terms seen in fewer than
# two titles dropped, vocabulary capped at 4000 entries.
title_vectorizer = TfidfVectorizer(
    stop_words="english",
    ngram_range=(1, 2),
    min_df=2,
    max_features=4000,
)

preprocessor = ColumnTransformer(
    transformers=[
        # The text column is selected with a bare string (not a list) so the
        # vectorizer receives a 1-D sequence of documents.
        ("text", title_vectorizer, text_feature),
        ("num", StandardScaler(), numeric_features),
    ]
)

# class_weight="balanced" reweights the classes inversely to their frequency.
classifier = LogisticRegression(
    solver="liblinear",
    class_weight="balanced",
    max_iter=2000,
    random_state=42,
)

model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", classifier),
    ]
)

# Stratified split keeps the label proportions the same in both partitions;
# the fixed seed makes the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

model.fit(X_train, y_train)

# Hard labels for the confusion matrix / report; column 1 of predict_proba
# (the second entry of the fitted classes_) is the score used for ROC-AUC.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nROC-AUC:", roc_auc_score(y_test, y_prob))

# Persist the whole pipeline (preprocessing + classifier) as a single artefact.
joblib.dump(model, "logistic_grey_market_model.pkl")
print("\nModel saved as logistic_grey_market_model.pkl")

Confusion Matrix:
[[85  8]
 [ 0  7]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.91      0.96        93
           1       0.47      1.00      0.64         7

    accuracy                           0.92       100
   macro avg       0.73      0.96      0.80       100
weighted avg       0.96      0.92      0.93       100


ROC-AUC: 0.9969278033794163

Model saved as logistic_grey_market_model.pkl


In [4]:
import pandas as pd
import joblib
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

from xgboost import XGBClassifier

# XGBoost grey-market classifier: same features and preprocessing as the
# logistic-regression cell (TF-IDF on the title + standardised numerics), with
# a gradient-boosted tree model. Evaluated on a stratified 20% hold-out and
# persisted with joblib.
#
# NOTE(review): the recorded run of this cell scores perfectly on a 100-row
# hold-out containing 7 positives. Before trusting that number, confirm that
# none of the input columns is used to derive Is_Grey_Market — TODO verify.

df = pd.read_csv("Final-snapdeal-dataset.csv")

X = df[
    [
        "Product_Title",
        "Selling_Price",
        "MRP",
        "Discount_Pct",
        "Review_Count"
    ]
]

y = df["Is_Grey_Market"]

# Split BEFORE deriving anything from the labels, so every quantity that
# shapes the model (including the class weight below) sees training rows only.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

# Class-imbalance weight = negatives / positives, computed on the training
# labels only. Computing it on the full `y` would let held-out labels
# influence a training hyperparameter.
n_negative = int((y_train == 0).sum())
n_positive = int((y_train == 1).sum())
if n_positive == 0:
    raise ValueError(
        "Training split contains no rows with Is_Grey_Market == 1; "
        "cannot compute scale_pos_weight."
    )
pos_weight = n_negative / n_positive

preprocessor = ColumnTransformer(
    transformers=[
        (
            "text",
            # Unigrams + bigrams, English stop words removed, terms present in
            # fewer than two titles dropped, vocabulary capped at 4000.
            TfidfVectorizer(
                max_features=4000,
                ngram_range=(1, 2),
                min_df=2,
                stop_words="english"
            ),
            # Bare string (not a list) so the vectorizer receives a 1-D
            # sequence of documents.
            "Product_Title"
        ),
        (
            "num",
            StandardScaler(),
            ["Selling_Price", "MRP", "Discount_Pct", "Review_Count"]
        )
    ]
)

xgb_model = XGBClassifier(
    n_estimators=400,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=pos_weight,
    eval_metric="logloss",
    random_state=42,
    n_jobs=-1
)

model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", xgb_model)
    ]
)

model.fit(X_train, y_train)

# Hard labels for the confusion matrix / report; column 1 of predict_proba
# (the second entry of the fitted classes_) is the score used for ROC-AUC.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nROC-AUC:", roc_auc_score(y_test, y_prob))

# Persist the whole pipeline (preprocessing + classifier) as a single artefact.
joblib.dump(model, "grey_market_xgboost_model.pkl")
print("\nModel saved as grey_market_xgboost_model.pkl")


Confusion Matrix:
[[93  0]
 [ 0  7]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        93
           1       1.00      1.00      1.00         7

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100


ROC-AUC: 1.0

Model saved as grey_market_xgboost_model.pkl
