In [20]:
#imports and installations
import pandas as pd
from sklearn.model_selection import train_test_split #for splitting the data
from sklearn.preprocessing import StandardScaler #for standardisation
from sklearn.linear_model import LogisticRegression #for baseline model
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_auc_score #for metrics
pip install xgboost lightgbm
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb

In [2]:
#Importing data
X = pd.read_csv("X.csv") 
y = pd.read_csv("y.csv")

In [3]:
#train_test split, Split X and y into train/test

#Splitting X and y into train and test sets(80/20), while preserving the class imbalance(ratio of returned, non-retuned) 
#with stratify as class is imbalanced

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [4]:
X.dtypes

delivery_delay                     float64
payment_value                      float64
price                              float64
customer_state_SP                     bool
product_category_bed_bath_table       bool
review_score                       float64
dtype: object

In [5]:
#we are converting boolean to int as its friendly for future pipeline building

X_train[['customer_state_SP','product_category_bed_bath_table']] = X_train[['customer_state_SP','product_category_bed_bath_table']].astype(int)
X_test[['customer_state_SP','product_category_bed_bath_table']] = X_test[['customer_state_SP','product_category_bed_bath_table']].astype(int)

In [6]:
#Standardising numerical columns
num_cols = ['delivery_delay', 'payment_value', 'price', 'review_score']

# Fit and transform on train, transform on test
scaler= StandardScaler()
X_train[num_cols]=scaler.fit_transform(X_train[num_cols])
X_test[num_cols]=scaler.transform(X_test[num_cols])

In [7]:
#Train Baseline Model

model= LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)
model.fit(X_train, y_train)
y_pred= model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

  y = column_or_1d(y, warn=True)


In [13]:
#Evaluate the model

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, y_prob))

[[20450  2569]
 [   12    96]]
              precision    recall  f1-score   support

           0       1.00      0.89      0.94     23019
           1       0.04      0.89      0.07       108

    accuracy                           0.89     23127
   macro avg       0.52      0.89      0.50     23127
weighted avg       0.99      0.89      0.94     23127

ROC AUC Score: 0.9277923390178484


In [14]:
#Tune the classification threshold (Right now, predict() uses default threshold = 0.5
# But maybe predicting return only if probability > 0.9 improves precision.)
# Set threshold high (e.g., 0.85 to 0.95)
threshold = 0.9
y_pred_thresh = (y_prob > threshold).astype(int)

In [15]:
# Re-evaluate
print(confusion_matrix(y_test, y_pred_thresh))
print(classification_report(y_test, y_pred_thresh))

[[22047   972]
 [   61    47]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98     23019
           1       0.05      0.44      0.08       108

    accuracy                           0.96     23127
   macro avg       0.52      0.70      0.53     23127
weighted avg       0.99      0.96      0.97     23127



In [17]:
#trying to find the best threshold to tune

threshold=[0,0.5,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.95,1.0]
for num in threshold:
    y_pred_thresh = (y_prob > num).astype(int)
    # Re-evaluate
    print('I am for this ', num )
    print(confusion_matrix(y_test, y_pred_thresh))
    print(classification_report(y_test, y_pred_thresh))

I am for this  0
[[    0 23019]
 [    0   108]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     23019
           1       0.00      1.00      0.01       108

    accuracy                           0.00     23127
   macro avg       0.00      0.50      0.00     23127
weighted avg       0.00      0.00      0.00     23127

I am for this  0.5
[[20450  2569]
 [   12    96]]
              precision    recall  f1-score   support

           0       1.00      0.89      0.94     23019
           1       0.04      0.89      0.07       108

    accuracy                           0.89     23127
   macro avg       0.52      0.89      0.50     23127
weighted avg       0.99      0.89      0.94     23127

I am for this  0.1
[[13430  9589]
 [    2   106]]
              precision    recall  f1-score   support

           0       1.00      0.58      0.74     23019
           1       0.01      0.98      0.02       108

    accuracy                      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


              precision    recall  f1-score   support

           0       1.00      0.86      0.92     23019
           1       0.03      0.90      0.06       108

    accuracy                           0.86     23127
   macro avg       0.51      0.88      0.49     23127
weighted avg       0.99      0.86      0.92     23127

I am for this  0.45
[[20157  2862]
 [   11    97]]
              precision    recall  f1-score   support

           0       1.00      0.88      0.93     23019
           1       0.03      0.90      0.06       108

    accuracy                           0.88     23127
   macro avg       0.52      0.89      0.50     23127
weighted avg       0.99      0.88      0.93     23127

I am for this  0.5
[[20450  2569]
 [   12    96]]
              precision    recall  f1-score   support

           0       1.00      0.89      0.94     23019
           1       0.04      0.89      0.07       108

    accuracy                           0.89     23127
   macro avg       0.52    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [18]:
threshold = 0.65
y_pred_final = (y_prob > threshold).astype(int)

In [23]:
#Extracting final metrics

precision = precision_score(y_test, y_pred_final)
recall = recall_score(y_test, y_pred_final)
f1 = f1_score(y_test, y_pred_final)
roc_auc = roc_auc_score(y_test, y_prob)

print('precision :',precision,'\n','recall :',recall,'\n','f1_score :',f1,'\n','roc-auc:',roc_auc,'\n')

precision : 0.046875 
 recall : 0.8611111111111112 
 f1_score : 0.08891013384321224 
 roc-auc: 0.9277923390178484 



In [29]:
#Trying RF, XGBoost, LGB models

models = {
    "RandomForest": RandomForestClassifier(class_weight='balanced', random_state=42),
    "XGBoost": xgb.XGBClassifier(scale_pos_weight=100, use_label_encoder=False, eval_metric='logloss', random_state=42),
    "LightGBM": lgb.LGBMClassifier(scale_pos_weight=100, random_state=42)
}

#Added scale_pos_weight as classes are highly imbalanced

for name, model in models.items():
    print(f"\n Training {name}")
    model.fit(X_train, y_train)
    y_prob = model.predict_proba(X_test)[:, 1]

    threshold = 0.65  # Based on your earlier tuning
    y_pred = (y_prob > threshold).astype(int)

    print(f"\n Results for {name} at threshold = {threshold}")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))
    print("ROC AUC Score:", roc_auc_score(y_test, y_prob))


 Training RandomForest


  return fit_method(estimator, *args, **kwargs)



 Results for RandomForest at threshold = 0.65
[[23013     6]
 [   86    22]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     23019
           1       0.79      0.20      0.32       108

    accuracy                           1.00     23127
   macro avg       0.89      0.60      0.66     23127
weighted avg       1.00      1.00      0.99     23127

ROC AUC Score: 0.9342469505867135

 Training XGBoost


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



 Results for XGBoost at threshold = 0.65
[[22843   176]
 [   32    76]]
              precision    recall  f1-score   support

           0       1.00      0.99      1.00     23019
           1       0.30      0.70      0.42       108

    accuracy                           0.99     23127
   macro avg       0.65      0.85      0.71     23127
weighted avg       1.00      0.99      0.99     23127

ROC AUC Score: 0.9909507122135819

 Training LightGBM
[LightGBM] [Info] Number of positive: 430, number of negative: 92076
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000813 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 663
[LightGBM] [Info] Number of data points in the train set: 92506, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.004648 -> initscore=-5.366584
[LightGBM] [Info] Start training from score -5

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)



 Results for LightGBM at threshold = 0.65
[[19992  3027]
 [   23    85]]
              precision    recall  f1-score   support

           0       1.00      0.87      0.93     23019
           1       0.03      0.79      0.05       108

    accuracy                           0.87     23127
   macro avg       0.51      0.83      0.49     23127
weighted avg       0.99      0.87      0.93     23127

ROC AUC Score: 0.8273706664221023
