In [1]:
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import make_scorer, f1_score,precision_score,recall_score
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from agentds import BenchmarkClient
from xgboost import XGBClassifier
import pandas as pd
import numpy as np

In [2]:
# Load the labelled training interactions.
train_data = pd.read_csv('interactions_train.csv')

# Raw columns read straight from the interactions table.
features = ['PaymentRatio', 'TotalBalance', 'Limit',
            'Age', 'CreditScore', 'AnnualSalary', 'Utilisation', 'HardInquiries']
# Derived columns; they are read from the CSVs as-is (nothing in this cell computes them).
engineered_features = ['balance_to_limit',
                       'missed_payment_flag', 'total_accounts',
                       'credit_to_savings_ratio', 'balance_to_salary',
                       'limit_to_salary', 'payment_to_salary_ratio',
                       'SalaryBalance', 'SalaryCredit',
                       'CreditSquared', 'AgeCredit', 'AgeSalary', 'AgeBalance',
                       'PrevInquiries', 'LaggedUtilisation', 'LaggedPaymentRatio',
                       'RollingUtil', 'RollingPayRatio']

features = features + engineered_features

X_train_df = train_data[features]
y_train_df = train_data['DefaultLabel']
X, y = X_train_df.to_numpy(), y_train_df.to_numpy()

# 70/30 hold-out split, stratified on the label so both parts keep the same default rate.
# NOTE(review): rows are split at random; if one customer contributes several weekly rows,
# the same customer can end up in both parts -- confirm this is acceptable.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Unlabelled rows to score for the submission.
test_data = pd.read_csv('interactions_test.csv')
X_test_df = test_data[features]
X_test = X_test_df.to_numpy()

# Fill missing values with the sentinel -1 using ONE imputer for all three splits.
# Previously train/val were first passed through np.nan_to_num (which also rewrites
# +/-inf to huge finite numbers) and then through the imputer, while the test set only
# went through the imputer. A single path keeps the preprocessing identical everywhere;
# SimpleImputer is expected to reject infinite values, so a non-finite ratio should
# surface here as an error instead of being silently turned into ~1.8e308.
imputer = SimpleImputer(strategy='constant', fill_value=-1)

X_train = imputer.fit_transform(X_train)
X_val = imputer.transform(X_val)
X_test = imputer.transform(X_test)

# Optional oversampler. It is only referenced by the disabled resampling line below;
# the model in this notebook is trained on the un-resampled X_train.
# Alternative tried: SMOTE(random_state=42, k_neighbors=5, sampling_strategy=0.3)
smote = BorderlineSMOTE(kind='borderline-1', random_state=42, k_neighbors=5)
# X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

In [3]:
# Class balance of the training split: negatives per positive is the baseline
# candidate for XGBoost's scale_pos_weight.
neg_count = (y_train == 0).sum()
pos_count = (y_train == 1).sum()
scale_pos_weight = neg_count / pos_count

# Candidate values per XGBoost hyper-parameter, written without the estimator prefix.
xgb_search_space = {
    'max_depth': [3, 4, 5, 6],
    'min_child_weight': [1, 3, 5, 7],
    'subsample': [0.6, 0.7, 0.8, 0.9],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9],
    'learning_rate': [0.005, 0.01, 0.02, 0.03, 0.05],
    'max_delta_step': [0, 1, 2],
    'n_estimators': [500, 800, 1000, 1500],
    'gamma': [0, 0.5, 1.0, 2.0, 5.0],
    'reg_lambda': [1, 2, 3, 5, 7, 10],
    'reg_alpha': [0, 0.5, 1, 2],
    # baseline ratio, plus two heavier up-weightings of the positive class
    'scale_pos_weight': [scale_pos_weight, scale_pos_weight * 1.5, scale_pos_weight * 2.0],
}

# The "x__" prefix addresses the stacking base estimator that is registered under the name 'x'.
param_grid = {f'x__{name}': candidates for name, candidates in xgb_search_space.items()}

# Starting configuration of the base learner. Every key that also appears in
# xgb_search_space is replaced by a sampled value during the hyper-parameter search,
# so the values below only matter if xgb_model is fitted on its own.
xgb_base_params = dict(
    n_estimators=20000,           # very large on purpose; note early stopping is switched off below
    learning_rate=0.03,           # small eta -> more stable generalisation
    max_depth=4,                  # shallow trees reduce overfitting
    min_child_weight=6,           # conservative splits
    subsample=0.7,                # row subsampling adds randomness
    colsample_bytree=0.8,         # column subsampling per tree ...
    colsample_bylevel=0.8,        # ... per depth level ...
    colsample_bynode=0.8,         # ... and per split
    gamma=2.0,                    # minimum loss reduction required to split
    reg_lambda=3.0,               # L2 penalty
    reg_alpha=1.0,                # L1 penalty (encourages sparsity)
    max_delta_step=2,             # caps the update step; helps with rare positives
    # scale_pos_weight is deliberately not fixed here: its candidates come from the search space.
    tree_method="hist",
    random_state=42,
    n_jobs=1,
    eval_metric="aucpr",          # PR-AUC suits heavily imbalanced labels
    early_stopping_rounds=None,   # no early stopping
)
xgb_model = XGBClassifier(**xgb_base_params)



In [4]:
# test = pd.read_csv('Additional_test.csv')
# train = pd.read_csv('Additional_train.csv')
# X = train.drop(columns=['DefaultLabel','CustomerID','Week','Utilisation', 'HardInquiries', 'NumChecking', 'Tenure', 'payment_ratio_change', 'credit_utilization_change', 'utilization_trend',  'HomeCity', 'is_rapid_credit_growth', 'is_high_utilization'])
# Y = train['DefaultLabel']

In [5]:
# One base learner, registered as 'x' so search parameters can address it as "x__<param>".
bst = [('x', xgb_model)]

# The same stratified 5-fold splitter is used both inside the stack (to build the
# out-of-fold predictions for the meta-learner) and by the hyper-parameter search.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Meta-learner: class-weighted, L2-regularised logistic regression.
meta_learner = LogisticRegression(
    solver='liblinear',
    max_iter=1000,
    class_weight='balanced',
    C=5,
    penalty='l2',
    fit_intercept=True,
)

# passthrough=True feeds the original features to the meta-learner next to the
# base learner's predictions.
clf_stack = StackingClassifier(
    estimators=bst,
    final_estimator=meta_learner,
    n_jobs=-1,
    passthrough=True,
    cv=cv,
)

# Model selection criterion: macro-averaged F1.
scorer = make_scorer(f1_score, average='macro')

# Despite its name, grid_search is a randomized search: n_iter random draws from param_grid.
search_settings = dict(
    estimator=clf_stack,
    param_distributions=param_grid,
    n_iter=5,            # number of random draws (increase for more exploration)
    scoring=scorer,
    cv=cv,
    n_jobs=1,
    verbose=3,
    random_state=42,
)
grid_search = RandomizedSearchCV(**search_settings)
grid_search.fit(X_train, y_train)

# Best configuration found by the search.
stack_best = grid_search.best_estimator_

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV 1/5] END x__colsample_bytree=0.7, x__gamma=0.5, x__learning_rate=0.005, x__max_delta_step=1, x__max_depth=6, x__min_child_weight=3, x__n_estimators=800, x__reg_alpha=1, x__reg_lambda=2, x__scale_pos_weight=19.651162790697676, x__subsample=0.9;, score=0.542 total time=  19.7s
[CV 2/5] END x__colsample_bytree=0.7, x__gamma=0.5, x__learning_rate=0.005, x__max_delta_step=1, x__max_depth=6, x__min_child_weight=3, x__n_estimators=800, x__reg_alpha=1, x__reg_lambda=2, x__scale_pos_weight=19.651162790697676, x__subsample=0.9;, score=0.562 total time=  14.6s
[CV 3/5] END x__colsample_bytree=0.7, x__gamma=0.5, x__learning_rate=0.005, x__max_delta_step=1, x__max_depth=6, x__min_child_weight=3, x__n_estimators=800, x__reg_alpha=1, x__reg_lambda=2, x__scale_pos_weight=19.651162790697676, x__subsample=0.9;, score=0.556 total time=  13.5s
[CV 4/5] END x__colsample_bytree=0.7, x__gamma=0.5, x__learning_rate=0.005, x__max_delta_step=1, x__

In [6]:
def _split_scores(y_true, y_pred):
    """Return (macro-F1, precision, recall) for one data split.

    F1 is macro-averaged over both classes; precision and recall use
    scikit-learn's default settings for these functions.
    """
    return (
        f1_score(y_true, y_pred, average='macro'),
        precision_score(y_true, y_pred),
        recall_score(y_true, y_pred),
    )


# Hard class predictions of the selected model on both splits.
y_hat_train = stack_best.predict(X_train)
y_hat_val = stack_best.predict(X_val)

F1_train, precision_train, recall_train = _split_scores(y_train, y_hat_train)
F1_val, precision_val, recall_val = _split_scores(y_val, y_hat_val)

# Side-by-side report; a large train/validation gap would indicate overfitting.
print('----------------------------------------')
print(f'Best clf_stack F1-Score on Training Set: {F1_train}')
print(f'Best clf_stack Precision on Training Set: {precision_train}')
print(f'Best clf_stack Recall on Training Set: {recall_train}')
print('----------------------------------------')
print(f'Best clf_stack F1-Score on Validation Set: {F1_val}')
print(f'Best clf_stack Precision on Validation Set: {precision_val}')
print(f'Best clf_stack Recall on Validation Set: {recall_val}')

----------------------------------------
Best clf_stack F1-Score on Training Set: 0.5494752455842598
Best clf_stack Precision on Training Set: 0.187206020696143
Best clf_stack Recall on Training Set: 0.6941860465116279
----------------------------------------
Best clf_stack F1-Score on Validation Set: 0.5591424978245231
Best clf_stack Precision on Validation Set: 0.1945080091533181
Best clf_stack Recall on Validation Set: 0.6929347826086957


In [7]:
# Create the submission file (columns: CustomerID, Week, DefaultLabel).
# One predicted label per row of the test set, in the test set's row order.
predictions = stack_best.predict(X_test)

submission_df = pd.DataFrame({
    'CustomerID': test_data['CustomerID'],
    'Week': test_data['Week'],
    'DefaultLabel': predictions
})

# Save predictions
submission_df.to_csv("RB_C2_stack.csv", index=False)
print(len(X))  # number of labelled rows available before the train/validation split
print(f"✅ Predictions saved: {submission_df.shape[0]} predictions")
print(f"   Preview: {submission_df.head(3)}")
# Share of rows predicted as default; compare against the training default rate as a sanity check.
print(f"   Default rate: {predictions.mean():.3f} ({predictions.sum()} default cases out of {len(predictions)})")

13301
✅ Predictions saved: 13290 predictions
   Preview:   CustomerID  Week  DefaultLabel
0    C000001     1             0
1    C000001     2             0
2    C000001     3             0
   Default rate: 0.317 (4216 default cases out of 13290)


In [8]:
# # 3. Submit Predictions
#  Predictions saved: 13290 predictions
#    Preview:   CustomerID  Week  DefaultLabel
# 0    C000001     1             0
# 1    C000001     2             0
# 2    C000001     3             0
#    Default rate: 0.315 (4189 default cases out of 13290)
# # Submit predictions to the competition
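# # NOTE: never commit credentials. Read the key from the environment (requires `import os`)
# # and rotate the key that was previously hardcoded here.
# client = BenchmarkClient(
#     api_key=os.environ["AGENTDS_API_KEY"],
#     team_name="agi"
# )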

# print("🚀 Submitting predictions...")

# try:
#     result = client.submit_prediction("Retailbanking", 2, "RB_C2_stack.csv")
    
#     if result['success']:
#         print("✅ Submission successful!")
#         print(f"   📊 Score: {result['score']:.4f}")
#         print(f"   📏 Metric: {result['metric_name']}")
#         print(f"   ✔️  Validation: {'Passed' if result['validation_passed'] else 'Failed'}")
#     else:
#         print("❌ Submission failed!")
#         print(f"   Error details: {result.get('details', {}).get('validation_errors', 'Unknown error')}")
        
# except Exception as e:
#     print(f"💥 Submission error: {e}")
#     print("🔧 Check your API key and team name are correct!")

# print("\n🎯 Next steps:")
# print("   1. Try incorporating relevant information outside this table!")
# print("   2. You've completed all Retail Banking challenges!")

In [9]:
# #0.550 for macro f1
# #0.4XX for pred
# #🚀 Submitting predictions...
# ✅ Prediction submitted successfully!
# 📊 Score: 0.5823 (Macro-F1)
# ✅ Validation passed
# ✅ Submission successful!
#    📊 Score: 0.5823
#    📏 Metric: Macro-F1
#    ✔️  Validation: Passed

# 🎯 Next steps:
#    1. Try incorporating relevant information outside this table!
#    2. You've completed all Retail Banking challenges!
#    ✅ Predictions saved: 13290 predictions
#    Preview:   CustomerID  Week  DefaultLabel
# 0    C000001     1             0
# 1    C000001     2             0
# 2    C000001     3             0
#    Default rate: 0.086 (1148 default cases out of 13290)