In [1]:
import pandas as pd
import numpy as np

In [2]:
def eval_gini(y_true, y_pred):
    """Return the normalized Gini coefficient of ``y_pred`` against ``y_true``.

    The targets are reordered by ascending prediction and their cumulative
    share is compared with the diagonal ``linspace(1 / n, 1, n)``. The summed
    gap is then divided by the gap obtained when the targets are ordered by
    their own values, so a perfect ranking scores 1.0 and a perfectly
    reversed ranking scores -1.0.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth target values.
    y_pred : array-like of shape (n_samples,)
        Predicted scores; only their ordering influences the result.

    Returns
    -------
    float
        ``G_pred / G_true``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not contain the same number of values.

    Notes
    -----
    Both inputs are converted to flat NumPy arrays first, so the reordering is
    always positional. Indexing a pandas Series with the result of ``argsort``
    is label-based and only gives the right answer when the Series carries the
    default ``0..n-1`` index.

    If ``y_true`` sums to zero the cumulative shares divide by zero and the
    result is not finite.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {y_true.shape[0]} and {y_pred.shape[0]}"
        )

    n_samples = y_true.shape[0]
    # Diagonal reference line: cumulative share under a uniform distribution.
    L_mid = np.linspace(1 / n_samples, 1, n_samples)

    # Cumulative target share when rows are sorted by the prediction.
    pred_order = y_true[y_pred.argsort()]
    L_pred = np.cumsum(pred_order) / np.sum(pred_order)
    G_pred = np.sum(L_mid - L_pred)

    # Same quantity for the ideal ordering (rows sorted by the target itself).
    true_order = y_true[y_true.argsort()]
    L_true = np.cumsum(true_order) / np.sum(true_order)
    G_true = np.sum(L_mid - L_true)

    return G_pred / G_true

In [3]:
# Out-of-fold prediction files of the two models, read in the order lgb, dnn.
# NOTE(review): nothing here checks that these rows are in the same order as
# `target` below -- confirm the OOF files follow train.csv row order.
lgb_oof, dnn_oof = (
    pd.read_csv(oof_path)
    for oof_path in (
        "../input/workbook-lgb/lgb_oof.csv",
        "../input/workbook-dae/dnn_oof.csv",
    )
)

# Only the identifier and the label are needed from the training file.
target = pd.read_csv(
    "../input/porto-seguro-safe-driver-prediction/train.csv",
    usecols=["id", "target"],
)

In [4]:
# Replace each model's raw scores by their rank divided by the row count, so
# both models are expressed on a comparable scale (at most 1) before blending.
lgb_oof_ranks = lgb_oof["target"].rank().div(len(lgb_oof))
dnn_oof_ranks = dnn_oof["target"].rank().div(len(dnn_oof))

In [5]:
# Gini of the lgb-only ranking: the score every blend below has to beat.
baseline = eval_gini(y_true=target.target, y_pred=lgb_oof_ranks)

print(f"starting from a oof lgb baseline {baseline:0.5f}\n")

# The running best is tracked in its own variable so that `baseline` still
# holds the lgb-only score once the search has finished.
best_score = baseline
best_alpha = 1.0  # alpha == 1.0 is the lgb-only blend, i.e. the baseline

# alpha is the weight of the lgb ranks; the dnn ranks receive 1 - alpha.
for alpha in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    ensemble = alpha * lgb_oof_ranks + (1.0 - alpha) * dnn_oof_ranks
    score = eval_gini(y_true=target.target, y_pred=ensemble)
    print(f"lgb={alpha:0.1f} dnn={(1.0 - alpha):0.1f} -> {score:0.5f}")

    # Strict comparison: on a tie the earlier (smaller) alpha is kept.
    if score > best_score:
        best_score = score
        best_alpha = alpha

print(f"\nBest alpha is {best_alpha:0.1f}")

starting from a oof lgb baseline 0.28850

lgd=0.1 dnn=0.9 -> 0.26632
lgd=0.2 dnn=0.8 -> 0.27188
lgd=0.3 dnn=0.7 -> 0.27682
lgd=0.4 dnn=0.6 -> 0.28102
lgd=0.5 dnn=0.5 -> 0.28440
lgd=0.6 dnn=0.4 -> 0.28692
lgd=0.7 dnn=0.3 -> 0.28857
lgd=0.8 dnn=0.2 -> 0.28938
lgd=0.9 dnn=0.1 -> 0.28935

Best alpha is 0.8


In [6]:
# Test-set predictions of each model.
lgb_submission = pd.read_csv(
    "../input/workbook-lgb/lgb_submission.csv"
)
dnn_submission = pd.read_csv(
    "../input/workbook-dae/dnn_submission.csv"
)

# Frame whose `target` column is overwritten with the blended values below.
submission = pd.read_csv(
    "../input/porto-seguro-safe-driver-prediction/sample_submission.csv"
)

In [7]:
# Rank-scale each model's test predictions (rank divided by row count).
lgb_ranks = lgb_submission["target"].rank().div(len(lgb_submission))
dnn_ranks = dnn_submission["target"].rank().div(len(dnn_submission))

# Equal-weight blend of the two rank vectors.
# NOTE(review): the sum aligns on the row index, so this assumes both
# submission files list the rows in the same order -- confirm.
submission.target = 0.5 * lgb_ranks + 0.5 * dnn_ranks

submission.to_csv("equal_blend_rank.csv", index=False)

In [8]:
# Reuse the weight selected by the alpha search on the out-of-fold predictions
# instead of hard-coding it, so the blend cannot drift out of sync with the
# search when the OOF inputs change.
assert 0.0 <= best_alpha <= 1.0, f"unexpected blend weight: {best_alpha}"

# Recomputed here so this cell depends only on the loaded submission frames.
lgb_ranks = (lgb_submission.target.rank() / len(lgb_submission))
dnn_ranks = (dnn_submission.target.rank() / len(dnn_submission))

# best_alpha weights the lgb ranks; the dnn ranks receive the remainder.
submission.target = lgb_ranks * best_alpha + dnn_ranks * (1.0 - best_alpha)

submission.to_csv("blend_rank.csv", index=False)