### Bayesian optimization

In [None]:
%%capture
def objective(params):
    """Return ``1 - mean cross-validated ROC AUC`` for one candidate point.

    Parameters
    ----------
    params : sequence
        ``[n_estimators, num_leaves, log_learning_rate, log_reg_lambda]``.
        The last two entries are natural logarithms of the values handed to
        LightGBM, so a search range of ``(-9.0, 0)`` corresponds to actual
        values in ``[exp(-9), 1]``.

    Returns
    -------
    float
        ``1 - mean(AUC)`` over the cross-validation folds, so that a
        minimizer maximizes AUC.
    """
    # Early stopping is monitored on the held-out validation frame.
    fit_params = {"early_stopping_rounds": 200,
                  "eval_metric": "auc",
                  "eval_set": [(df_val[lgb_cols], y_val)]}

    # The searched values are already logs, so exponentiate them directly.
    # Negating first (exp(-p) with p <= 0) would yield rates >= 1.
    learning_rate = np.exp(params[2])
    reg_lambda = np.exp(params[3])

    clf = lgb.LGBMClassifier(n_estimators=params[0],
                             num_leaves=params[1],
                             learning_rate=learning_rate,
                             reg_lambda=reg_lambda,
                             subsample=0.5,
                             colsample_bytree=0.75)

    # NOTE(review): the folds are drawn from `df`/`y`, while the eval set is
    # restricted to `lgb_cols` — confirm `df` carries exactly those columns.
    scores = cross_val_score(estimator=clf,
                             X=df,
                             y=y,
                             fit_params=fit_params,
                             scoring="roc_auc")
    return 1 - scores.mean()

# Search dimensions, listed in the positional order objective() indexes them.
space = [
    [20000],     # params[0]: n_estimators, a single fixed choice
    (77, 521),   # params[1]: num_leaves bounds
    (-9.0, 0),   # params[2]: fed through np.exp to produce learning_rate
    (-9.0, 0),   # params[3]: fed through np.exp to produce reg_lambda
]

# Seeded so the sequence of evaluated points is repeatable.
result = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=200,
    random_state=0,
)

In [None]:
# Best point found by the search, in the same order as the entries of `space`.
result.x

In [None]:
# objective() returns 1 - mean AUC, so this converts the minimum back to an AUC.
1 - result.fun

In [None]:
# Convergence trace of the search, requested with a log-scaled y axis.
plot_convergence(result, yscale="log")

### LightGBM Random Search

In [None]:
%%capture
# Sampling distributions for the random search.
# Assuming `uniform`/`randint` are the scipy.stats distributions, uniform(loc, scale)
# draws from [loc, loc + scale]: subsample and colsample_bytree span [0.5, 1.0].
params = {"n_estimators": [20000],
          "num_leaves": randint(77, 521),
          "learning_rate": uniform(0, 0.01),
          "reg_lambda": uniform(0, 1),
          "subsample": uniform(0.5, 0.5),
          "colsample_bytree": uniform(0.5, 0.5)}

# Early stopping is monitored on the held-out validation frame.
# NOTE(review): passing fit_params to the constructor was deprecated in
# scikit-learn 0.19 and removed in 0.21; on newer versions pass these
# keyword arguments to cv.fit(...) instead.
fit_params = {"early_stopping_rounds": 200,
              "eval_metric": "auc",
              "eval_set": [(df_val[lgb_cols], y_val)]}

# random_state pins the sampled candidates so a re-run evaluates the same
# 100 configurations (matching the seeded Bayesian search).
cv = RandomizedSearchCV(estimator=lgb.LGBMClassifier(),
                        param_distributions=params,
                        fit_params=fit_params,
                        scoring="roc_auc",
                        cv=3,
                        n_iter=100,
                        random_state=0)

cv.fit(df_train[lgb_cols], y_train)

In [None]:
# Tabulate every sampled configuration, best mean test AUC first.
cv_results = (
    pd.DataFrame(cv.cv_results_)
    .sort_values(by="mean_test_score", ascending=False)
)

# Show the 20 highest-scoring candidates.
cv_results.head(20)

### Synthetic target

In [None]:
# Columns of `application` used as inputs for the linear model fitted below.
cols = [
    "AMT_CREDIT",
    "AMT_GOODS_PRICE",
    "HOUR_APPR_PROCESS_START",
    "NAME_CONTRACT_TYPE",
    "NAME_TYPE_SUITE",
    "WEEKDAY_APPR_PROCESS_START",
]

# Expand the selected columns into dummy/indicator columns where applicable.
app_temp = pd.get_dummies(application[cols])

# Preview the encoded frame.
app_temp.head()

In [None]:
# Preprocessing: median-impute missing values, then standardize.
impute = Imputer(strategy="median")
scale = StandardScaler()

# L2-penalized logistic regression; C is chosen by cross-validated ROC AUC
# from the grid exp(0), exp(-1), ..., exp(-12).
clf = LogisticRegressionCV(Cs=[np.exp(-i) for i in range(13)],
                           penalty="l2",
                           scoring="roc_auc")

# Labels come from the source frame: app_temp is built from `cols`, which has
# no "TARGET" column to pop. Reading (rather than popping) also keeps this
# cell safe to re-run.
# NOTE(review): assumes `application` has a fully populated "TARGET" column
# and shares its row index with app_temp — confirm.
y_app_temp = application["TARGET"]

# Guard against label leakage should "TARGET" ever be added to app_temp upstream.
X_app_temp = app_temp.drop("TARGET", axis=1, errors="ignore")

clf.fit(scale.fit_transform(impute.fit_transform(X_app_temp)), y_app_temp)

In [None]:
# Persist the fitted linear model to disk.
# NOTE(review): absolute, user-specific path — consider a configurable directory.
with open(
    "/Users/danielsaxton/home_credit_default_risk/linear_model.pkl", "wb"
) as f:
    pickle.dump(clf, f)