In [None]:
import pandas as pd
import numpy as np
from scipy.stats import zscore
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Load the labelled training set and the reserved set that will be scored.
train = pd.read_csv("Bank_Personal_Loan_Modelling_train.csv")
reserved = pd.read_csv("Bank_Personal_Loan_Modelling_reserved.csv")

# "ID" is not used as a feature; the reserved IDs are kept in reserved_ids.
train = train.drop(columns=["ID"])
reserved_ids = reserved["ID"]
reserved = reserved.drop(columns=["ID"])

# Pick the numeric column whose summed absolute correlation with all other
# columns is smallest and drop it from both frames.
corr = train.corr(numeric_only=True)
# The target must never be a drop candidate: removing it would break the
# X / y split below (and it does not exist in `reserved`).
corr_strength = corr.abs().sum().drop("Personal Loan", errors="ignore")
least_corr = corr_strength.sort_values().index[0]

train = train.drop(columns=[least_corr])
reserved = reserved.drop(columns=[least_corr])

# Remove training rows whose "Mortgage" value lies more than 3 standard
# deviations from the mean.
zs = zscore(train["Mortgage"])
mask = np.abs(zs) > 3
# .copy() makes train_clean an independent frame, so the in-place column
# updates below act on it directly instead of on a slice of `train`
# (this is what raised SettingWithCopyWarning).
train_clean = train.loc[~mask].copy()

# Shift "Experience" so the training minimum becomes 0; the same offset is
# applied to the reserved set so both share one scale.
exp_shift = train_clean["Experience"].min()
train_clean["Experience"] -= exp_shift
reserved["Experience"] -= exp_shift

# Scale "CCAvg" by 12 in both sets.
# NOTE(review): presumably converts a monthly average to a yearly one - confirm.
train_clean["CCAvg"] *= 12
reserved["CCAvg"] *= 12

# Split the cleaned training data into features and the binary target.
X = train_clean.drop(columns=["Personal Loan"])
y = train_clean["Personal Loan"]

# Search space for the decision tree: 2 * 13 * 10 * 9 * 3 * 2 = 14,040
# parameter combinations.
param_grid = dict(
    criterion=["gini", "entropy"],
    max_depth=[6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30],
    min_samples_split=[2, 3, 4, 5, 6, 8, 10, 12, 15, 20],
    min_samples_leaf=[1, 2, 3, 4, 5, 6, 8, 10, 12],
    max_features=[None, "sqrt", "log2"],
    class_weight=["balanced", None],
)

# Exhaustive 7-fold cross-validated search scored by F1, using all CPU cores.
gs = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=105),
    param_grid=param_grid,
    scoring="f1",
    cv=7,
    n_jobs=-1,
    verbose=1,
)
gs.fit(X, y)

# Estimator refit on the full training data with the best parameters found.
best_model = gs.best_estimator_

# Score the reserved set; predictions are stored as a new column and also
# exposed as a plain Python list.
reserved["Personal Loan"] = best_model.predict(reserved)
preds = reserved["Personal Loan"].tolist()

print(preds)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_clean["Experience"] -= exp_shift
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_clean["CCAvg"] *= 12


Fitting 7 folds for each of 14040 candidates, totalling 98280 fits
[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,