In [1]:
import pandas as pd
from tree_based_models import model_selection_using_kfold, get_model, evaluate_model

## Load Data

In [2]:
# Read the three CSV inputs in order: `train` is later used to fit the model,
# while `X_val` / `y_val` supply the inputs and labels for the evaluation step.
train, X_val, y_val = map(
    pd.read_csv,
    ("data/train.csv", "data/X_val.csv", "data/y_val.csv"),
)

## Define feature names

In [3]:
# Column-role and model constants. They are defined before the feature filter
# so the filter can refer to them instead of repeating their string values.
target_name = "target"  # label column used for fitting and evaluation
unique_id = "TS"        # passed as `unique_id` to the CV helper and used as prediction index
model_name = "xgb"      # model type handed to get_model / model_selection_using_kfold

# Columns that must never be fed to the model. The original literal names are
# kept as-is; `target_name` and `unique_id` are added so that changing either
# constant cannot silently let the label or the id column into the features.
non_feature_cols = {"ROW_ID", "TS", "ALLOCATION", "target", target_name, unique_id}

# Preserve the column order of `train` while filtering.
features = [col for col in train.columns if col not in non_feature_cols]

## Model evaluation using cross-validation

In [4]:
# Cross-validation settings gathered in one place so they can be read (and
# tweaked) separately from the call itself.
cv_settings = dict(
    data=train,
    target=target_name,
    features=features,
    model_type=model_name,
    feat_engineering=None,     # no feature-engineering argument is supplied
    unique_id=unique_id,       # presumably the key used to build the folds -- TODO confirm in tree_based_models
    plot_ft_importance=False,
    n_splits=5,                # five folds
)

# Run the k-fold model selection with the settings above.
model_selection_using_kfold(**cv_settings)

Fold 1 - Accuracy: 50.92%
Fold 2 - Accuracy: 52.36%
Fold 3 - Accuracy: 51.77%
Fold 4 - Accuracy: 51.54%
Fold 5 - Accuracy: 52.32%
Accuracy: 51.78% (± 0.53%) [Min: 50.92% ; Max: 52.36%]


## Train Model and test on X_val

In [5]:
# Build a fresh model of the configured type and fit it on the whole training
# table, using only the selected feature columns.
model = get_model(model_name)
model.fit(train[features], train[target_name])

# Evaluate the fitted model on the validation data.
# The result is bound to a descriptive name instead of `_`: rebinding `_` in an
# IPython/Jupyter kernel stops the kernel from updating its `_` / `__` / `___`
# last-output variables for the rest of the session, and a named result stays
# available for later inspection. Ending the cell with an assignment still
# keeps the return value from being echoed.
val_result = evaluate_model(
    model=model,
    X=X_val[features],
    y=y_val[target_name],
    verbose=True,
    log=True,  # NOTE(review): exact effect is defined in tree_based_models -- confirm
)

Model evaluation: accuracy: 51.47 %


## Prediction

In [6]:
# Load the test inputs.
X_test = pd.read_csv("data/X_test.csv")

# Predict on the same feature columns the model was fitted on. The raw
# predictions get their own name so `preds_sub` only ever refers to the
# final DataFrame.
test_pred_values = model.predict(X_test[features])

# Wrap the predictions in a one-column frame named after the target and
# indexed by the `unique_id` column of the test set.
# NOTE(review): `unique_id` is "TS"; confirm this is the identifier expected
# in the output file (the data also has a "ROW_ID" column).
preds_sub = pd.DataFrame(
    data=test_pred_values,
    index=X_test[unique_id],
    columns=[target_name],
)

# Export of the predictions, left disabled:
# (preds_sub > 0).astype(int).to_csv("data/preds_test.csv")