<a href="https://www.kaggle.com/code/eliyahusanti/ps4e5-automl-fedot-catboost?scriptVersionId=179536092" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Open-source AutoML framework: FEDOT
## Repository: https://github.com/aimclub/FEDOT

In [None]:
!pip install fedot fedot[extra]
!pip uninstall xarray -y

In [None]:
import numpy as np
import pandas as pd
from fedot.api.main import Fedot
from fedot.core.pipelines.pipeline_builder import PipelineBuilder

In [None]:
# Read the competition's train and test tables into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/playground-series-s4e5/train.csv',
        '/kaggle/input/playground-series-s4e5/test.csv',
    )
)

In [None]:
# Remove the row identifier from both frames so it is not picked up as a feature
# (the feature list below is taken from `test.columns`).
# Reassigning with errors="ignore" instead of inplace=True keeps this cell safe
# to re-run: a second execution no longer raises KeyError once "id" is gone.
train = train.drop(columns=["id"], errors="ignore")
test = test.drop(columns=["id"], errors="ignore")

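Create row-wise summary features (sum, standard deviation, skewness, kurtosis, quantiles, and per-value counts) and add them to both the train and test sets.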

In [None]:
# Quantile levels turned into q_<percent> columns.
QUANTILES = (0.0, 0.2, 0.4, 0.5, 0.6, 0.8, 1.0)
# Row sums flagged by the boolean `special1` feature (72..75 inclusive).
# NOTE(review): the reason this particular range is special is not visible here.
SPECIAL_SUMS = np.arange(72, 76)


def add_row_features(df, feature_cols, values, quantiles=QUANTILES):
    """Add row-wise summary features to ``df`` in place.

    Every new column is computed only from ``feature_cols``, so derived columns
    never feed into each other.

    Args:
        df: DataFrame that contains all ``feature_cols``; it is modified in place.
        feature_cols: names of the original feature columns to aggregate over.
        values: distinct values for which a per-row occurrence count
            (``cnt_<value>``) is created.
        quantiles: quantile levels in [0, 1]; each becomes ``q_<percent>``.

    Returns:
        The same ``df`` object, for convenience.
    """
    # Select the base columns once instead of re-slicing for every statistic.
    base = df[feature_cols]

    df["fsum"] = base.sum(axis=1)
    df["fstd"] = base.std(axis=1)
    df["special1"] = df["fsum"].isin(SPECIAL_SUMS)
    df["fskew"] = base.skew(axis=1)
    df["fkurtosis"] = base.kurtosis(axis=1)

    for level in quantiles:
        df["q_{}".format(int(level * 100))] = base.quantile(level, axis=1)

    for value in values:
        df["cnt_{}".format(value)] = (base == value).sum(axis=1)

    return df


# The original features are exactly the columns of `test` at this point.
initial_features = list(test.columns)

# Every distinct value seen in any original feature of either frame. Sorting
# makes the cnt_* columns appear in ascending value order.
# NOTE(review): sorting assumes the values are mutually comparable (all numeric).
unique_vals = sorted(
    {
        value
        for frame in (train, test)
        for col in initial_features
        for value in frame[col].unique()
    }
)

for frame in (train, test):
    add_row_features(frame, initial_features, unique_vals)

Configure the basic FEDOT parameters, using a CatBoost regressor as the initial assumption.

In [None]:
# Parameters forwarded to the "catboostreg" node of the starting pipeline.
catboost_params = {
    "use_eval_set": True,
    "use_best_model": True,
    "iterations": 10000,
    "n_jobs": -1,
}

# Single-node pipeline handed to FEDOT as its initial assumption.
initial_pipeline = (
    PipelineBuilder()
    .add_node("catboostreg", params=catboost_params)
    .build()
)

auto_model = Fedot(
    problem="regression",
    metric=["r2"],
    preset="best_quality",
    initial_assumption=initial_pipeline,
    with_tuning=True,
    cv_folds=10,
    timeout=5,  # NOTE(review): presumably minutes -- confirm against the FEDOT docs
    seed=42,
    n_jobs=1,
    logging_level=10,  # 10 is the numeric value of logging.DEBUG
    use_pipelines_cache=False,
    use_auto_preprocessing=False,
)

In [None]:
# Train on the engineered training frame; the target is selected by column name.
auto_model.fit(
    target="FloodProbability",
    features=train,
)

In [None]:
# Predict on the engineered test frame.
# NOTE(review): save_predictions=True presumably writes the predictions.csv that
# the submission cell reads back later -- confirm against the FEDOT docs.
prediction = auto_model.predict(
    save_predictions=True,
    features=test,
)

In [None]:
# First ten rows of the FEDOT report. Leaving the frame as the cell's last
# expression lets Jupyter render it as a rich table instead of plain text.
auto_model.return_report().head(10)

In [None]:
# Display the pipeline currently held by the AutoML model.
final_pipeline = auto_model.current_pipeline
final_pipeline.show()

In [None]:
# The "id" column was dropped from `test` before feature engineering, so the raw
# file is read again to recover the ids needed for the submission.
raw_test_path = '/kaggle/input/playground-series-s4e5/test.csv'
test_data = pd.read_csv(raw_test_path)

In [None]:
# Build the submission file from the predictions saved to disk and the ids of
# the raw test set.
predictions_ = pd.read_csv("/kaggle/working/predictions.csv")

# Constructing a DataFrame from two Series aligns them on their index, so a
# length mismatch would silently produce NaN rows; fail loudly instead.
if len(predictions_) != len(test_data):
    raise ValueError(
        "Prediction count ({}) does not match number of test rows ({})".format(
            len(predictions_), len(test_data)
        )
    )

sub = pd.DataFrame({"id": test_data["id"], "FloodProbability": predictions_["Prediction"]})
sub.to_csv("submission.csv", index=False)