In [1]:
import os
import shutil
import pandas as pd
import mercury as mr
from supervised.automl import AutoML 

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Configure the Mercury app object with its title and description.
app = mr.App(
    title="Train AutoML 🧑‍💻",
    description="Train ML pipeline with MLJAR AutoML",
)

# Train Machine Learning Pipeline with MLJAR AutoML
Please follow the steps:
1. Upload CSV file with data. Data should have column names in the first line.
2. Select input features and target column.
3. Select AutoML training mode, algorithms and training time limit.
4. Directory with all ML models will be zipped and available to download.

In [4]:
# Upload widget for the training CSV; max_file_size is set to "1MB".
data_file = mr.File(
    max_file_size="1MB",
    label="Upload CSV with training data",
)

mercury.File

In [5]:
# The upload widget has no file path (presumably nothing has been uploaded yet),
# so call mr.Stop() before the cells that read the CSV.
# NOTE(review): mr.Stop() presumably halts execution of the following cells — confirm against Mercury docs.
if data_file.filepath is None:
    mr.Stop()

In [None]:
# Parse the uploaded file into the DataFrame used by the later cells.
df = pd.read_csv(filepath_or_buffer=data_file.filepath)

In [None]:
# Markdown heading shown before the cell that displays the loaded data.
mr.Markdown("### Training data")

In [None]:
# Bare expression: the notebook displays the loaded DataFrame as this cell's output.
df

In [None]:
# Feature picker: every column is selectable; all but the last one are pre-selected.
x_columns = mr.MultiSelect(
    label="Input features",
    choices=list(df.columns),
    value=list(df.columns)[:-1],
)

In [None]:
# Target picker: every column is selectable; the last column is pre-selected.
y_column = mr.Select(
    label="Target",
    choices=list(df.columns),
    value=list(df.columns)[-1],
)

In [None]:
# Validate the column selection before any training cell runs.
if x_columns.value is None or len(x_columns.value) == 0 or y_column.value is None:
    print("Please select input features and target column")
    mr.Stop()
elif y_column.value in x_columns.value:
    # The training cell fits on df[x_columns.value]; if the target is also one
    # of the inputs, the label leaks into the features and the resulting
    # models are meaningless.
    print("Target column cannot be used as an input feature. Please remove it from input features.")
    mr.Stop()

In [None]:
# AutoML mode selector; "Explain" is the default choice.
mode = mr.Select(
    label="AutoML Mode",
    choices=["Explain", "Perform", "Compete"],
    value="Explain",
)

In [None]:
# Algorithm names offered in the "Algorithms" selector, keyed by AutoML mode.
algos = dict(
    Explain=[
        "Baseline",
        "Linear",
        "Decision Tree",
        "Random Forest",
        "Xgboost",
        "Neural Network",
    ],
    Perform=[
        "Linear",
        "Random Forest",
        "LightGBM",
        "Xgboost",
        "CatBoost",
        "Neural Network",
    ],
    Compete=[
        "Decision Tree",
        "Random Forest",
        "Extra Trees",
        "LightGBM",
        "Xgboost",
        "CatBoost",
        "Neural Network",
        "Nearest Neighbors",
    ],
)

In [None]:
# Choices follow the selected mode; every algorithm listed for it is pre-selected.
algorithms = mr.MultiSelect(
    label="Algorithms",
    choices=algos[mode.value],
    value=algos[mode.value],
)

In [None]:
# Training time budget selector; the options are strings holding seconds.
time_limit = mr.Select(
    label="Time limit (seconds)",
    choices=["60", "120", "240", "300"],
    value="60",
)

In [None]:
# Button whose .clicked flag gates the training cell.
start_training = mr.Button(
    style="success",
    label="Start training",
)

In [None]:
# Mercury output directory; its .path is where the training cell writes the zipped models.
output_dir = mr.OutputDir()

In [None]:
# Build the AutoML object from the widget selections. The time-limit widget
# holds a string, so it is converted to int here.
automl = AutoML(
    mode=mode.value,
    algorithms=algorithms.value,
    total_time_limit=int(time_limit.value),
)

In [None]:
# Train only once the "Start training" button has been clicked, then zip the
# AutoML results directory into the Mercury output directory.
if start_training.clicked:
    mr.Markdown("### AutoML training logs")
    automl.fit(df[x_columns.value], df[y_column.value])

    # Name the archive after the last component of the results directory so
    # the zip is always created directly inside output_dir.path. Joining the
    # raw results path would discard output_dir.path if that path were
    # absolute, and a trailing separator would yield an empty archive name.
    results_dir = automl._results_path
    archive_name = os.path.basename(os.path.normpath(results_dir))
    output_filename = os.path.join(output_dir.path, archive_name)
    # shutil.make_archive appends the ".zip" extension to output_filename.
    shutil.make_archive(output_filename, 'zip', results_dir)

In [None]:
# The AutoML object has no best model (presumably training has not run yet),
# so call mr.Stop() before the report cell.
# NOTE(review): mr.Stop() presumably halts execution of the following cells — confirm against Mercury docs.
if automl._best_model is None:
    mr.Stop()

In [None]:
# Last expression of the cell: the notebook displays whatever automl.report() returns.
automl.report()