In [1]:
import sys, os

src = r"C:\Users\user\Desktop\Coding mo\AutoML\src" # copy the location of the folder that has paths.py file using (ctrl + shift + C ) and paste it here    
sys.path.append(src)

from paths import *

get_paths()

sys.path.append(SRC_PATH)

from pipeline import *


In [2]:
# ---------------- Task 1+9 ----------------

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task
import torch

def loop_dfs_and_evaluate_lightautoml():

    for dataset_name in data_dict_classification_only.keys():
        dataset_summary = []
        runtime_memory_rows = []
        dataset_base = os.path.splitext(dataset_name)[0]

        for seed in RANDOM_SEEDS:
            np.random.seed(seed)
            random.seed(seed)
            try:
                torch.manual_seed(seed)
                torch.cuda.empty_cache()
            except:
                pass

            df_train = splits_dict[dataset_name]['train']
            df_test = splits_dict[dataset_name]['test']
            task_type = tasks_dict_classification_only[dataset_name]

            start_time = time.time()
            task = Task(task_type)

            automl = TabularAutoML(
                task=task,
                timeout=TIME_BUDGET,
                cpu_limit=MEMORY_LIMIT,
                reader_params={'n_jobs': MEMORY_LIMIT},
                general_params={"nested_cv": False}
            )

            _ = automl.fit_predict(df_train, roles={'target': TARGET_COLS[dataset_name]})
            training_runtime_sec = round(time.time() - start_time, 3)

            infer_start = time.time()
            results = evaluate_and_save_results_general(df_train, df_test, automl, dataset_name, framework='lightautoml')
            inference_time_per_sample = round((time.time() - infer_start) / len(df_test), 6)
            cpu_usage_percent = psutil.cpu_percent(interval=None)

            temp_model_file = os.path.join("temp_lama_models", f"{dataset_base}_{seed}.pkl")
            os.makedirs("temp_lama_models", exist_ok=True)
            with open(temp_model_file, "wb") as f:
                pickle.dump(automl, f)

            total_bytes = os.path.getsize(temp_model_file)
            model_size_mb = round(total_bytes / (1024 * 1024), 3)
            os.remove(temp_model_file)

            memory_usage_mb = get_memory_usage_mb()

            runtime_memory_rows.append({
                "dataset": dataset_name,
                "seed": seed,
                "training_runtime_sec": training_runtime_sec,
                "model_size_MB": model_size_mb,
                "memory_usage_MB": memory_usage_mb,
                "inference_time_per_sample_sec": inference_time_per_sample,
                "cpu_usage_percent": cpu_usage_percent
            })

            dataset_summary.append({
                'dataset': dataset_name,
                'seed': seed,
                **results,
                'runtime': training_runtime_sec
            })

            del automl
            gc.collect()

        output_path = os.path.join(AUTOML_METRICS_PATH, f"lightautoml_{dataset_base}_summary.csv")
        pd.DataFrame(dataset_summary).to_csv(output_path, index=False)

        task1_output_path = os.path.join(RUNTIME_MEMORY_TASK1_PATH, f"lightautoml_{dataset_base}_task1_runtime_memory_metrics.csv")
        pd.DataFrame(runtime_memory_rows).to_csv(task1_output_path, index=False)

        json_path = os.path.join(RUNTIME_MEMORY_TASK1_PATH, f"lightautoml_{dataset_base}_runtime_memory_metrics_in_json.json")
        with open(json_path, 'w') as f:
            json.dump(runtime_memory_rows, f, indent=2)

loop_dfs_and_evaluate_lightautoml()






In [3]:
for dataset_name in data_dict_classification_only.keys():
    dataset_base = dataset_name.replace('.csv', '')
    
    csv_file1 = os.path.join(AUTOML_METRICS_PATH, f"lightautoml_{dataset_base}_summary.csv")
    csv_file2 = os.path.join(RUNTIME_MEMORY_TASK1_PATH, f"lightautoml_{dataset_base}_task1_runtime_memory_metrics.csv")

    print(f"CSV for {dataset_base}: lightautoml_{dataset_base}_summary.csv")
    df1 = pd.read_csv(csv_file1)
    display(df1)

    print(f"CSV for {dataset_base}: lightautoml_{dataset_base}_task1_runtime_memory_metrics.csv")
    df2 = pd.read_csv(csv_file2)
    display(df2)


CSV for modeldata: lightautoml_modeldata_summary.csv


Unnamed: 0,dataset,seed,accuracy,f1,precision,recall,runtime
0,modeldata.csv,42,0.932969,0.933072,0.933222,0.932969,36.683
1,modeldata.csv,123,0.93337,0.933473,0.933624,0.93337,35.086


CSV for modeldata: lightautoml_modeldata_task1_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,modeldata.csv,42,36.683,19.187,5022.938,1.4e-05,74.3
1,modeldata.csv,123,35.086,20.399,5061.918,1.4e-05,94.4


CSV for titanic: lightautoml_titanic_summary.csv


Unnamed: 0,dataset,seed,accuracy,f1,precision,recall,runtime
0,titanic.csv,42,0.798883,0.789733,0.813974,0.798883,20.09
1,titanic.csv,123,0.798883,0.788531,0.818125,0.798883,23.733


CSV for titanic: lightautoml_titanic_task1_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,titanic.csv,42,20.09,1.848,5037.766,0.000278,75.3
1,titanic.csv,123,23.733,1.841,5037.703,0.000279,88.2


CSV for train: lightautoml_train_summary.csv


Unnamed: 0,dataset,seed,accuracy,f1,precision,recall,runtime
0,train.csv,42,1.0,1.0,1.0,1.0,23.251
1,train.csv,123,1.0,1.0,1.0,1.0,22.625


CSV for train: lightautoml_train_task1_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,train.csv,42,23.251,6.894,5037.562,0.00083,95.4
1,train.csv,123,22.625,4.56,5035.547,0.000446,95.7


CSV for wine: lightautoml_wine_summary.csv


Unnamed: 0,dataset,seed,accuracy,f1,precision,recall,runtime
0,wine.csv,42,1.0,1.0,1.0,1.0,22.982
1,wine.csv,123,1.0,1.0,1.0,1.0,20.695


CSV for wine: lightautoml_wine_task1_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,wine.csv,42,22.982,0.014,5035.594,0.000307,97.2
1,wine.csv,123,20.695,5.323,5035.734,0.000891,97.3


In [4]:
# ---------------- Task 3+9 ----------------

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task
import torch

def evaluate_cv_folds_lightautoml(folds_dict):
    for dataset_name, folds_data in folds_dict.items():
        task_type = tasks_dict_classification_only[dataset_name]
        target_col = TARGET_COLS[dataset_name]

        dataset_summary = []
        runtime_memory_rows = []
        dataset_base = dataset_name.replace(".csv", "")

        for seed in RANDOM_SEEDS:
            np.random.seed(seed)
            random.seed(seed)
            try:
                torch.manual_seed(seed)
                torch.cuda.empty_cache()
            except:
                pass

            fold_num = 1
            for fold_name, fold_splits in folds_data.items():
                df_train = fold_splits['train']
                df_val = fold_splits['val']

                start_time = time.time()
                task = Task(task_type)

                automl = TabularAutoML(
                    task=task,
                    timeout=CV_TIME_BUDGET,
                    cpu_limit=CV_MEMORY_LIMIT,
                    reader_params={'n_jobs': CV_MEMORY_LIMIT},
                    general_params={"nested_cv": False}
                )

                _ = automl.fit_predict(df_train, roles={'target': target_col})
                training_runtime_sec = round(time.time() - start_time, 3)

                infer_start = time.time()
                results = evaluate_and_save_results_general(df_train, df_val, automl, dataset_name, framework='lightautoml')
                inference_time_per_sample = round((time.time() - infer_start) / len(df_val), 6)
                cpu_usage_percent = psutil.cpu_percent(interval=None)

                temp_dir = tempfile.mkdtemp()
                temp_model_file = os.path.join(temp_dir, f"{dataset_base}_{seed}_fold{fold_num}.pkl")
                with open(temp_model_file, "wb") as f:
                    pickle.dump(automl, f)
                total_bytes = os.path.getsize(temp_model_file)
                model_size_mb = round(total_bytes / (1024 * 1024), 3)
                shutil.rmtree(temp_dir, ignore_errors=True)

                memory_usage_mb = get_memory_usage_mb()

                runtime_memory_rows.append({
                    "dataset": dataset_name,
                    "seed": seed,
                    "fold": fold_num,
                    "training_runtime_sec": training_runtime_sec,
                    "model_size_MB": model_size_mb,
                    "memory_usage_MB": memory_usage_mb,
                    "inference_time_per_sample_sec": inference_time_per_sample,
                    "cpu_usage_percent": cpu_usage_percent
                })

                dataset_summary.append({
                    'dataset': dataset_name,
                    'seed': seed,
                    'fold': fold_num,
                    **results,
                    'runtime': training_runtime_sec
                })

                fold_num += 1
                del automl
                gc.collect()

        output_path = os.path.join(CV_FOLDS_METRICS_PATH, f"lightautoml_{dataset_base}_cv_summary.csv")
        pd.DataFrame(dataset_summary).to_csv(output_path, index=False)

        task3_output_path = os.path.join(RUNTIME_MEMORY_TASK3_PATH, f"lightautoml_{dataset_base}_task3_runtime_memory_metrics.csv")
        pd.DataFrame(runtime_memory_rows).to_csv(task3_output_path, index=False)

        json_path = os.path.join(RUNTIME_MEMORY_TASK3_PATH, f"lightautoml_{dataset_base}_runtime_memory_metrics_cv_in_json.json")
        with open(json_path, 'w') as f:
            json.dump(runtime_memory_rows, f, indent=2)

evaluate_cv_folds_lightautoml(folds_dict)







In [5]:
for dataset_name in data_dict_classification_only.keys():
    dataset_base = dataset_name.replace('.csv', '')
    csv_file1 = os.path.join(CV_FOLDS_METRICS_PATH, f"lightautoml_{dataset_base}_cv_summary.csv")
    csv_file2 = os.path.join(RUNTIME_MEMORY_TASK3_PATH, f"lightautoml_{dataset_base}_task3_runtime_memory_metrics.csv")

    print(f"CSV for {dataset_base}: lightautoml_{dataset_base}_cv_summary.csv")
    df1 = pd.read_csv(csv_file1)
    display(df1)

    print(f"CSV for {dataset_base}: lightautoml_{dataset_base}_task3_runtime_memory_metrics.csv")
    df2 = pd.read_csv(csv_file2)
    display(df2)



CSV for modeldata: lightautoml_modeldata_cv_summary.csv


Unnamed: 0,dataset,seed,fold,accuracy,f1,precision,recall,runtime
0,modeldata.csv,42,1,0.905546,0.904789,0.905094,0.905546,19.468
1,modeldata.csv,42,2,0.93337,0.933435,0.933518,0.93337,27.906
2,modeldata.csv,42,3,0.907366,0.906588,0.906972,0.907366,18.073
3,modeldata.csv,42,4,0.930748,0.930843,0.930977,0.930748,26.628
4,modeldata.csv,42,5,0.909371,0.908551,0.909068,0.909371,17.69
5,modeldata.csv,123,1,0.93226,0.932301,0.932349,0.93226,17.174
6,modeldata.csv,123,2,0.93337,0.933435,0.933518,0.93337,17.729
7,modeldata.csv,123,3,0.933833,0.93391,0.934015,0.933833,18.172
8,modeldata.csv,123,4,0.930748,0.930843,0.930977,0.930748,15.582
9,modeldata.csv,123,5,0.93664,0.936652,0.936666,0.93664,18.13


CSV for modeldata: lightautoml_modeldata_task3_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,fold,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,modeldata.csv,42,1,19.468,0.042,5022.848,8e-06,92.7
1,modeldata.csv,42,2,27.906,17.767,5064.266,1.1e-05,90.0
2,modeldata.csv,42,3,18.073,0.042,5021.172,7e-06,95.0
3,modeldata.csv,42,4,26.628,13.729,5058.363,1.1e-05,88.6
4,modeldata.csv,42,5,17.69,0.042,5007.531,6e-06,89.0
5,modeldata.csv,123,1,17.174,16.003,5061.445,1e-05,87.4
6,modeldata.csv,123,2,17.729,17.767,5011.234,1.3e-05,88.3
7,modeldata.csv,123,3,18.172,19.325,5066.762,1.5e-05,89.0
8,modeldata.csv,123,4,15.582,13.729,5058.199,1.1e-05,89.3
9,modeldata.csv,123,5,18.13,18.679,5073.531,1.4e-05,89.8


CSV for titanic: lightautoml_titanic_cv_summary.csv


Unnamed: 0,dataset,seed,fold,accuracy,f1,precision,recall,runtime
0,titanic.csv,42,1,0.765363,0.765363,0.765363,0.765363,10.432
1,titanic.csv,42,2,0.724719,0.67227,0.809554,0.724719,10.217
2,titanic.csv,42,3,0.820225,0.816175,0.820343,0.820225,10.213
3,titanic.csv,42,4,0.803371,0.803639,0.80396,0.803371,10.523
4,titanic.csv,42,5,0.61236,0.465137,0.374984,0.61236,10.069
5,titanic.csv,123,1,0.776536,0.775196,0.774661,0.776536,10.916
6,titanic.csv,123,2,0.797753,0.796494,0.796077,0.797753,10.16
7,titanic.csv,123,3,0.820225,0.816175,0.820343,0.820225,10.088
8,titanic.csv,123,4,0.803371,0.803639,0.80396,0.803371,10.55
9,titanic.csv,123,5,0.61236,0.465137,0.374984,0.61236,10.344


CSV for titanic: lightautoml_titanic_task3_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,fold,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,titanic.csv,42,1,10.432,0.036,5069.586,0.000184,68.8
1,titanic.csv,42,2,10.217,0.095,5069.68,0.000225,66.4
2,titanic.csv,42,3,10.213,0.032,5069.688,0.000191,71.7
3,titanic.csv,42,4,10.523,0.899,5062.785,0.000202,70.3
4,titanic.csv,42,5,10.069,0.432,5062.809,0.00027,65.6
5,titanic.csv,123,1,10.916,2.847,5062.832,0.000218,75.9
6,titanic.csv,123,2,10.16,0.031,5062.891,0.000191,68.4
7,titanic.csv,123,3,10.088,0.032,5062.895,0.00018,66.9
8,titanic.csv,123,4,10.55,0.899,5062.809,0.000208,71.3
9,titanic.csv,123,5,10.344,0.432,5062.82,0.000253,67.4


CSV for train: lightautoml_train_cv_summary.csv


Unnamed: 0,dataset,seed,fold,accuracy,f1,precision,recall,runtime
0,train.csv,42,1,0.966667,0.966583,0.969697,0.966667,7.74
1,train.csv,42,2,1.0,1.0,1.0,1.0,4.649
2,train.csv,42,3,0.933333,0.93266,0.944444,0.933333,6.115
3,train.csv,42,4,1.0,1.0,1.0,1.0,4.658
4,train.csv,42,5,0.966667,0.966583,0.969697,0.966667,6.198
5,train.csv,123,1,0.966667,0.966583,0.969697,0.966667,4.91
6,train.csv,123,2,1.0,1.0,1.0,1.0,4.604
7,train.csv,123,3,0.933333,0.93266,0.944444,0.933333,5.995
8,train.csv,123,4,1.0,1.0,1.0,1.0,4.705
9,train.csv,123,5,0.966667,0.966583,0.969697,0.966667,5.659


CSV for train: lightautoml_train_task3_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,fold,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,train.csv,42,1,7.74,2.356,5063.934,0.000533,75.4
1,train.csv,42,2,4.649,4.365,5065.121,0.000703,90.1
2,train.csv,42,3,6.115,7.566,5065.156,0.000693,96.7
3,train.csv,42,4,4.658,4.841,5062.781,0.000482,92.7
4,train.csv,42,5,6.198,8.847,5066.941,0.000639,94.5
5,train.csv,123,1,4.91,0.014,5062.602,0.000333,92.1
6,train.csv,123,2,4.604,4.497,5064.852,0.000801,94.1
7,train.csv,123,3,5.995,7.566,5064.867,0.000666,96.1
8,train.csv,123,4,4.705,4.841,5062.66,0.00044,90.4
9,train.csv,123,5,5.659,8.847,5066.918,0.000603,93.0


CSV for wine: lightautoml_wine_cv_summary.csv


Unnamed: 0,dataset,seed,fold,accuracy,f1,precision,recall,runtime
0,wine.csv,42,1,0.972222,0.972369,0.974747,0.972222,8.653
1,wine.csv,42,2,1.0,1.0,1.0,1.0,4.669
2,wine.csv,42,3,0.972222,0.97197,0.974074,0.972222,4.378
3,wine.csv,42,4,0.971429,0.971471,0.973626,0.971429,4.965
4,wine.csv,42,5,1.0,1.0,1.0,1.0,6.027
5,wine.csv,123,1,0.972222,0.972369,0.974747,0.972222,4.888
6,wine.csv,123,2,1.0,1.0,1.0,1.0,5.483
7,wine.csv,123,3,0.972222,0.97197,0.974074,0.972222,5.472
8,wine.csv,123,4,0.971429,0.971471,0.973626,0.971429,5.992
9,wine.csv,123,5,1.0,1.0,1.0,1.0,6.023


CSV for wine: lightautoml_wine_task3_runtime_memory_metrics.csv


Unnamed: 0,dataset,seed,fold,training_runtime_sec,model_size_MB,memory_usage_MB,inference_time_per_sample_sec,cpu_usage_percent
0,wine.csv,42,1,8.653,0.014,5062.621,0.000335,83.3
1,wine.csv,42,2,4.669,6.089,5072.453,0.000409,95.4
2,wine.csv,42,3,4.378,0.016,5070.703,0.000279,94.3
3,wine.csv,42,4,4.965,3.951,5071.16,0.000898,93.4
4,wine.csv,42,5,6.027,4.555,5071.391,0.000872,93.4
5,wine.csv,123,1,4.888,6.839,5073.516,0.001031,93.7
6,wine.csv,123,2,5.483,6.096,5086.082,0.000379,94.1
7,wine.csv,123,3,5.472,0.016,5081.105,0.000257,93.6
8,wine.csv,123,4,5.992,3.952,5081.328,0.000626,92.9
9,wine.csv,123,5,6.023,4.555,5081.418,0.00087,93.0


In [2]:
# ---------------- Task 6 ----------------

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

def evaluate_pipeline_folds_times_lightautoml(leakage_free_final_output):
    for dataset_name, folds in leakage_free_final_output.items():
        dataset_base = dataset_name.replace('.csv', '')

        dataset_summary = []
        time_log_rows = []

        target_col = TARGET_COLS[dataset_name]

        for seed in RANDOM_SEEDS:
            np.random.seed(seed)
            random.seed(seed)

            for fold_name, split in folds.items():
                df_train = split['train'].copy()
                df_val = split['val'].copy()
                pipe_times = split['pipeline_time']
                pipe_times_copy = pipe_times.copy()
                pipe_times_copy["total_time"] = sum(pipe_times_copy.values())

                X_train = df_train.drop(columns=[target_col])
                y_train = df_train[target_col]
                X_val = df_val.drop(columns=[target_col])
                y_val = df_val[target_col]

                temp_dir = tempfile.mkdtemp()
                start = time.time()

                df_train_proc = X_train.copy()
                df_train_proc[target_col] = y_train.values

                automl = TabularAutoML(
                    task=Task(tasks_dict_classification_only[dataset_name]),
                    timeout=CV_TIME_BUDGET,
                    reader_params={"random_state": seed}
                )

                automl.fit_predict(df_train_proc, roles={"target": target_col})

                preds = automl.predict(X_val).data
                if preds.shape[1] == 1:
                    preds = preds[:, 0] > 0.5
                else:
                    preds = preds.argmax(axis=1)

                runtime = round(time.time() - start, 3)
                fold_idx = int(fold_name.split("_")[1])
                best_model_raw = automl.blender.__class__.__name__
                best_model = f"lightautoml - {best_model_raw}"

                dataset_summary.append({
                    "dataset": dataset_name,
                    "seed": seed,
                    "fold": fold_idx,
                    "model_name": best_model,
                    "accuracy": accuracy_score(y_val, preds),
                    "f1": f1_score(y_val, preds, average='weighted'),
                    "precision": precision_score(y_val, preds, average='weighted'),
                    "recall": recall_score(y_val, preds, average='weighted'),
                    "runtime": runtime
                })

                row = {"dataset": dataset_name, "fold": fold_idx, "model_name": best_model}
                row.update(pipe_times_copy)
                time_log_rows.append(row)

                shutil.rmtree(temp_dir, ignore_errors=True)

        df_summary = pd.DataFrame(dataset_summary).drop_duplicates()
        df_times = pd.DataFrame(time_log_rows).drop_duplicates()

        df_summary = df_summary.sort_values(["dataset", "seed", "fold"])
        df_times = df_times.sort_values(["dataset", "fold"])

        summary_path = os.path.join(
            PIPELINE_DATAFRAMES_METRICS_PATH,
            f"lightautoml_{dataset_base}_pipeline_summary.csv"
        )
        df_summary.to_csv(summary_path, index=False)

        time_log_path = os.path.join(
            PIPELINE_LOG_TIMES_PATH,
            f"lightautoml_{dataset_base}_pipeline_time_log.csv"
        )
        df_times.to_csv(time_log_path, index=False)

evaluate_pipeline_folds_times_lightautoml(run_pipeline_on_folds_with_control(pipeline_data_dict))


In [3]:
for dataset_name in data_dict_classification_only.keys():
    dataset_base = dataset_name.replace('.csv', '')
    
    csv_file_1 = os.path.join(PIPELINE_DATAFRAMES_METRICS_PATH, f"lightautoml_{dataset_base}_pipeline_summary.csv")
    csv_file_2 = os.path.join(PIPELINE_LOG_TIMES_PATH, f"lightautoml_{dataset_base}_pipeline_time_log.csv")
    
    df1 = pd.read_csv(csv_file_1)
    df2 = pd.read_csv(csv_file_2)
    
    print(f"CSV for {dataset_base}: lightautoml_{dataset_base}_pipeline_summary.csv")
    display(df1) 
    print(f"CSV for {dataset_base}: lightautoml_{dataset_base}_pipeline_time_log.csv")
    display(df2) 



CSV for modeldata: lightautoml_modeldata_pipeline_summary.csv


Unnamed: 0,dataset,seed,fold,model_name,accuracy,f1,precision,recall,runtime
0,modeldata.csv,42,1,lightautoml - WeightedBlender,0.907681,0.906657,0.907572,0.907681,10.684
1,modeldata.csv,42,2,lightautoml - WeightedBlender,0.926276,0.926443,0.926718,0.926276,10.866
2,modeldata.csv,123,1,lightautoml - WeightedBlender,0.920415,0.920258,0.920175,0.920415,9.829
3,modeldata.csv,123,2,lightautoml - WeightedBlender,0.926399,0.926499,0.926638,0.926399,17.882


CSV for modeldata: lightautoml_modeldata_pipeline_time_log.csv


Unnamed: 0,dataset,fold,model_name,preprocessing,nan_guard_before_vif,vif,binner,nan_guard_before_poly,selector,total_time
0,modeldata.csv,1,lightautoml - WeightedBlender,0.517331,0.210151,0.017747,0.063528,0.055334,0.026718,0.890809
1,modeldata.csv,2,lightautoml - WeightedBlender,0.845989,0.314093,0.020792,0.064586,0.076902,0.026215,1.348576


CSV for titanic: lightautoml_titanic_pipeline_summary.csv


Unnamed: 0,dataset,seed,fold,model_name,accuracy,f1,precision,recall,runtime
0,titanic.csv,42,1,lightautoml - WeightedBlender,0.616592,0.470354,0.380186,0.616592,7.659
1,titanic.csv,42,2,lightautoml - WeightedBlender,0.813483,0.807272,0.816734,0.813483,6.228
2,titanic.csv,123,1,lightautoml - WeightedBlender,0.616592,0.470354,0.380186,0.616592,7.141
3,titanic.csv,123,2,lightautoml - WeightedBlender,0.61573,0.469291,0.379124,0.61573,7.152


CSV for titanic: lightautoml_titanic_pipeline_time_log.csv


Unnamed: 0,dataset,fold,model_name,preprocessing,nan_guard_before_vif,vif,binner,nan_guard_before_poly,poly,selector,total_time
0,titanic.csv,1,lightautoml - WeightedBlender,0.011136,0.003029,0.001004,0.002495,0.001003,0.013323,0.004001,0.03599
1,titanic.csv,2,lightautoml - WeightedBlender,0.010129,0.004383,0.002007,0.002024,0.001002,0.011076,0.004001,0.034622


CSV for train: lightautoml_train_pipeline_summary.csv


Unnamed: 0,dataset,seed,fold,model_name,accuracy,f1,precision,recall,runtime
0,train.csv,42,1,lightautoml - WeightedBlender,0.946667,0.946581,0.94847,0.946667,5.45
1,train.csv,42,2,lightautoml - WeightedBlender,0.933333,0.933851,0.934872,0.933333,5.85
2,train.csv,123,1,lightautoml - WeightedBlender,0.946667,0.946581,0.94847,0.946667,6.575
3,train.csv,123,2,lightautoml - WeightedBlender,0.933333,0.933851,0.934872,0.933333,5.593


CSV for train: lightautoml_train_pipeline_time_log.csv


Unnamed: 0,dataset,fold,model_name,preprocessing,nan_guard_before_vif,vif,binner,nan_guard_before_poly,poly,selector,total_time
0,train.csv,1,lightautoml - WeightedBlender,0.002001,0.0,0.001,0.001,0.0,0.0,0.0,0.004001
1,train.csv,2,lightautoml - WeightedBlender,0.00205,0.0,0.0,0.001002,0.0,0.001002,0.0,0.004055


CSV for wine: lightautoml_wine_pipeline_summary.csv


Unnamed: 0,dataset,seed,fold,model_name,accuracy,f1,precision,recall,runtime
0,wine.csv,42,1,lightautoml - WeightedBlender,0.932584,0.93268,0.934031,0.932584,4.372
1,wine.csv,42,2,lightautoml - WeightedBlender,0.853933,0.854725,0.861554,0.853933,4.817
2,wine.csv,123,1,lightautoml - WeightedBlender,0.88764,0.88764,0.88764,0.88764,4.912
3,wine.csv,123,2,lightautoml - WeightedBlender,0.853933,0.854725,0.861554,0.853933,4.656


CSV for wine: lightautoml_wine_pipeline_time_log.csv


Unnamed: 0,dataset,fold,model_name,preprocessing,nan_guard_before_vif,vif,binner,nan_guard_before_poly,poly,selector,total_time
0,wine.csv,1,lightautoml - WeightedBlender,0.002004,0.0,0.0,0.0,0.001003,0.0,0.0,0.003007
1,wine.csv,2,lightautoml - WeightedBlender,0.002004,0.0,0.0,0.001002,0.0,0.0,0.0,0.003006
