In [None]:
!unzip Gd_fps.zip

In [30]:
import pandas as pd
import numpy as np
import os
import random
import warnings

warnings.filterwarnings("ignore", category=UserWarning)

In [31]:
def seed_everything(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

seed_everything(seed=42)

In [32]:
def load_datasets(path: str):
    files = [file for file in os.listdir(path) if file.endswith('.csv')]
    datasets = {}
    for file in files:
        df = pd.read_csv(os.path.join(path, file))
        datasets[file] = df
    return datasets

In [33]:
datasets = load_datasets("Gd_fps")

In [34]:
from sklearn.feature_selection import VarianceThreshold


def filter_var(df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]:
    feature_cols = [c for c in df.columns if c != "lgK"]
    X = df[feature_cols].astype(np.float32).values
    y = df["lgK"].values
    vt = VarianceThreshold(threshold=0.01)  # remove features with <1% variance
    X = vt.fit_transform(X)
    print(f"Reduced from {len(feature_cols)} to {X.shape[1]} features")
    return X, y

In [35]:
from fedot import Fedot

def init_fedot(problem='regression', timeout=10., n_jobs=-1, logging_level=50, seed=42):
    model = Fedot(
        problem=problem, 
        timeout=timeout, 
        n_jobs=n_jobs, 
        logging_level=logging_level,
        seed=seed
    )
    return model

In [36]:
def cross_validate(X, y, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    cv_results = {}
    for fold, (train_index, test_index) in enumerate(kf.split(X)):
        model = init_fedot(
            problem=problem, 
            timeout=timeout, 
            n_jobs=n_jobs, 
            logging_level=logging_level, 
            seed=seed
        )
        print(f"Fold {fold}")
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        
        rmse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
        
        cv_results[fold] = {"rmse": rmse, "r2": r2}
        print(f"  RMSE: {rmse}, R2: {r2}")
    return cv_results

In [37]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, KFold


def run_fedot(X, y):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = init_fedot(
        problem=problem, 
        timeout=timeout, 
        n_jobs=n_jobs, 
        logging_level=logging_level,
        seed=seed
    )
    cv_scores = cross_validate(X, y)
    results = {"cv_scores": cv_scores}
    print("CV R2 scores:", cv_scores)
    
    pipeline = model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    test_rmse = mean_squared_error(y_test, y_pred)
    test_r2 = r2_score(y_test, y_pred)
    results.update({"test_rmse": test_rmse, "test_r2": test_r2})
    results.update({"pipeline": pipeline})
    print("Test RMSE:", test_rmse)
    print("Test R2:", test_r2)
    
    return results

In [38]:
problem='regression'
timeout = 10.
n_jobs = -1
logging_level = 50
seed = 42

In [39]:
all_results = {}

In [40]:
for dataset_name in datasets:
    df = datasets[dataset_name]
    print(f"Running FEDOT on {dataset_name}")
    X, y = filter_var(df)
    results = run_fedot(X, y)
    all_results[dataset_name] = results

Running FEDOT on Gd_ctopo_fp_cmplx.csv
Reduced from 2048 to 267 features
Fold 0
2025-08-29 16:39:06,201 - Topological features operation requires extra dependencies for time series forecasting, which are not installed. It can infuence the performance. Please install it by 'pip install fedot[extra]'


Generations:   0%|          | 13/10000 [05:46<73:53:55, 26.64s/gen]


  0%|          | 56/100000 [04:09<123:48:11,  4.46s/trial, best loss: 3.99002383725207]  
  RMSE: 14.265312214594873, R2: 0.26467919176069066
Fold 1


Generations:   0%|          | 9/10000 [06:19<116:59:43, 42.16s/gen]


  0%|          | 51/100000 [03:36<118:00:30,  4.25s/trial, best loss: 3.851295053746971] 
  RMSE: 13.249909432223866, R2: 0.22811645123127577
Fold 2


Generations:   0%|          | 11/10000 [05:41<86:13:08, 31.07s/gen]


  0%|          | 53/100000 [04:14<133:26:00,  4.81s/trial, best loss: 3.9309793599341134]
  RMSE: 15.353125673775937, R2: 0.3358239504863032
Fold 3


Generations:   0%|          | 9/10000 [05:43<106:04:31, 38.22s/gen]


  0%|          | 22/100000 [04:10<315:56:18, 11.38s/trial, best loss: 4.099219233428639]
  RMSE: 11.960099436128054, R2: 0.4163027114663066
Fold 4


Generations:   0%|          | 22/10000 [05:50<44:06:02, 15.91s/gen]


  0%|          | 30/100000 [04:05<227:10:30,  8.18s/trial, best loss: 3.9632529971371127]
  RMSE: 23.422433459547438, R2: 0.021816654375630296
CV R2 scores: {0: {'rmse': 14.265312214594873, 'r2': 0.26467919176069066}, 1: {'rmse': 13.249909432223866, 'r2': 0.22811645123127577}, 2: {'rmse': 15.353125673775937, 'r2': 0.3358239504863032}, 3: {'rmse': 11.960099436128054, 'r2': 0.4163027114663066}, 4: {'rmse': 23.422433459547438, 'r2': 0.021816654375630296}}


Generations:   0%|          | 28/10000 [05:49<34:33:14, 12.47s/gen]


  0%|          | 29/100000 [04:07<236:43:31,  8.52s/trial, best loss: 3.8440009958964474]
Test RMSE: 14.395997764554659
Test R2: 0.2579428650139608
Running FEDOT on Gd_ctopo_fp_cmplx_da.csv
Reduced from 2048 to 277 features
Fold 0


Generations:   0%|          | 20/10000 [05:49<48:29:56, 17.49s/gen]


  0%|          | 51/100000 [04:03<132:32:32,  4.77s/trial, best loss: 4.004669514928091]
  RMSE: 14.781019852960942, R2: 0.23809648878482792
Fold 1


Generations:   0%|          | 22/10000 [05:45<43:34:18, 15.72s/gen]


  0%|          | 55/100000 [04:10<126:40:39,  4.56s/trial, best loss: 3.8304354621291843]
  RMSE: 15.593622520663779, R2: 0.091581663182006
Fold 2


Generations:   0%|          | 13/10000 [05:53<75:30:43, 27.22s/gen]


  0%|          | 56/100000 [04:03<120:29:43,  4.34s/trial, best loss: 3.9344520891195316]
  RMSE: 14.990969795267032, R2: 0.35149080984817604
Fold 3


Generations:   0%|          | 23/10000 [05:48<41:58:10, 15.14s/gen]


  0%|          | 64/100000 [04:06<107:04:15,  3.86s/trial, best loss: 4.150889042893219]
  RMSE: 10.725213185548116, R2: 0.47656974853906864
Fold 4


Generations:   0%|          | 10/10000 [05:31<92:07:02, 33.20s/gen]


  0%|          | 33/100000 [04:18<217:15:41,  7.82s/trial, best loss: 3.8097408659686005]
  RMSE: 26.202524840441615, R2: -0.09428738292695149
CV R2 scores: {0: {'rmse': 14.781019852960942, 'r2': 0.23809648878482792}, 1: {'rmse': 15.593622520663779, 'r2': 0.091581663182006}, 2: {'rmse': 14.990969795267032, 'r2': 0.35149080984817604}, 3: {'rmse': 10.725213185548116, 'r2': 0.47656974853906864}, 4: {'rmse': 26.202524840441615, 'r2': -0.09428738292695149}}


Generations:   0%|          | 24/10000 [05:52<40:45:11, 14.71s/gen]


  0%|          | 29/100000 [03:59<229:13:36,  8.25s/trial, best loss: 4.075843150079475]
Test RMSE: 15.009554563153035
Test R2: 0.22631642219524017
Running FEDOT on Gd_ctopo_fp_cmplx_da_bonds.csv
Reduced from 2048 to 285 features
Fold 0


Generations:   0%|          | 15/10000 [05:51<65:00:21, 23.44s/gen]


  0%|          | 17/100000 [03:55<384:53:27, 13.86s/trial, best loss: 3.9673500922659826]
  RMSE: 13.459005723442646, R2: 0.30624112407900805
Fold 1


Generations:   0%|          | 8/10000 [05:24<112:41:44, 40.60s/gen]


  0%|          | 60/100000 [04:29<124:51:03,  4.50s/trial, best loss: 3.8105491853813485]
  RMSE: 15.750158719712873, R2: 0.08246252788143349
Fold 2


Generations:   0%|          | 20/10000 [05:46<48:00:05, 17.32s/gen]


  0%|          | 62/100000 [04:07<110:58:46,  4.00s/trial, best loss: 3.9275718841625804]
  RMSE: 14.88380433027375, R2: 0.3561267866971841
Fold 3


Generations:   0%|          | 9/10000 [05:57<110:07:52, 39.68s/gen]


  0%|          | 32/100000 [03:59<208:06:03,  7.49s/trial, best loss: 4.058234814130858]
  RMSE: 10.839341240298427, R2: 0.4709998754406767
Fold 4


Generations:   0%|          | 12/10000 [05:52<81:25:20, 29.35s/gen]


  0%|          | 30/100000 [04:01<223:27:46,  8.05s/trial, best loss: 3.761731683850777] 
  RMSE: 21.43888221795274, R2: 0.10465504915948143
CV R2 scores: {0: {'rmse': 13.459005723442646, 'r2': 0.30624112407900805}, 1: {'rmse': 15.750158719712873, 'r2': 0.08246252788143349}, 2: {'rmse': 14.88380433027375, 'r2': 0.3561267866971841}, 3: {'rmse': 10.839341240298427, 'r2': 0.4709998754406767}, 4: {'rmse': 21.43888221795274, 'r2': 0.10465504915948143}}


Generations:   0%|          | 27/10000 [05:47<35:41:48, 12.89s/gen]


  0%|          | 32/100000 [04:08<215:40:38,  7.77s/trial, best loss: 4.034736711169483]
Test RMSE: 15.277848733229906
Test R2: 0.21248691163009603
Running FEDOT on Gd_ctopo_fp_cmplx_da_sub.csv
Reduced from 2048 to 294 features
Fold 0


Generations:   0%|          | 11/10000 [05:41<86:15:36, 31.09s/gen]


  0%|          | 59/100000 [04:15<120:21:38,  4.34s/trial, best loss: 3.8727738904576263]
  RMSE: 13.152573127002842, R2: 0.32203652070933586
Fold 1


Generations:   0%|          | 9/10000 [05:28<101:22:37, 36.53s/gen]


  0%|          | 359/100000 [04:23<20:21:10,  1.36trial/s, best loss: inf]
  RMSE: 14.797089764235665, R2: 0.13798428456498657
Fold 2


Generations:   0%|          | 11/10000 [05:45<87:16:03, 31.45s/gen]


  0%|          | 22/100000 [04:06<310:53:03, 11.19s/trial, best loss: 3.709643787792102]
  RMSE: 16.328289840906084, R2: 0.29363835923187254
Fold 3


Generations:   0%|          | 11/10000 [05:50<88:20:37, 31.84s/gen]


  0%|          | 18/100000 [04:08<382:56:46, 13.79s/trial, best loss: 3.9411108702521687]
  RMSE: 11.05828164202794, R2: 0.4603147703943121
Fold 4


Generations:   0%|          | 6/10000 [05:15<146:12:04, 52.66s/gen]


  0%|          | 183/100000 [04:31<41:06:05,  1.48s/trial, best loss: inf]
  RMSE: 25.00267819490266, R2: -0.04417858411249442
CV R2 scores: {0: {'rmse': 13.152573127002842, 'r2': 0.32203652070933586}, 1: {'rmse': 14.797089764235665, 'r2': 0.13798428456498657}, 2: {'rmse': 16.328289840906084, 'r2': 0.29363835923187254}, 3: {'rmse': 11.05828164202794, 'r2': 0.4603147703943121}, 4: {'rmse': 25.00267819490266, 'r2': -0.04417858411249442}}


Generations:   0%|          | 30/10000 [05:52<32:31:56, 11.75s/gen]


  0%|          | 32/100000 [04:04<211:57:47,  7.63s/trial, best loss: 3.9459738998617313]
Test RMSE: 17.1254264740487
Test R2: 0.11725153667111798
Running FEDOT on Gd_ctopo_fp_cmplx_da_sub_bonds.csv
Reduced from 2048 to 298 features
Fold 0


Generations:   0%|          | 4/10000 [01:55<80:19:35, 28.93s/gen] 


  0%|          | 107/100000 [07:58<124:11:03,  4.48s/trial, best loss: 3.9063802369336584]
  RMSE: 12.892833394508397, R2: 0.335425083620302
Fold 1


Generations:   0%|          | 6/10000 [05:26<150:53:25, 54.35s/gen]


  0%|          | 48/100000 [04:26<154:10:25,  5.55s/trial, best loss: 3.624112983973731] 
  RMSE: 15.084489548097968, R2: 0.12124159162676273
Fold 2


Generations:   0%|          | 9/10000 [06:02<111:46:55, 40.28s/gen]


  0%|          | 53/100000 [03:51<121:23:01,  4.37s/trial, best loss: 3.7984821799546724]
  RMSE: 14.999535404649865, R2: 0.3511202616795197
Fold 3


Generations:   0%|          | 8/10000 [05:25<113:00:44, 40.72s/gen]


  0%|          | 46/100000 [04:26<160:40:54,  5.79s/trial, best loss: 3.93126916778933]  
  RMSE: 11.389825294714717, R2: 0.4441342083398483
Fold 4


Generations:   0%|          | 9/10000 [06:00<111:03:29, 40.02s/gen]


  0%|          | 24/100000 [03:54<271:53:00,  9.79s/trial, best loss: 3.7293966093934947]
  RMSE: 21.026617647204, R2: 0.12187231814196708
CV R2 scores: {0: {'rmse': 12.892833394508397, 'r2': 0.335425083620302}, 1: {'rmse': 15.084489548097968, 'r2': 0.12124159162676273}, 2: {'rmse': 14.999535404649865, 'r2': 0.3511202616795197}, 3: {'rmse': 11.389825294714717, 'r2': 0.4441342083398483}, 4: {'rmse': 21.026617647204, 'r2': 0.12187231814196708}}


Generations:   0%|          | 17/10000 [05:42<55:51:36, 20.14s/gen]


  0%|          | 26/100000 [04:13<270:55:35,  9.76s/trial, best loss: 4.01170485048343]
Test RMSE: 15.4472709931684
Test R2: 0.20375385964794268
Running FEDOT on Gd_ctopo_fp_cmplx_full.csv
Reduced from 2048 to 301 features
Fold 0


Generations:   0%|          | 6/10000 [05:15<146:01:19, 52.60s/gen]


  0%|          | 51/100000 [04:37<150:57:55,  5.44s/trial, best loss: 3.856472963344886] 
  RMSE: 13.870639575158684, R2: 0.2850230159865107
Fold 1


Generations:   0%|          | 17/10000 [05:52<57:31:57, 20.75s/gen]


  0%|          | 52/100000 [04:01<128:55:36,  4.64s/trial, best loss: 3.742008781044685] 
  RMSE: 15.37010287964375, R2: 0.10460296982659745
Fold 2


Generations:   0%|          | 12/10000 [05:46<80:11:51, 28.91s/gen]


  0%|          | 56/100000 [04:05<121:48:41,  4.39s/trial, best loss: 3.7553034864872417]
  RMSE: 15.307644819696446, R2: 0.3377914517386782
Fold 3


Generations:   0%|          | 4/10000 [02:23<99:34:10, 35.86s/gen] 


  0%|          | 107/100000 [07:32<117:19:53,  4.23s/trial, best loss: 4.04926445348453]
  RMSE: 10.576391051142721, R2: 0.4838328216255535
Fold 4


Generations:   0%|          | 7/10000 [05:35<132:51:53, 47.86s/gen]


  0%|          | 119/100000 [03:29<48:45:53,  1.76s/trial, best loss: inf]
  RMSE: 22.723529560029203, R2: 0.05100474688879297
CV R2 scores: {0: {'rmse': 13.870639575158684, 'r2': 0.2850230159865107}, 1: {'rmse': 15.37010287964375, 'r2': 0.10460296982659745}, 2: {'rmse': 15.307644819696446, 'r2': 0.3377914517386782}, 3: {'rmse': 10.576391051142721, 'r2': 0.4838328216255535}, 4: {'rmse': 22.723529560029203, 'r2': 0.05100474688879297}}


Generations:   0%|          | 11/10000 [05:33<84:12:16, 30.35s/gen]


  0%|          | 51/100000 [04:24<143:43:08,  5.18s/trial, best loss: 4.293823960726572]
Test RMSE: 21.01033337533994
Test R2: -0.083000153556124
Running FEDOT on Gd_ctopo_fp_ligand.csv
Reduced from 2048 to 262 features
Fold 0


Generations:   0%|          | 9/10000 [05:30<102:03:09, 36.77s/gen]


  0%|          | 49/100000 [04:25<150:38:53,  5.43s/trial, best loss: 3.70316008218616] 
  RMSE: 15.041787039748387, R2: 0.22465496464106205
Fold 1


Generations:   0%|          | 11/10000 [05:40<85:47:35, 30.92s/gen]


  0%|          | 32/100000 [04:10<216:58:34,  7.81s/trial, best loss: 3.817447919815519]
  RMSE: 17.07859589656072, R2: 0.0050734100443072805
Fold 2


Generations:   0%|          | 4/10000 [02:31<105:28:53, 37.99s/gen]


  0%|          | 107/100000 [07:23<114:59:19,  4.14s/trial, best loss: 3.8641868484623756]
  RMSE: 13.747193416188434, R2: 0.4052965624673889
Fold 3


Generations:   0%|          | 27/10000 [05:49<35:50:46, 12.94s/gen]


  0%|          | 26/100000 [04:08<265:56:28,  9.58s/trial, best loss: 3.8701987466441508]
  RMSE: 9.064138291341584, R2: 0.5576363748641779
Fold 4


Generations:   0%|          | 9/10000 [05:52<108:42:50, 39.17s/gen]


  0%|          | 18/100000 [03:51<357:06:54, 12.86s/trial, best loss: 3.903428111149167] 
  RMSE: 22.3470441323476, R2: 0.06672778334717155
CV R2 scores: {0: {'rmse': 15.041787039748387, 'r2': 0.22465496464106205}, 1: {'rmse': 17.07859589656072, 'r2': 0.0050734100443072805}, 2: {'rmse': 13.747193416188434, 'r2': 0.4052965624673889}, 3: {'rmse': 9.064138291341584, 'r2': 0.5576363748641779}, 4: {'rmse': 22.3470441323476, 'r2': 0.06672778334717155}}


Generations:   0%|          | 20/10000 [05:44<47:47:57, 17.24s/gen]


  0%|          | 31/100000 [04:09<223:40:38,  8.05s/trial, best loss: 3.949055954456513]
Test RMSE: 16.344232680531313
Test R2: 0.15751900807304353
Running FEDOT on Gd_ctopo_fp_skl.csv
Reduced from 2048 to 51 features
Fold 0


Generations:   1%|          | 52/10000 [05:58<19:02:03,  6.89s/gen]


  1%|          | 624/100000 [03:59<10:34:33,  2.61trial/s, best loss: inf]
  RMSE: 13.810177102364248, R2: 0.28813961895282114
Fold 1


Generations:   0%|          | 29/10000 [05:48<33:17:10, 12.02s/gen]


  0%|          | 206/100000 [04:08<33:29:40,  1.21s/trial, best loss: 3.9595775985976234]
  RMSE: 13.500192926886747, R2: 0.2135359959423676
Fold 2


Generations:   0%|          | 35/10000 [05:53<27:58:19, 10.11s/gen]


  1%|          | 595/100000 [04:03<11:18:09,  2.44trial/s, best loss: inf]
  RMSE: 18.83886106863894, R2: 0.1850310752501826
Fold 3


Generations:   0%|          | 14/10000 [05:42<67:48:41, 24.45s/gen] 


  0%|          | 258/100000 [04:15<27:26:55,  1.01trial/s, best loss: 3.890812947677502]
  RMSE: 15.673776214139545, R2: 0.23506148706076724
Fold 4


Generations:   0%|          | 18/10000 [05:59<55:18:35, 19.95s/gen]


  0%|          | 177/100000 [03:58<37:25:15,  1.35s/trial, best loss: 3.820951407097536] 
  RMSE: 18.29084113390992, R2: 0.2361256482784968
CV R2 scores: {0: {'rmse': 13.810177102364248, 'r2': 0.28813961895282114}, 1: {'rmse': 13.500192926886747, 'r2': 0.2135359959423676}, 2: {'rmse': 18.83886106863894, 'r2': 0.1850310752501826}, 3: {'rmse': 15.673776214139545, 'r2': 0.23506148706076724}, 4: {'rmse': 18.29084113390992, 'r2': 0.2361256482784968}}


Generations:   1%|          | 52/10000 [05:12<16:36:51,  6.01s/gen]


  0%|          | 182/100000 [04:44<43:20:46,  1.56s/trial, best loss: 4.019666328005463] 
Test RMSE: 14.072358665592873
Test R2: 0.2746251892594088
Running FEDOT on Gd_ctopo_fp_skl_da.csv
Reduced from 2048 to 89 features
Fold 0


Generations:   0%|          | 10/10000 [05:37<93:42:05, 33.77s/gen] 


  0%|          | 27/100000 [04:16<263:25:03,  9.49s/trial, best loss: 3.8957860656649466]
  RMSE: 15.095534456656537, R2: 0.22188449642787833
Fold 1


Generations:   0%|          | 17/10000 [05:51<57:16:00, 20.65s/gen]


  0%|          | 29/100000 [04:05<235:31:18,  8.48s/trial, best loss: 3.8212417585511993]
  RMSE: 13.930461576093405, R2: 0.18847036862073108
Fold 2


Generations:   0%|          | 33/10000 [05:52<29:34:59, 10.69s/gen]


  0%|          | 121/100000 [04:06<56:24:36,  2.03s/trial, best loss: 3.8175396006049724]
  RMSE: 17.542677361401363, R2: 0.24110396831504755
Fold 3


Generations:   0%|          | 21/10000 [05:45<45:34:02, 16.44s/gen]


  0%|          | 139/100000 [04:12<50:28:25,  1.82s/trial, best loss: 3.81506691064007] 
  RMSE: 16.572042270207472, R2: 0.19122276614471057
Fold 4


Generations:   0%|          | 27/10000 [05:48<35:42:54, 12.89s/gen]


  0%|          | 59/100000 [04:09<117:17:13,  4.22s/trial, best loss: 3.893191986515782]
  RMSE: 23.173375748080687, R2: 0.03221796924655762
CV R2 scores: {0: {'rmse': 15.095534456656537, 'r2': 0.22188449642787833}, 1: {'rmse': 13.930461576093405, 'r2': 0.18847036862073108}, 2: {'rmse': 17.542677361401363, 'r2': 0.24110396831504755}, 3: {'rmse': 16.572042270207472, 'r2': 0.19122276614471057}, 4: {'rmse': 23.173375748080687, 'r2': 0.03221796924655762}}


Generations:   0%|          | 49/10000 [05:55<20:04:13,  7.26s/gen]


  0%|          | 134/100000 [04:02<50:11:54,  1.81s/trial, best loss: 4.207401188319978]
Test RMSE: 16.9146017495282
Test R2: 0.12811872306698524
Running FEDOT on Gd_ctopo_fp_skl_da_bonds.csv
Reduced from 2048 to 114 features
Fold 0


Generations:   0%|          | 9/10000 [05:47<107:10:47, 38.62s/gen]


  0%|          | 124/100000 [04:07<55:28:23,  2.00s/trial, best loss: 3.547670315200295] 
  RMSE: 15.705304996095862, R2: 0.1904532203957766
Fold 1


Generations:   0%|          | 22/10000 [05:50<44:08:49, 15.93s/gen]


  RMSE: 12.93035546513509, R2: 0.24673231056233913
Fold 2


Generations:   0%|          | 46/10000 [05:57<21:28:30,  7.77s/gen]


  0%|          | 129/100000 [03:59<51:33:26,  1.86s/trial, best loss: 3.5738833829380914]
  RMSE: 16.341032254726215, R2: 0.2930871225487274
Fold 3


Generations:   0%|          | 47/10000 [06:00<21:12:04,  7.67s/gen]


  0%|          | 127/100000 [03:57<51:50:05,  1.87s/trial, best loss: 3.6681466745734967]
  RMSE: 11.444570086468557, R2: 0.441462459105763
Fold 4


Generations:   0%|          | 34/10000 [05:53<28:46:17, 10.39s/gen]


  0%|          | 225/100000 [03:59<29:31:49,  1.07s/trial, best loss: inf]
  RMSE: 16.938132360796462, R2: 0.2926183775939556
CV R2 scores: {0: {'rmse': 15.705304996095862, 'r2': 0.1904532203957766}, 1: {'rmse': 12.93035546513509, 'r2': 0.24673231056233913}, 2: {'rmse': 16.341032254726215, 'r2': 0.2930871225487274}, 3: {'rmse': 11.444570086468557, 'r2': 0.441462459105763}, 4: {'rmse': 16.938132360796462, 'r2': 0.2926183775939556}}


Generations:   0%|          | 50/10000 [05:55<19:37:38,  7.10s/gen]


Test RMSE: 14.822213504113106
Test R2: 0.23597311788316988
Running FEDOT on Gd_ctopo_fp_skl_da_skl.csv
Reduced from 2048 to 104 features
Fold 0


Generations:   0%|          | 30/10000 [05:50<32:19:15, 11.67s/gen]


  0%|          | 111/100000 [04:06<61:30:31,  2.22s/trial, best loss: 3.8299593692335683]
  RMSE: 14.182265294896087, R2: 0.2689599342496086
Fold 1


Generations:   0%|          | 30/10000 [05:49<32:18:03, 11.66s/gen]


  0%|          | 70/100000 [04:06<97:49:46,  3.52s/trial, best loss: 3.7886833256131522] 
  RMSE: 15.99588952710724, R2: 0.06814729285106591
Fold 2


Generations:   0%|          | 21/10000 [05:47<45:48:52, 16.53s/gen]


  0%|          | 114/100000 [04:09<60:45:56,  2.19s/trial, best loss: 3.9773701938712427]
  RMSE: 15.099457793177065, R2: 0.3467976202395865
Fold 3


Generations:   0%|          | 28/10000 [05:49<34:36:35, 12.49s/gen]


  0%|          | 142/100000 [04:07<48:23:21,  1.74s/trial, best loss: 3.7823957361101437]
  RMSE: 16.634302157579924, R2: 0.18818425232316682
Fold 4


Generations:   0%|          | 11/10000 [05:49<88:12:55, 31.79s/gen]


  0%|          | 301/100000 [04:05<22:33:30,  1.23trial/s, best loss: inf]
  RMSE: 22.89476901762349, R2: 0.04385333002932046
CV R2 scores: {0: {'rmse': 14.182265294896087, 'r2': 0.2689599342496086}, 1: {'rmse': 15.99588952710724, 'r2': 0.06814729285106591}, 2: {'rmse': 15.099457793177065, 'r2': 0.3467976202395865}, 3: {'rmse': 16.634302157579924, 'r2': 0.18818425232316682}, 4: {'rmse': 22.89476901762349, 'r2': 0.04385333002932046}}


Generations:   0%|          | 41/10000 [06:00<24:17:30,  8.78s/gen]


  0%|          | 26/100000 [03:57<253:42:23,  9.14s/trial, best loss: 4.057875745654009] 
Test RMSE: 14.631332218075295
Test R2: 0.24581229836626373
Running FEDOT on Gd_ctopo_fp_skl_da_skl_bonds.csv
Reduced from 2048 to 124 features
Fold 0


Generations:   0%|          | 7/10000 [05:52<139:51:56, 50.39s/gen]


  0%|          | 73/100000 [04:02<92:01:10,  3.32s/trial, best loss: 3.518505418072042] 
  RMSE: 14.772024049594675, R2: 0.2385601871114077
Fold 1


Generations:   0%|          | 35/10000 [05:52<27:53:33, 10.08s/gen]


  RMSE: 14.341600143007126, R2: 0.16451917878895383
Fold 2


Generations:   0%|          | 36/10000 [05:57<27:30:16,  9.94s/gen]


  0%|          | 122/100000 [03:59<54:34:33,  1.97s/trial, best loss: 3.8016459231519635]
  RMSE: 13.658844565498757, R2: 0.40911853278646026
Fold 3


Generations:   0%|          | 26/10000 [05:47<37:01:41, 13.36s/gen]


  0%|          | 126/100000 [04:09<55:01:45,  1.98s/trial, best loss: 3.6403541143992255]
  RMSE: 11.269367648564819, R2: 0.4500129890152378
Fold 4


Generations:   0%|          | 4/10000 [01:31<63:17:05, 22.79s/gen]


  0%|          | 84/100000 [08:26<167:27:01,  6.03s/trial, best loss: 3.6872470488170337]
  RMSE: 16.436697136370753, R2: 0.31355965110807515
CV R2 scores: {0: {'rmse': 14.772024049594675, 'r2': 0.2385601871114077}, 1: {'rmse': 14.341600143007126, 'r2': 0.16451917878895383}, 2: {'rmse': 13.658844565498757, 'r2': 0.40911853278646026}, 3: {'rmse': 11.269367648564819, 'r2': 0.4500129890152378}, 4: {'rmse': 16.436697136370753, 'r2': 0.31355965110807515}}


Generations:   1%|          | 53/10000 [05:54<18:29:18,  6.69s/gen]


  0%|          | 33/100000 [04:00<202:39:00,  7.30s/trial, best loss: 3.7485582206467982]
Test RMSE: 15.07143370309786
Test R2: 0.22312679560222182
Running FEDOT on Gd_ctopo_fp_topo.csv
Reduced from 2048 to 61 features
Fold 0


Generations:   0%|          | 36/10000 [05:55<27:20:36,  9.88s/gen]


  0%|          | 47/100000 [04:01<142:45:16,  5.14s/trial, best loss: 3.8697781378182263]
  RMSE: 19.534861041525698, R2: -0.006945350638805525
Fold 1


Generations:   0%|          | 35/10000 [06:06<28:58:08, 10.47s/gen]


  0%|          | 165/100000 [03:51<38:57:17,  1.40s/trial, best loss: 4.0293745007445105]
  RMSE: 12.898195093108695, R2: 0.2486058374884239
Fold 2


Generations:   0%|          | 23/10000 [06:22<46:06:35, 16.64s/gen]


  0%|          | 78/100000 [03:36<76:54:35,  2.77s/trial, best loss: 3.8219204974705137]
  RMSE: 20.185311019873353, R2: 0.12678366501721183
Fold 3


Generations:   1%|          | 57/10000 [05:35<16:16:12,  5.89s/gen]


  0%|          | 252/100000 [04:22<28:50:58,  1.04s/trial, best loss: 4.1274306204837234]
  RMSE: 13.696109532699063, R2: 0.33157896885471017
Fold 4


Generations:   0%|          | 47/10000 [05:54<20:52:23,  7.55s/gen]


  0%|          | 210/100000 [04:03<32:08:28,  1.16s/trial, best loss: 3.930126551150012] 
  RMSE: 18.012518880116886, R2: 0.24774912855637599
CV R2 scores: {0: {'rmse': 19.534861041525698, 'r2': -0.006945350638805525}, 1: {'rmse': 12.898195093108695, 'r2': 0.2486058374884239}, 2: {'rmse': 20.185311019873353, 'r2': 0.12678366501721183}, 3: {'rmse': 13.696109532699063, 'r2': 0.33157896885471017}, 4: {'rmse': 18.012518880116886, 'r2': 0.24774912855637599}}


Generations:   1%|          | 70/10000 [05:55<14:00:21,  5.08s/gen]


  0%|          | 160/100000 [04:01<41:51:53,  1.51s/trial, best loss: 3.9332715268949086]
Test RMSE: 18.737277589662895
Test R2: 0.03416694326963221
Running FEDOT on Gd_ctopo_fp_topo_da.csv
Reduced from 2048 to 85 features
Fold 0


Generations:   0%|          | 41/10000 [06:19<25:37:16,  9.26s/gen]


  0%|          | 128/100000 [03:36<46:58:40,  1.69s/trial, best loss: 3.7917807173843485]
  RMSE: 15.950044075925609, R2: 0.17783788220471797
Fold 1


Generations:   0%|          | 44/10000 [06:24<24:11:50,  8.75s/gen]


  0%|          | 15/100000 [03:23<376:58:35, 13.57s/trial, best loss: 3.9446391360390267]
  RMSE: 13.767887026926628, R2: 0.1979412725987717
Fold 2


Generations:   0%|          | 22/10000 [05:59<45:19:42, 16.35s/gen]


  0%|          | 135/100000 [03:57<48:42:03,  1.76s/trial, best loss: 3.8112260477113056]
  RMSE: 17.44370262094669, R2: 0.24538561450967955
Fold 3


Generations:   0%|          | 44/10000 [05:57<22:26:23,  8.11s/gen]


  0%|          | 170/100000 [04:00<39:09:33,  1.41s/trial, best loss: 3.9143723367143237]
  RMSE: 14.921490533672811, R2: 0.2717758232781351
Fold 4


Generations:   0%|          | 30/10000 [05:51<32:26:18, 11.71s/gen]


  0%|          | 58/100000 [04:05<117:24:35,  4.23s/trial, best loss: 3.88440351249031]  
  RMSE: 20.168187951517563, R2: 0.15772263374474615
CV R2 scores: {0: {'rmse': 15.950044075925609, 'r2': 0.17783788220471797}, 1: {'rmse': 13.767887026926628, 'r2': 0.1979412725987717}, 2: {'rmse': 17.44370262094669, 'r2': 0.24538561450967955}, 3: {'rmse': 14.921490533672811, 'r2': 0.2717758232781351}, 4: {'rmse': 20.168187951517563, 'r2': 0.15772263374474615}}


Generations:   0%|          | 50/10000 [04:47<15:52:17,  5.74s/gen]


  0%|          | 62/100000 [05:10<139:01:08,  5.01s/trial, best loss: 4.033161386245383]
Test RMSE: 14.624205847861937
Test R2: 0.2461796347572428


In [41]:
all_results

{'Gd_ctopo_fp_cmplx.csv': {'cv_scores': {0: {'rmse': 14.265312214594873,
    'r2': 0.26467919176069066},
   1: {'rmse': 13.249909432223866, 'r2': 0.22811645123127577},
   2: {'rmse': 15.353125673775937, 'r2': 0.3358239504863032},
   3: {'rmse': 11.960099436128054, 'r2': 0.4163027114663066},
   4: {'rmse': 23.422433459547438, 'r2': 0.021816654375630296}},
  'test_rmse': 14.395997764554659,
  'test_r2': 0.2579428650139608,
  'pipeline': {'depth': 1, 'length': 1, 'nodes': [rfr]}},
 'Gd_ctopo_fp_cmplx_da.csv': {'cv_scores': {0: {'rmse': 14.781019852960942,
    'r2': 0.23809648878482792},
   1: {'rmse': 15.593622520663779, 'r2': 0.091581663182006},
   2: {'rmse': 14.990969795267032, 'r2': 0.35149080984817604},
   3: {'rmse': 10.725213185548116, 'r2': 0.47656974853906864},
   4: {'rmse': 26.202524840441615, 'r2': -0.09428738292695149}},
  'test_rmse': 15.009554563153035,
  'test_r2': 0.22631642219524017,
  'pipeline': {'depth': 2, 'length': 2, 'nodes': [rfr, pca]}},
 'Gd_ctopo_fp_cmplx_da_bo

In [46]:
for dataset_name in all_results:
    all_results[dataset_name]["pipeline"].save(f"pipelines/{dataset_name}.json", create_subdir=True, is_datetime_in_path=True)
    all_results[dataset_name].pop("pipeline")

In [48]:
import json

with open("automl_results.json", "w") as f:
    json.dump(all_results, f)