# Input

In [1]:
target_column = "RecommendHiring"

In [2]:
# Parameters
target_column = "RecommendHiring"


# Constants

In [3]:
import os
import numpy as np
from sklearn.metrics import make_scorer

SCRIPT_DIR = os.path.join(os.getcwd(),)

SAVED_MODELS_PATH="./regression_models/saved_models"

DROPPED_LEXICAL_COLUMNS = [
    "Swear",
    "Numbers",
    "Inhibition",
    "Preceptual",
    "Anxiety",
    "Anger",
    "Sadness",
    "Work",
    "Articles",
    "Verbs",
    "Adverbs",
    "Prepositions",
    "Conjunctions",
    "Negations",
]

facial_features = [
    "average_inner_brow_height",
    "average_outer_brow_height",
    "eye_open",
    "inner_lip_height",
    "lip_corner_distance",
    "outer_lip_height",
    "smile",
    "pitch",
    "roll",
    "yaw",
]
stats = ["max", "median", "min", "std", "mean"]
DROPPED_FACIAL_FEATURES = [
    f"{feature}_{stat}" for feature in facial_features for stat in stats
]


DROPPED_PROSODIC_COLUMNS = []

ALREADY_NORMALIZED_FEATURES = [
    "average_outer_brow_height_mean",
    "average_inner_brow_height_mean",
    "eye_open_mean",
    "inner_lip_height_mean",
    "inner_lip_height_mean",
    "lip_corner_distance_mean",
    "average_outer_brow_height_std",
    "average_inner_brow_height_std",
    "eye_open_std",
    "outer_lip_height_std",
    "inner_lip_height_std",
    "lip_corner_distance_std",
    "average_outer_brow_height_min",
    "average_inner_brow_height_min",
    "eye_open_min",
    "outer_lip_height_min",
    "inner_lip_height_min",
    "lip_corner_distance_min",
    "average_outer_brow_height_max",
    "average_inner_brow_height_max",
    "eye_open_max",
    "outer_lip_height_max",
    "inner_lip_height_max",
    "lip_corner_distance_max",
    "average_outer_brow_height_median",
    "average_inner_brow_height_median",
    "eye_open_median",
    "outer_lip_height_median",
    "inner_lip_height_median",
    "lip_corner_distance_median",
]  # these are already in [0, 1]

MUST_KEEP_FEATURES = [
    "pause_duration_avg",
    "average_outer_brow_height_mean",
    "average_inner_brow_height_mean",
    "outer_lip_height_mean",
    "Duration/Filler Words",
]


GROUPS_COLUMN = "cleaned_ids"
INDEX_COLUMN = "participant_id"


def pearson_corr(y_true, y_pred):
    return np.corrcoef(y_true, y_pred)[0, 1]


SCORING_METRICS = {
    "r2": "r2",
    "mae": "neg_mean_absolute_error",
    "pearson": make_scorer(pearson_corr),  # Pearson Correlation Coefficient
}


MUST_KEEP_FEATURES = [
    # "pause_duration_avg",
    # "average_outer_brow_height_mean",
    # "average_inner_brow_height_mean",
    # "outer_lip_height_mean",
    "Duration/Filler Words",
]

PIPELINE_PARAMS = {'feature_selection__estimator__alpha': 0.057376790661083456, 'svr__C': 0.655379988356498, 'svr__gamma': 0.02784736494309893, 'svr__epsilon': 0.2617249201838037, 'svr__kernel': 'rbf'}
HYPERPARAMETER_TUNING_ENABLED = True

# Data Preprocessing

## Import Datasets

In [4]:
import pandas as pd
import os

features_df = pd.read_csv(os.path.join(SCRIPT_DIR, "datasets", "add.csv"))
features_df = features_df.set_index("participant_id")

labels_df = pd.read_csv(
    os.path.join(os.path.join(SCRIPT_DIR,  "datasets", "turker_scores_full_interview.csv"))
)
labels_df = labels_df.set_index("Participant")
labels_df = labels_df.loc[labels_df["Worker"] == "AGGR"]

features_df.index = features_df.index.str.lower()
labels_df.index = labels_df.index.str.lower()
indexed_combined_df = features_df.join(labels_df[[target_column]], how="left")
combined_df = indexed_combined_df.reset_index(drop=True)

# Model

## Split Data

In [5]:
X = combined_df.drop(columns=[target_column, GROUPS_COLUMN])
y = combined_df[target_column]

## Pipeline Creation

In [6]:
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectFromModel
from sklearn.impute import SimpleImputer
from sklearn.linear_model import Lasso, LassoCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import GroupKFold, cross_val_score, cross_validate
import sys

sys.path.append("..")
from models.domain_aware_selector import DomainAwareSelector

groups_column = combined_df[GROUPS_COLUMN].astype(str).values


preprocessor = ColumnTransformer(
    [
        ('dropper', 'drop', DROPPED_FACIAL_FEATURES + 
                            DROPPED_LEXICAL_COLUMNS + 
                            DROPPED_PROSODIC_COLUMNS)
    ],
    remainder='passthrough'
)
unfitted_pipeline = Pipeline(
    [
        ('preprocessor', preprocessor),
        ("imputer", SimpleImputer(strategy="mean")),  # NaN imputation
        ("scaler", StandardScaler()),
        ("feature_selection", SelectFromModel(estimator=Lasso(    max_iter=30000))),
        # ('feature_selection', DomainAwareSelector(
        #     must_keep_features=MUST_KEEP_FEATURES,
        #     selector=SelectFromModel(lasso_feature_selection_model, max_features=10),
        # )),
        ("svr", SVR(kernel="rbf")),
    ]
)

## Hyperparameter Tuning

In [7]:
import optuna
from sklearn.base import clone
from sklearn.model_selection import GroupShuffleSplit, cross_val_score
import numpy as np

def objective(trial):
    pipeline_clone = clone(unfitted_pipeline)  # Clone pipeline for thread safety
    
    params = {
        "feature_selection__estimator__alpha": trial.suggest_float(
            "feature_selection__estimator__alpha", 1e-3, 0.3, log=True
        ),
        "svr__C": trial.suggest_float("svr__C", 0.01, 100, log=True),
        "svr__gamma": trial.suggest_float("svr__gamma", 1e-3, 1e1, log=True),
        "svr__epsilon": trial.suggest_float("svr__epsilon", 0.01, 0.5),
        "svr__kernel": trial.suggest_categorical("svr__kernel", ["rbf"
                                                                #  , "poly"
                                                                 ]),
    }
    
    # if params["svr__kernel"] == "poly":
    #     params["svr__degree"] = trial.suggest_int("svr__degree", 2, 3)  # Reduced from 5
    #     params["svr__coef0"] = trial.suggest_float("svr__coef0", 0.0, 0.5)  # Narrower range
        
    pipeline_clone.set_params(**params)
    
    mc_cv_tuning = GroupShuffleSplit(n_splits=20, test_size=0.2, random_state=42)
    scores = cross_val_score(
        pipeline_clone, X, y, cv=mc_cv_tuning, groups=groups_column, n_jobs=1
    )
    return np.mean(scores)

if HYPERPARAMETER_TUNING_ENABLED:
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100, n_jobs=-1, timeout=4*60)
    
    print("Best hyperparameters:", study.best_params)
    print(f"Best R² score: {study.best_value:.4f}")

  from .autonotebook import tqdm as notebook_tqdm
[I 2025-04-08 00:05:21,665] A new study created in memory with name: no-name-4de8f328-e1f6-4730-9749-2798ef686c3b


[I 2025-04-08 00:05:23,148] Trial 7 finished with value: 0.12106065630821308 and parameters: {'feature_selection__estimator__alpha': 0.23295489008266504, 'svr__C': 0.16329745183849026, 'svr__gamma': 0.10387488595782803, 'svr__epsilon': 0.014846825247926434, 'svr__kernel': 'rbf'}. Best is trial 7 with value: 0.12106065630821308.


[I 2025-04-08 00:05:23,173] Trial 6 finished with value: 0.18750320554977745 and parameters: {'feature_selection__estimator__alpha': 0.09076916174765001, 'svr__C': 1.9469368935578368, 'svr__gamma': 0.001027658428580485, 'svr__epsilon': 0.14314690513943187, 'svr__kernel': 'rbf'}. Best is trial 6 with value: 0.18750320554977745.


[I 2025-04-08 00:05:23,187] Trial 1 finished with value: -0.07145703833097625 and parameters: {'feature_selection__estimator__alpha': 0.0706257779274622, 'svr__C': 1.514041401957959, 'svr__gamma': 3.649387321031822, 'svr__epsilon': 0.043420991057115034, 'svr__kernel': 'rbf'}. Best is trial 6 with value: 0.18750320554977745.


[I 2025-04-08 00:05:23,191] Trial 4 finished with value: 0.10735348483626794 and parameters: {'feature_selection__estimator__alpha': 0.08396723195035317, 'svr__C': 0.14906047608030865, 'svr__gamma': 0.009479943915147532, 'svr__epsilon': 0.4491244026168849, 'svr__kernel': 'rbf'}. Best is trial 6 with value: 0.18750320554977745.


[I 2025-04-08 00:05:23,193] Trial 2 finished with value: 0.10408197527218858 and parameters: {'feature_selection__estimator__alpha': 0.12853236222617043, 'svr__C': 0.32402286951105974, 'svr__gamma': 0.003734041402913637, 'svr__epsilon': 0.09996499254322051, 'svr__kernel': 'rbf'}. Best is trial 6 with value: 0.18750320554977745.


[I 2025-04-08 00:05:23,243] Trial 0 finished with value: -0.11061623888703305 and parameters: {'feature_selection__estimator__alpha': 0.014017186203255864, 'svr__C': 0.015962369024710234, 'svr__gamma': 2.154601777305153, 'svr__epsilon': 0.17786288575371023, 'svr__kernel': 'rbf'}. Best is trial 6 with value: 0.18750320554977745.


[I 2025-04-08 00:05:23,258] Trial 3 finished with value: 0.13231334960128088 and parameters: {'feature_selection__estimator__alpha': 0.003584298361138121, 'svr__C': 64.67888895680039, 'svr__gamma': 0.007338603208542238, 'svr__epsilon': 0.47001608367595676, 'svr__kernel': 'rbf'}. Best is trial 6 with value: 0.18750320554977745.


[I 2025-04-08 00:05:23,263] Trial 5 finished with value: -0.09173102743639772 and parameters: {'feature_selection__estimator__alpha': 0.012484277920529996, 'svr__C': 23.363611202234218, 'svr__gamma': 5.419683782496277, 'svr__epsilon': 0.278678030421261, 'svr__kernel': 'rbf'}. Best is trial 6 with value: 0.18750320554977745.


[I 2025-04-08 00:05:24,738] Trial 12 finished with value: -0.12144206532356487 and parameters: {'feature_selection__estimator__alpha': 0.12594425117617178, 'svr__C': 92.83049358521227, 'svr__gamma': 0.3223744393595612, 'svr__epsilon': 0.36096674142209484, 'svr__kernel': 'rbf'}. Best is trial 9 with value: 0.23967570063962648.


[I 2025-04-08 00:05:24,741] Trial 10 finished with value: 0.03314964207013362 and parameters: {'feature_selection__estimator__alpha': 0.0037684817816394447, 'svr__C': 0.20221197075250327, 'svr__gamma': 0.004154331402509373, 'svr__epsilon': 0.36699589964736223, 'svr__kernel': 'rbf'}. Best is trial 9 with value: 0.23967570063962648.


[I 2025-04-08 00:05:24,744] Trial 9 finished with value: 0.23967570063962648 and parameters: {'feature_selection__estimator__alpha': 0.03780858263494991, 'svr__C': 3.216414058214142, 'svr__gamma': 0.02015280842415415, 'svr__epsilon': 0.3933802075545844, 'svr__kernel': 'rbf'}. Best is trial 9 with value: 0.23967570063962648.


[I 2025-04-08 00:05:24,745] Trial 11 finished with value: 0.040166570861926015 and parameters: {'feature_selection__estimator__alpha': 0.21990266417926382, 'svr__C': 0.10782336135862733, 'svr__gamma': 0.025727799615687594, 'svr__epsilon': 0.37396411018472, 'svr__kernel': 'rbf'}. Best is trial 9 with value: 0.23967570063962648.


[I 2025-04-08 00:05:24,747] Trial 13 finished with value: 0.20664528510759445 and parameters: {'feature_selection__estimator__alpha': 0.10380050187259278, 'svr__C': 0.46547169656349857, 'svr__gamma': 0.1548109920146894, 'svr__epsilon': 0.4238088779454849, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:24,748] Trial 8 finished with value: -0.09174751389398582 and parameters: {'feature_selection__estimator__alpha': 0.0037054607463817032, 'svr__C': 1.9652051920551181, 'svr__gamma': 4.53243565391879, 'svr__epsilon': 0.309867816201157, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:24,748] Trial 15 finished with value: 0.24813142964499768 and parameters: {'feature_selection__estimator__alpha': 0.029180069648507622, 'svr__C': 3.3501215652655776, 'svr__gamma': 0.014582564495148213, 'svr__epsilon': 0.019131771093601813, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:24,913] Trial 14 finished with value: 0.07085688859731416 and parameters: {'feature_selection__estimator__alpha': 0.001504318105098105, 'svr__C': 0.6702230962094851, 'svr__gamma': 0.0013330296953376986, 'svr__epsilon': 0.36705825984670837, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,206] Trial 22 finished with value: 0.12066043432393472 and parameters: {'feature_selection__estimator__alpha': 0.02662016442056353, 'svr__C': 8.807551331561898, 'svr__gamma': 0.02430895585054657, 'svr__epsilon': 0.21077565069791157, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,231] Trial 19 finished with value: 0.0994799364731748 and parameters: {'feature_selection__estimator__alpha': 0.028931530342246357, 'svr__C': 12.050267038210984, 'svr__gamma': 0.020157359085203573, 'svr__epsilon': 0.20983651645547946, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,274] Trial 21 finished with value: 0.09179494065604067 and parameters: {'feature_selection__estimator__alpha': 0.02499717645190666, 'svr__C': 8.08921967460057, 'svr__gamma': 0.03446837691909716, 'svr__epsilon': 0.22580024675860394, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,316] Trial 20 finished with value: 0.07936107117961069 and parameters: {'feature_selection__estimator__alpha': 0.030017395466951653, 'svr__C': 9.570518692214351, 'svr__gamma': 0.029128575421011074, 'svr__epsilon': 0.22710176477694538, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,353] Trial 17 finished with value: 0.16683164500480865 and parameters: {'feature_selection__estimator__alpha': 0.02779943884194328, 'svr__C': 9.139544691143046, 'svr__gamma': 0.016750903171833136, 'svr__epsilon': 0.21043511425091635, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,393] Trial 16 finished with value: 0.1169361660014255 and parameters: {'feature_selection__estimator__alpha': 0.034498631398440456, 'svr__C': 6.870382157051065, 'svr__gamma': 0.035507866580886664, 'svr__epsilon': 0.204998866006327, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,426] Trial 18 finished with value: 0.07447810819320785 and parameters: {'feature_selection__estimator__alpha': 0.03241731969673402, 'svr__C': 7.0705970211680365, 'svr__gamma': 0.03707854800412228, 'svr__epsilon': 0.22080440642013632, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:26,468] Trial 23 finished with value: 0.086978941471093 and parameters: {'feature_selection__estimator__alpha': 0.03462342552761465, 'svr__C': 8.347471817977937, 'svr__gamma': 0.03580585578840131, 'svr__epsilon': 0.24286501528984078, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:27,697] Trial 24 finished with value: -0.044757970620684484 and parameters: {'feature_selection__estimator__alpha': 0.045745411829118406, 'svr__C': 6.396223280994758, 'svr__gamma': 0.8944596507968421, 'svr__epsilon': 0.09570340964512325, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:27,706] Trial 27 finished with value: 0.13888917160906505 and parameters: {'feature_selection__estimator__alpha': 0.05281763031314678, 'svr__C': 3.370138900861989, 'svr__gamma': 0.11204106870121136, 'svr__epsilon': 0.42657118562754537, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:27,825] Trial 26 finished with value: -0.029592929527416777 and parameters: {'feature_selection__estimator__alpha': 0.007901307692975533, 'svr__C': 2.756898529577531, 'svr__gamma': 0.19394618058475419, 'svr__epsilon': 0.07653647889895913, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:27,883] Trial 28 finished with value: 0.14985733520441513 and parameters: {'feature_selection__estimator__alpha': 0.054310942888939936, 'svr__C': 0.6234156392753779, 'svr__gamma': 0.17892644443825365, 'svr__epsilon': 0.42774735986429124, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:27,913] Trial 25 finished with value: 0.022229317160861416 and parameters: {'feature_selection__estimator__alpha': 0.044879678630118665, 'svr__C': 3.7462310488115573, 'svr__gamma': 0.4586000698610211, 'svr__epsilon': 0.08338660230831946, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:27,954] Trial 30 finished with value: 0.1299121178772467 and parameters: {'feature_selection__estimator__alpha': 0.05943082406850078, 'svr__C': 3.0083484155748046, 'svr__gamma': 0.15143923145328678, 'svr__epsilon': 0.4225602002740639, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:28,029] Trial 31 finished with value: -0.0041000422087891395 and parameters: {'feature_selection__estimator__alpha': 0.06018087637854856, 'svr__C': 0.04845435929722613, 'svr__gamma': 0.16337658704243593, 'svr__epsilon': 0.4237618806885994, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:28,030] Trial 29 finished with value: 0.16145925035671843 and parameters: {'feature_selection__estimator__alpha': 0.054525724059805665, 'svr__C': 0.5927568439821541, 'svr__gamma': 0.15854730247932283, 'svr__epsilon': 0.4311259521101842, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,207] Trial 34 finished with value: -0.09431939465494305 and parameters: {'feature_selection__estimator__alpha': 0.009542808798699464, 'svr__C': 0.7449534857394822, 'svr__gamma': 0.5869951485530753, 'svr__epsilon': 0.49162760767708097, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,251] Trial 33 finished with value: -0.07088434190764 and parameters: {'feature_selection__estimator__alpha': 0.007521210130757491, 'svr__C': 0.6273297180493963, 'svr__gamma': 0.2255550187550141, 'svr__epsilon': 0.42160755019881013, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,285] Trial 32 finished with value: -0.08667758717033537 and parameters: {'feature_selection__estimator__alpha': 0.009348066113916477, 'svr__C': 0.6455952103772098, 'svr__gamma': 0.3338904669169455, 'svr__epsilon': 0.4899863253264874, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,402] Trial 35 finished with value: -0.010552177750852371 and parameters: {'feature_selection__estimator__alpha': 0.13309996094210008, 'svr__C': 27.56945384530035, 'svr__gamma': 0.4109789561367606, 'svr__epsilon': 0.4984488977293095, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,446] Trial 37 finished with value: -0.09624844033482798 and parameters: {'feature_selection__estimator__alpha': 0.01581116471211229, 'svr__C': 0.03985312979013148, 'svr__gamma': 0.6800644656533112, 'svr__epsilon': 0.49130496152675573, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,485] Trial 36 finished with value: -0.060014579988538874 and parameters: {'feature_selection__estimator__alpha': 0.013908790607415983, 'svr__C': 0.03981063881246289, 'svr__gamma': 0.06648613048059575, 'svr__epsilon': 0.31366048093009385, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,639] Trial 39 finished with value: 0.08708090864220011 and parameters: {'feature_selection__estimator__alpha': 0.10534881027285649, 'svr__C': 0.985973461930203, 'svr__gamma': 0.001036767157512272, 'svr__epsilon': 0.32143774674380254, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:29,654] Trial 38 finished with value: 0.12090699650603406 and parameters: {'feature_selection__estimator__alpha': 0.09955374657478798, 'svr__C': 1.0692822091158964, 'svr__gamma': 0.0010648735627785525, 'svr__epsilon': 0.11734533844660036, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:30,666] Trial 41 finished with value: 0.19530469404800815 and parameters: {'feature_selection__estimator__alpha': 0.10069853005290456, 'svr__C': 1.5435268724683835, 'svr__gamma': 0.0014994514174858514, 'svr__epsilon': 0.14708253830911788, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:30,788] Trial 42 finished with value: 0.17537135808482485 and parameters: {'feature_selection__estimator__alpha': 0.11714673970732889, 'svr__C': 1.4917289823291424, 'svr__gamma': 0.0012442607737887202, 'svr__epsilon': 0.1421480140930868, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:30,865] Trial 43 finished with value: 0.1876198023532125 and parameters: {'feature_selection__estimator__alpha': 0.0971358775354925, 'svr__C': 1.4215750005929106, 'svr__gamma': 0.0014659491386802789, 'svr__epsilon': 0.14973886467247258, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:30,896] Trial 40 finished with value: 0.17354517965641206 and parameters: {'feature_selection__estimator__alpha': 0.09958511802224665, 'svr__C': 1.5193641913966716, 'svr__gamma': 0.0011186558748626397, 'svr__epsilon': 0.1545057289424658, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:31,066] Trial 44 finished with value: 0.19018887378139557 and parameters: {'feature_selection__estimator__alpha': 0.08961906625623337, 'svr__C': 1.9313568001465566, 'svr__gamma': 0.001076112238131948, 'svr__epsilon': 0.140821644174219, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:31,193] Trial 45 finished with value: 0.15378341473790386 and parameters: {'feature_selection__estimator__alpha': 0.09113852754282209, 'svr__C': 1.3347103630138035, 'svr__gamma': 0.0010439104662512155, 'svr__epsilon': 0.15505812747692033, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:31,329] Trial 47 finished with value: 0.02632813303621969 and parameters: {'feature_selection__estimator__alpha': 0.19099641027255443, 'svr__C': 0.3400610368611258, 'svr__gamma': 0.002835418281603581, 'svr__epsilon': 0.1673990843909, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:31,412] Trial 46 finished with value: 0.015401058443459532 and parameters: {'feature_selection__estimator__alpha': 0.1850520005230049, 'svr__C': 0.32353786337457957, 'svr__gamma': 0.0026038387034506096, 'svr__epsilon': 0.14587291307978373, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:32,290] Trial 48 finished with value: 0.009672807003590333 and parameters: {'feature_selection__estimator__alpha': 0.1798435786245101, 'svr__C': 0.3162425920944247, 'svr__gamma': 0.0023860408357293957, 'svr__epsilon': 0.1496424992902157, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:32,347] Trial 50 finished with value: 0.01708960159170453 and parameters: {'feature_selection__estimator__alpha': 0.18095598047387046, 'svr__C': 0.29382179576781253, 'svr__gamma': 0.0024515454944440563, 'svr__epsilon': 0.0525798412202088, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:32,400] Trial 49 finished with value: 0.01645283691697492 and parameters: {'feature_selection__estimator__alpha': 0.20147725516727719, 'svr__C': 0.32887987054321965, 'svr__gamma': 0.002628409570782439, 'svr__epsilon': 0.03229167597992828, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:32,501] Trial 51 finished with value: 0.02622320559174061 and parameters: {'feature_selection__estimator__alpha': 0.186132224151453, 'svr__C': 0.30296134743639236, 'svr__gamma': 0.00268483888163982, 'svr__epsilon': 0.01231661497583697, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:32,580] Trial 52 finished with value: 0.038003450644577004 and parameters: {'feature_selection__estimator__alpha': 0.17340389398103193, 'svr__C': 0.3701019456840213, 'svr__gamma': 0.002570017437270471, 'svr__epsilon': 0.045231493761231756, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.


[I 2025-04-08 00:05:32,670] Trial 53 finished with value: 0.0342698704016422 and parameters: {'feature_selection__estimator__alpha': 0.19704098421182073, 'svr__C': 0.32594271419969106, 'svr__gamma': 0.0030873556323624306, 'svr__epsilon': 0.021773643914685226, 'svr__kernel': 'rbf'}. Best is trial 15 with value: 0.24813142964499768.




[I 2025-04-08 00:05:32,833] Trial 55 finished with value: 0.2510909691907664 and parameters: {'feature_selection__estimator__alpha': 0.0197063391009616, 'svr__C': 4.530937755553478, 'svr__gamma': 0.007631913870493268, 'svr__epsilon': 0.03461905964980379, 'svr__kernel': 'rbf'}. Best is trial 55 with value: 0.2510909691907664.


[I 2025-04-08 00:05:32,835] Trial 54 finished with value: 0.12136625017057981 and parameters: {'feature_selection__estimator__alpha': 0.17810464549979318, 'svr__C': 0.383893041574283, 'svr__gamma': 0.008115564397631737, 'svr__epsilon': 0.04675648267018705, 'svr__kernel': 'rbf'}. Best is trial 55 with value: 0.2510909691907664.










[I 2025-04-08 00:05:34,114] Trial 56 finished with value: 0.2817477904884497 and parameters: {'feature_selection__estimator__alpha': 0.07682219440992656, 'svr__C': 4.725180690326845, 'svr__gamma': 0.007543479148510431, 'svr__epsilon': 0.027975528665009075, 'svr__kernel': 'rbf'}. Best is trial 56 with value: 0.2817477904884497.


[I 2025-04-08 00:05:34,232] Trial 61 finished with value: 0.30570881443235776 and parameters: {'feature_selection__estimator__alpha': 0.07713372658882832, 'svr__C': 4.375518726867743, 'svr__gamma': 0.006825349238374233, 'svr__epsilon': 0.18432927654425854, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


6 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[W 2025-04-08 00:05:34,273] Trial 57 failed with parameters: {'feature_selection__estimator__alpha': 0.2930495293836648, 'svr__C': 4.837055808614691, 'svr__gamma': 0.0070892886715310036, 'svr__epsilon': 0.016457953705170725, 'svr__kernel': 'rbf'} because of the following error: The value nan is not acceptable.


[W 2025-04-08 00:05:34,280] Trial 58 failed with value np.float64(nan).


[W 2025-04-08 00:05:34,292] Trial 57 failed with value np.float64(nan).


[I 2025-04-08 00:05:34,313] Trial 59 finished with value: 0.2834026481638394 and parameters: {'feature_selection__estimator__alpha': 0.08162301885522416, 'svr__C': 2.3045320217986376, 'svr__gamma': 0.006875419480122634, 'svr__epsilon': 0.18625994167143162, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:34,379] Trial 60 finished with value: 0.30365965096360154 and parameters: {'feature_selection__estimator__alpha': 0.07553664590344296, 'svr__C': 4.717220948326764, 'svr__gamma': 0.006561565010177305, 'svr__epsilon': 0.1865318660313911, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


2 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[W 2025-04-08 00:05:34,592] Trial 62 failed with value np.float64(nan).


[I 2025-04-08 00:05:34,678] Trial 63 finished with value: 0.27290010693418043 and parameters: {'feature_selection__estimator__alpha': 0.07021328869779225, 'svr__C': 5.397060965379826, 'svr__gamma': 0.005578736064267427, 'svr__epsilon': 0.3899515517495442, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:35,721] Trial 64 finished with value: 0.09794048728516683 and parameters: {'feature_selection__estimator__alpha': 0.019494206654298463, 'svr__C': 16.470312691686036, 'svr__gamma': 0.012612081026344927, 'svr__epsilon': 0.3943287414347365, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:35,881] Trial 65 finished with value: 0.24634366696464416 and parameters: {'feature_selection__estimator__alpha': 0.021475112115222572, 'svr__C': 4.714999835653949, 'svr__gamma': 0.0059163630164957735, 'svr__epsilon': 0.39321747614848, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:35,908] Trial 67 finished with value: 0.26410856149131695 and parameters: {'feature_selection__estimator__alpha': 0.07190174051929955, 'svr__C': 4.761774064014571, 'svr__gamma': 0.014362728961689065, 'svr__epsilon': 0.1884309659852224, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:35,922] Trial 69 finished with value: 0.2666115615108647 and parameters: {'feature_selection__estimator__alpha': 0.02215252452907048, 'svr__C': 4.8213815974182115, 'svr__gamma': 0.005861711813791785, 'svr__epsilon': 0.2744635682633617, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:35,984] Trial 66 finished with value: 0.23004278021075136 and parameters: {'feature_selection__estimator__alpha': 0.019216654782632084, 'svr__C': 4.993786680294791, 'svr__gamma': 0.012738223722114791, 'svr__epsilon': 0.18308182752363505, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:36,061] Trial 68 finished with value: 0.2627676082636186 and parameters: {'feature_selection__estimator__alpha': 0.022094313789706093, 'svr__C': 4.57525406294619, 'svr__gamma': 0.005077554314433621, 'svr__epsilon': 0.2860972266850448, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:36,294] Trial 70 finished with value: 0.1883874533059792 and parameters: {'feature_selection__estimator__alpha': 0.019030587904388722, 'svr__C': 16.567684891836844, 'svr__gamma': 0.005376121325538222, 'svr__epsilon': 0.28823678084673093, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:36,333] Trial 71 finished with value: 0.2934418824182692 and parameters: {'feature_selection__estimator__alpha': 0.07917583005185624, 'svr__C': 4.407696879172425, 'svr__gamma': 0.005017858889521863, 'svr__epsilon': 0.27531114169496435, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.








6 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[W 2025-04-08 00:05:37,365] Trial 72 failed with value np.float64(nan).


[I 2025-04-08 00:05:37,619] Trial 75 finished with value: 0.2511201071140397 and parameters: {'feature_selection__estimator__alpha': 0.07164320940144157, 'svr__C': 27.166557383644264, 'svr__gamma': 0.0050381837743218895, 'svr__epsilon': 0.27716444539114127, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


3 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[W 2025-04-08 00:05:37,640] Trial 74 failed with parameters: {'feature_selection__estimator__alpha': 0.2828615959279588, 'svr__C': 13.966284036312928, 'svr__gamma': 0.004734292474075097, 'svr__epsilon': 0.18875380103201195, 'svr__kernel': 'rbf'} because of the following error: The value nan is not acceptable.


[W 2025-04-08 00:05:37,700] Trial 74 failed with value np.float64(nan).


[I 2025-04-08 00:05:37,762] Trial 76 finished with value: 0.28186653030505815 and parameters: {'feature_selection__estimator__alpha': 0.07553381110865022, 'svr__C': 14.004819625729544, 'svr__gamma': 0.005228390165187907, 'svr__epsilon': 0.271072066290216, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:37,856] Trial 77 finished with value: 0.17975583064815143 and parameters: {'feature_selection__estimator__alpha': 0.07663133952817296, 'svr__C': 32.123667118924224, 'svr__gamma': 0.0099562592033595, 'svr__epsilon': 0.25927890441613954, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:37,947] Trial 78 finished with value: 0.13976763138577009 and parameters: {'feature_selection__estimator__alpha': 0.07996781315328097, 'svr__C': 39.28921020916982, 'svr__gamma': 0.01064577082996104, 'svr__epsilon': 0.26450029623131166, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:38,080] Trial 79 finished with value: 0.29630604590948273 and parameters: {'feature_selection__estimator__alpha': 0.07149260157410696, 'svr__C': 2.372073080890454, 'svr__gamma': 0.009671783013023525, 'svr__epsilon': 0.25761671333313446, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.




[I 2025-04-08 00:05:38,840] Trial 80 finished with value: 0.2803458858858944 and parameters: {'feature_selection__estimator__alpha': 0.07417360119825951, 'svr__C': 2.3968044639457755, 'svr__gamma': 0.004043832418116612, 'svr__epsilon': 0.26162220568962696, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


3 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[W 2025-04-08 00:05:39,140] Trial 81 failed with value np.float64(nan).


[I 2025-04-08 00:05:39,162] Trial 82 finished with value: 0.3018474147550757 and parameters: {'feature_selection__estimator__alpha': 0.04265675572707276, 'svr__C': 2.189026705966331, 'svr__gamma': 0.009730184540539531, 'svr__epsilon': 0.24320708527571178, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:39,277] Trial 83 finished with value: 0.3005818031157156 and parameters: {'feature_selection__estimator__alpha': 0.07534081509959387, 'svr__C': 2.1758219184704553, 'svr__gamma': 0.008333656583613442, 'svr__epsilon': 0.25675368840014695, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:39,332] Trial 84 finished with value: 0.30491695457311707 and parameters: {'feature_selection__estimator__alpha': 0.04387980923871652, 'svr__C': 2.2698223178676824, 'svr__gamma': 0.009985836703482742, 'svr__epsilon': 0.250586688529689, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


3 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[I 2025-04-08 00:05:39,448] Trial 86 finished with value: -0.09178092969361695 and parameters: {'feature_selection__estimator__alpha': 0.0412768633395143, 'svr__C': 11.242730334808844, 'svr__gamma': 8.423844214470174, 'svr__epsilon': 0.3413254420263029, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[W 2025-04-08 00:05:39,456] Trial 85 failed with value np.float64(nan).




[I 2025-04-08 00:05:39,635] Trial 87 finished with value: 0.2825017393576009 and parameters: {'feature_selection__estimator__alpha': 0.04332228999240127, 'svr__C': 2.3564707089269366, 'svr__gamma': 0.003793760289603059, 'svr__epsilon': 0.24061696364316953, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.












[I 2025-04-08 00:05:40,459] Trial 88 finished with value: 0.26866190787824024 and parameters: {'feature_selection__estimator__alpha': 0.04191942385718349, 'svr__C': 13.16522662861538, 'svr__gamma': 0.007404050669460779, 'svr__epsilon': 0.32812166014249344, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


3 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
3 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[W 2025-04-08 00:05:40,609] Trial 91 failed with value np.float64(nan).
6 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages

[W 2025-04-08 00:05:40,623] Trial 89 failed with parameters: {'feature_selection__estimator__alpha': 0.29525170765975306, 'svr__C': 13.16990675494826, 'svr__gamma': 0.017835710015006997, 'svr__epsilon': 0.2390220929083243, 'svr__kernel': 'rbf'} because of the following error: The value nan is not acceptable.


[W 2025-04-08 00:05:40,653] Trial 89 failed with value np.float64(nan).


6 fits failed out of a total of 20.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
1 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/Users/bassel27/personal_projects/hireverse/venv/lib/python3.13/site-packages/sklearn/pipeline.py", line 662, in fit
    self._final_estimator.fit(Xt

[W 2025-04-08 00:05:40,727] Trial 90 failed with value np.float64(nan).


[I 2025-04-08 00:05:40,885] Trial 92 finished with value: 0.20543874337313656 and parameters: {'feature_selection__estimator__alpha': 0.04296840043483186, 'svr__C': 3.2112614197475184, 'svr__gamma': 0.051967354161672974, 'svr__epsilon': 0.243429701209381, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:40,959] Trial 94 finished with value: 0.1653882106937529 and parameters: {'feature_selection__estimator__alpha': 0.14861374855348825, 'svr__C': 3.3735169687416757, 'svr__gamma': 0.0228148330188134, 'svr__epsilon': 0.24889141340279808, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:40,974] Trial 93 finished with value: 0.16804261126411685 and parameters: {'feature_selection__estimator__alpha': 0.13525851162877256, 'svr__C': 2.8155146004056992, 'svr__gamma': 0.05196739311818385, 'svr__epsilon': 0.24467260684806472, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:41,170] Trial 95 finished with value: 0.19219677465902313 and parameters: {'feature_selection__estimator__alpha': 0.1485050679627278, 'svr__C': 2.1092073762460193, 'svr__gamma': 0.018928614842857506, 'svr__epsilon': 0.19544307813965472, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:41,470] Trial 96 finished with value: 0.18657713811276241 and parameters: {'feature_selection__estimator__alpha': 0.14595531445201756, 'svr__C': 2.1110299188622093, 'svr__gamma': 0.022388572338239248, 'svr__epsilon': 0.1954891573133331, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:41,568] Trial 97 finished with value: 0.2191371144102261 and parameters: {'feature_selection__estimator__alpha': 0.13602360217037018, 'svr__C': 2.0298388260631204, 'svr__gamma': 0.01770428709770485, 'svr__epsilon': 0.24204094467618115, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:41,590] Trial 98 finished with value: 0.21626138236573972 and parameters: {'feature_selection__estimator__alpha': 0.13525917464601594, 'svr__C': 2.0439741064374664, 'svr__gamma': 0.020073381601721365, 'svr__epsilon': 0.19831354214179459, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


[I 2025-04-08 00:05:41,596] Trial 99 finished with value: 0.16921452477697213 and parameters: {'feature_selection__estimator__alpha': 0.14848700701355116, 'svr__C': 3.296925631774624, 'svr__gamma': 0.021407455933109498, 'svr__epsilon': 0.19511011967228536, 'svr__kernel': 'rbf'}. Best is trial 61 with value: 0.30570881443235776.


Best hyperparameters: {'feature_selection__estimator__alpha': 0.07713372658882832, 'svr__C': 4.375518726867743, 'svr__gamma': 0.006825349238374233, 'svr__epsilon': 0.18432927654425854, 'svr__kernel': 'rbf'}
Best R² score: 0.3057


## Store Trained Model

In [8]:
from sklearn import clone

if HYPERPARAMETER_TUNING_ENABLED:
    unfitted_pipeline.set_params(**study.best_params)
else:
    unfitted_pipeline.set_params(**PIPELINE_PARAMS)
fitted_pipeline = clone(unfitted_pipeline)
fitted_pipeline.fit(X, y)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



## Feature Selection Results

In [9]:
preprocessor = fitted_pipeline.named_steps['preprocessor']
feature_names = preprocessor.get_feature_names_out()    # after preprocessing

feature_selector = fitted_pipeline.named_steps['feature_selection']
selected_mask = feature_selector.get_support()

selected_features = feature_names[selected_mask]
unselected_features = feature_names[~selected_mask]

print(f"Number of Selected features ({len(selected_features)}):")
print(f"Selected features: {selected_features}")
print(f"Unselected features: {unselected_features}")

Number of Selected features (8):
Selected features: ['remainder__intensity_mean' 'remainder__f3_sd' 'remainder__f2_f1_mean'
 'remainder__percent_unvoiced' 'remainder__percent_breaks'
 'remainder__Duration/Total Words' 'remainder__They'
 'remainder__Cognitive']
Unselected features: ['remainder__f0_mean' 'remainder__f0_min' 'remainder__f0_max'
 'remainder__f0_range' 'remainder__f0_sd' 'remainder__intensity_min'
 'remainder__intensity_max' 'remainder__intensity_range'
 'remainder__intensity_sd' 'remainder__f1_mean' 'remainder__f1_sd'
 'remainder__f2_mean' 'remainder__f2_sd' 'remainder__f3_mean'
 'remainder__f3_f1_mean' 'remainder__f2_f1_sd' 'remainder__f3_f1_sd'
 'remainder__jitter' 'remainder__shimmer' 'remainder__pause_duration_max'
 'remainder__pause_duration_avg' 'remainder__duration'
 'remainder__Total Words' 'remainder__Unique Words'
 'remainder__Filler Words' 'remainder__Audio Duration (s)'
 'remainder__Duration/Unique Words' 'remainder__Duration/Filler Words'
 'remainder__Individu

# Monte Carlo Cross Validation

In [10]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import r2_score

scoring = {
    "r2": make_scorer(r2_score),
    "pearson": make_scorer(pearson_corr)
}

results = cross_validate(
    unfitted_pipeline,
    X,
    y,
    cv=GroupShuffleSplit(n_splits=1000, test_size=0.2, random_state=42),
    groups=groups_column,
    scoring=scoring,
    n_jobs=-1,
)

r2_scores = results["test_r2"]
pearson_scores = results["test_pearson"]
# Report the average performance and variability
avg_r2_score = np.mean(r2_scores)
print(
    f"Mean R² Score: {avg_r2_score:.2f} (±{np.std(r2_scores):.2f})"
)
avg_perason_score = np.mean(pearson_scores)
print(
    f"Mean Pearson Correlation: {avg_perason_score:.2f} (±{np.std(pearson_scores):.2f})"
)

# Mean R² Score: 0.20 (±0.23)
# Mean Pearson Correlation: 0.53 (±0.16)

Traceback (most recent call last):


Traceback (most recent call last):


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 422, in _on_run
    cmd.send(self.sock)
    ~~~~~~~~^^^^^^^^^^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 422, in _on_run
    cmd.send(self.sock)
    ~~~~~~~~^^^^^^^^^^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command.py", line 111, in send
    sock.sendall(("Content-Length: %s\r\n\r\n" % len(as_bytes)).encode("ascii"))
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command.py", line 111, in send
    sock.sendall(("Content-Length: %s\r\n\r\n" % len(as_bytes)).encode("ascii"))
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


OSError: [Errno 9] Bad file descriptor


OSError: [Errno 9] Bad file descriptor



During handling of the above exception, another exception occurred:




During handling of the above exception, another exception occurred:



Traceback (most recent call last):


Traceback (most recent call last):


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_daemon_thread.py", line 53, in run
    self._on_run()
    ~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 432, in _on_run
    self.py_db.dispose_and_kill_all_pydevd_threads()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/pydevd.py", line 2558, in dispose_and_kill_all_pydevd_threads
    self._client_socket.close()
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_daemon_thread.py", line 53, in run
    self._on_run()
    ~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 432, in _on_run
    self.py_db.dispose_and_kill_all_pydevd_threads()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 505, in close
    self._real_close()
    ~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 499, in _real_close
    _ss.close(self)
    ~~~~~~~~~^^^^^^


Traceback (most recent call last):


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/pydevd.py", line 2558, in dispose_and_kill_all_pydevd_threads
    self._client_socket.close()
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 505, in close
    self._real_close()
    ~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 499, in _real_close
    _ss.close(self)
    ~~~~~~~~~^^^^^^


OSError: [Errno 9] Bad file descriptor


OSError: [Errno 9] Bad file descriptor


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 422, in _on_run
    cmd.send(self.sock)
    ~~~~~~~~^^^^^^^^^^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command.py", line 111, in send
    sock.sendall(("Content-Length: %s\r\n\r\n" % len(as_bytes)).encode("ascii"))
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Traceback (most recent call last):


OSError: [Errno 9] Bad file descriptor



During handling of the above exception, another exception occurred:



  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 422, in _on_run
    cmd.send(self.sock)
    ~~~~~~~~^^^^^^^^^^^


Traceback (most recent call last):


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command.py", line 111, in send
    sock.sendall(("Content-Length: %s\r\n\r\n" % len(as_bytes)).encode("ascii"))
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_daemon_thread.py", line 53, in run
    self._on_run()
    ~~~~~~~~~~~~^^


OSError: [Errno 9] Bad file descriptor


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 432, in _on_run
    self.py_db.dispose_and_kill_all_pydevd_threads()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^



During handling of the above exception, another exception occurred:



  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/pydevd.py", line 2558, in dispose_and_kill_all_pydevd_threads
    self._client_socket.close()
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^


Traceback (most recent call last):


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 505, in close
    self._real_close()
    ~~~~~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_daemon_thread.py", line 53, in run
    self._on_run()
    ~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 432, in _on_run
    self.py_db.dispose_and_kill_all_pydevd_threads()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 499, in _real_close
    _ss.close(self)
    ~~~~~~~~~^^^^^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/pydevd.py", line 2558, in dispose_and_kill_all_pydevd_threads
    self._client_socket.close()
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^


OSError: [Errno 9] Bad file descriptor


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 505, in close
    self._real_close()
    ~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 499, in _real_close
    _ss.close(self)
    ~~~~~~~~~^^^^^^


OSError: [Errno 9] Bad file descriptor


Traceback (most recent call last):


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 422, in _on_run
    cmd.send(self.sock)
    ~~~~~~~~^^^^^^^^^^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_net_command.py", line 111, in send
    sock.sendall(("Content-Length: %s\r\n\r\n" % len(as_bytes)).encode("ascii"))
    ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


OSError: [Errno 9] Bad file descriptor



During handling of the above exception, another exception occurred:



Traceback (most recent call last):


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_daemon_thread.py", line 53, in run
    self._on_run()
    ~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/_pydevd_bundle/pydevd_comm.py", line 432, in _on_run
    self.py_db.dispose_and_kill_all_pydevd_threads()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^


  File "/Users/bassel27/.vscode/extensions/ms-python.debugpy-2025.6.0-darwin-arm64/bundled/libs/debugpy/_vendored/pydevd/pydevd.py", line 2558, in dispose_and_kill_all_pydevd_threads
    self._client_socket.close()
    ~~~~~~~~~~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 505, in close
    self._real_close()
    ~~~~~~~~~~~~~~~~^^


  File "/opt/homebrew/Cellar/python@3.13/3.13.1/Frameworks/Python.framework/Versions/3.13/lib/python3.13/socket.py", line 499, in _real_close
    _ss.close(self)
    ~~~~~~~~~^^^^^^


OSError: [Errno 9] Bad file descriptor


Mean R² Score: 0.20 (±0.28)
Mean Pearson Correlation: 0.54 (±0.16)


# Save the Model

In [11]:
from joblib import dump, load

dump(fitted_pipeline, os.path.join(SAVED_MODELS_PATH,f'{target_column}.joblib'))

['./regression_models/saved_models/RecommendHiring.joblib']