In [2]:
import numpy as np
import pandas as pd



ESS_data = pd.read_csv("C:/Users/grego/Desktop/wta/ESS11.csv", low_memory=False)


All subsequently steps are repeated from the main file and their role is to prepare data for modelling. 

In [3]:
df = ESS_data[[
    "imsmetn", "imdfetn", "impcntr",
"ipcrtiva","impricha","ipeqopta","ipshabta",
"impsafea","impdiffa","ipfrulea","ipudrsta","ipmodsta","ipgdtima",
"impfreea","iphlppla","ipsucesa","ipstrgva","ipadvnta",
"ipbhprpa","iprspota","iplylfra","impenva","imptrada","impfuna",


    "gndr", "brncntr", "maritalb", "domicil",
    "eisced", "hincfel", "pdwrk",
    "agea", "lrscale", "rlgdgr", "hhmmb",
    "polintr",
    
    "trstprl", "trstlgl", "trstplc", "trstplt", "trstprt", "trstep", "trstun", "ppltrst",
    "feethngr", "cntry"
]].copy()


In [4]:
# === Questions needed to calculate the target variable ===
#imsmetn: Now, using this card, to what extent do you think [country] should allow people of the same race or ethnic group as most [country]'s people to come and live here?
#imdfetn: How about people of a different race or ethnic group from most [country] people?
#impcntr: How about people from the poorer countries outside Europe?

#1	Allow many to come and live here
#2	Allow some
#3	Allow a few
#4	Allow none
#7	Refusal*
#8	Don't know*
#9	No answer*
#*) Missing Value - all the questions with '*' are considered missing values according to ESS codebook

#unique values
print("Unique values in imsmetn:", df["imsmetn"].unique())
print("Unique values in imdfetn:", df["imdfetn"].unique())
print("Unique values in impcntr:", df["impcntr"].unique())

#changing invalid responses to None
cols = ["imsmetn", "imdfetn", "impcntr"]
df[cols] = df[cols].replace([7, 8, 9, "7", "8", "9"], None)
print('====================')
print("Unique values in imsmetn:", df["imsmetn"].unique())
print("Unique values in imdfetn:", df["imdfetn"].unique())
print("Unique values in impcntr:", df["impcntr"].unique())





Unique values in imsmetn: [2 1 4 3 7 8 9]
Unique values in imdfetn: [2 1 3 4 7 8 9]
Unique values in impcntr: [3 2 1 4 7 8 9]
Unique values in imsmetn: [2 1 4 3 None]
Unique values in imdfetn: [2 1 3 4 None]
Unique values in impcntr: [3 2 1 4 None]


In [5]:
#### EXPLORING PREDICTORS
## CATEGORICAL VARIABLES
#1 - gndr: Gender

## Checking unique values
print("Unique values in gndr before recoding:", df["gndr"].unique())

# Renaming column to Gender
df = df.rename(columns={"gndr": "Gender"})

# Replacing numeric codes with text labels
df["Gender"] = df["Gender"].replace({ 1: "Male", 2: "Female"})

# Converting to categorical data type
df["Gender"] = df["Gender"].astype("category")

# Checking values after recoding
print("Unique values in Gender after recoding:", df["Gender"].unique())


Unique values in gndr before recoding: [1 2]
Unique values in Gender after recoding: ['Male', 'Female']
Categories (2, object): ['Female', 'Male']


In [6]:
# cntry - Country code of respondent
# ==============================================================

# Checking unique country codes
print("Unique values in cntry:", df["cntry"].unique())
df = df.rename(columns={"cntry": "Country"})
df["Country"] = df["Country"].replace({"AT":"Austria","BE":"Belgium","BG":"Bulgaria","CH":"Switzerland","CY":"Cyprus","DE":"Germany","ES":"Spain",
                                       "FI":"Finland","FR":"France","GB":"United Kingdom","GR":"Greece","HR":"Croatia","HU":"Hungary","IE":"Ireland",
                                       "IL":"Israel","IS":"Iceland","IT":"Italy","LT":"Lithuania","LV":"Latvia","ME":"Montenegro","NL":"Netherlands",
                                       "NO":"Norway","PL":"Poland","PT":"Portugal","RS":"Serbia","SE":"Sweden","SI":"Slovenia","SK":"Slovakia"})

df = df[df["Country"] != "Israel"]




Unique values in cntry: ['AT' 'BE' 'BG' 'CH' 'CY' 'DE' 'ES' 'FI' 'FR' 'GB' 'GR' 'HR' 'HU' 'IE'
 'IL' 'IS' 'IT' 'LT' 'LV' 'ME' 'NL' 'NO' 'PL' 'PT' 'RS' 'SE' 'SI' 'SK']


In [7]:
#brncntr - Were you born in [country]?
# ============================================================
# 1 = Yes
# 2 = No
# 7 = Refusal*
# 8 = Don't know*
# 9 = No answer*

# Checking unique values
print("Unique values in brncntr before recoding:", df["brncntr"].unique())

df = df.rename(columns={"brncntr": "Born_in_Country"})

# Replacing numeric codes with descriptive labels
df["Born_in_Country"] = df["Born_in_Country"].replace({1:"Yes", 2:"No", 7:None, 8:None, 9:None})

# Converting to categorical data type
df["Born_in_Country"] = df["Born_in_Country"].astype("category")
# Checking values after recoding
print("Unique values in Born_in_Country after recoding:", df["Born_in_Country"].unique())


Unique values in brncntr before recoding: [1 2 7 8 9]
Unique values in Born_in_Country after recoding: ['Yes', 'No', NaN]
Categories (2, object): ['No', 'Yes']


In [8]:
# maritalb: Marital status

## Checking unique values
print("Unique values in maritalb before recoding:", df["maritalb"].unique())

# Renaming column to Marital_Status
df = df.rename(columns={"maritalb": "Marital_Status"})

# Replacing numeric codes with labels
df["Marital_Status"] = df["Marital_Status"].replace({
    1: "Legally married",
    2: "In a legally registered civil union",
    3: "Legally separated",
    4: "Legally divorced/Civil union dissolved",
    5: "Widowed/Civil partner died",
    6: "None of these (never married or in legally registered civil union)",
    77: None,  # Refusal
    88: None,  # Don't know
    99: None   # No answer
})

# Converting to categorical data type
df["Marital_Status"] = df["Marital_Status"].astype("category")

# Checking values after recoding
print("Unique values in Marital_Status after recoding:", df["Marital_Status"].unique())


Unique values in maritalb before recoding: [ 1  6  4  5  2 77 88 99  3]
Unique values in Marital_Status after recoding: ['Legally married', 'None of these (never married or in legally re..., 'Legally divorced/Civil union dissolved', 'Widowed/Civil partner died', 'In a legally registered civil union', NaN, 'Legally separated']
Categories (6, object): ['In a legally registered civil union', 'Legally divorced/Civil union dissolved', 'Legally married', 'Legally separated', 'None of these (never married or in legally re..., 'Widowed/Civil partner died']


In [9]:
# @title
#3 - domicil: Which phrase on this card best describes the area where you live?

## Checking unique values
print("Unique values in domicil before recoding:", df["domicil"].unique())

# Renaming column to Domicile
df = df.rename(columns={"domicil": "Domicile"})
# Replacing numeric codes with descriptive labels
df["Domicile"] = df["Domicile"].replace({
    1: "A big city",
    2: "Suburbs or outskirts of big city",
    3: "Town or small city",
    4: "Country village",
    5: "Farm or home in countryside",
    7: None,  # Refusal
    8: None,  # Don't know
    9: None   # No answer
})

# Converting to categorical data type
df["Domicile"] = df["Domicile"].astype("category")

# Checking values after recoding
print("Unique values in Domicile after recoding:", df["Domicile"].unique())


Unique values in domicil before recoding: [3 1 4 5 2 8 7 9]
Unique values in Domicile after recoding: ['Town or small city', 'A big city', 'Country village', 'Farm or home in countryside', 'Suburbs or outskirts of big city', NaN]
Categories (5, object): ['A big city', 'Country village', 'Farm or home in countryside', 'Suburbs or outskirts of big city', 'Town or small city']


In [10]:
# Eisced: Education level (ES-ISCED classification)
  #0:  "Not possible to harmonise into ES-ISCED",
  #1:  "ES-ISCED I, less than lower secondary",
  #2:  "ES-ISCED II, lower secondary",
  #3:  "ES-ISCED IIIb, lower tier upper secondary",
  #4:  "ES-ISCED IIIa, upper tier upper secondary",
  #5:  "ES-ISCED IV, advanced vocational, sub-degree",
  #6:  "ES-ISCED V1, lower tertiary education (BA level)",
  #7:  "ES-ISCED V2, higher tertiary education (≥ MA level)",
  #55: "Other"
  #77: Refusal*
  #88: Don't know*
  #99: No answer*

## Checking unique values
print("Unique values in eisced before recoding:", df["eisced"].unique())

# Renaming column to Education_Level
df = df.rename(columns={"eisced": "Education_Level"})

# Replacing numeric codes with descriptive labels and shorter names to better visualisation
df["Education_Level"] = df["Education_Level"].replace({
    0:  "Not possible to harmonise into ES-ISCED",
    1:  "Less than lower secondary",
    2:  "Lower secondary",
    3:  "Lower tier upper secondary",
    4:  "Upper tier upper secondary",
    5:  "Advanced vocational, sub-degree",
    6:  "Lower tertiary education (BA level)",
    7:  "Higher tertiary education (≥ MA level)",
    55: "Other",
    77: None,  # Refusal
    88: None,  # Don't know
    99: None   # No answer
})

# Converting to categorical data type
df["Education_Level"] = df["Education_Level"].astype("category")

# Checking values after recoding
print("Unique values in Education_Level after recoding:", df["Education_Level"].unique())

#no single instance of "Not possible to harmonise into ES-ISCED"


Unique values in eisced before recoding: [ 3  5  6  4  2  7 55  1 77 88 99]
Unique values in Education_Level after recoding: ['Lower tier upper secondary', 'Advanced vocational, sub-degree', 'Lower tertiary education (BA level)', 'Upper tier upper secondary', 'Lower secondary', 'Higher tertiary education (≥ MA level)', 'Other', 'Less than lower secondary', NaN]
Categories (8, object): ['Advanced vocational, sub-degree', 'Higher tertiary education (≥ MA level)', 'Less than lower secondary', 'Lower secondary', 'Lower tertiary education (BA level)', 'Lower tier upper secondary', 'Other', 'Upper tier upper secondary']


In [11]:
#6 - pdwrk: Doing last 7 days - paid work

# Checking unique values
print("Unique values in pdwrk before recoding:", df["pdwrk"].unique())
# Renaming column to Working
df = df.rename(columns={"pdwrk": "Working"})
# Replacing numeric codes with descriptive labels
df["Working"] = df["Working"].replace({0:"No", 1:"Yes"})
# Converting to categorical data type
df["Working"] = df["Working"].astype("category")

# Checking values after recoding
print("Unique values in Working after recoding:", df["Working"].unique())


Unique values in pdwrk before recoding: [0 1]
Unique values in Working after recoding: ['No', 'Yes']
Categories (2, object): ['No', 'Yes']


In [None]:
# HUMAN VALUES VARIABLES
# ipeqopta  - Important that people are treated equally and have equal opportunities
# ipudrsta  - Important to understand different people
# impenva   - Important to care for nature and environment
# iphlppla  - Important to help people and care for others’ well-being
# iplylfra  - Important to be loyal to friends and devote to people close
# ipmodsta  - Important to be humble and modest, not draw attention
# imptrada  - Important to follow traditions and customs
# ipbhprpa  - Important to behave properly
# ipfrulea  - Important to do what is told and follow rules
# impsafea  - Important to live in secure and safe surroundings
# ipstrgva  - Important that government is strong and ensures safety
 #
 # 
# "ipcrtiva",  # Important to think new ideas / be creative
   # "impricha",  # Important to be rich
   # "ipshabta",  # Important to show abilities and be admired
  #  "impdiffa",  # Important to try new and different things
  #  "ipgdtima",  # Important to have a good time
  #  "impfreea",  # Important to make own decisions / be free
# "ipsucesa",  # Important to be successful
    #"ipadvnta",  # Important to seek adventures / exciting life
   # "iprspota",  # Important to get respect from others
  #  "impfuna"    # Important to seek fun and things that give pleasure

#Answers:
#1	Very much like me
#2	Like me
#3	Somewhat like me
#4	A little like me
#5	Not like me
#6	Not like me at all
#66	Not applicable*
#77	Refusal*
#88	Don't know*
#99	No answer*


# Check unique values

schwartz_cols = [
    "impsafea","ipstrgva","ipfrulea","ipbhprpa","ipmodsta","imptrada",
    "ipeqopta","ipudrsta","impenva","iphlppla","iplylfra",
    "ipcrtiva",  
    "impricha",  
    "ipshabta",  
    "impdiffa",  
    "ipgdtima",  
    "impfreea",  
    "ipsucesa",  
    "ipadvnta",  
    "iprspota",  
    "impfuna"    
]
for col in schwartz_cols:
    print(f"Unique values in {col}:", df[col].unique())

# Replace invalid responses (66, 77, 88, 99) with None

for col in schwartz_cols:
    df[col] = df[col].replace({66: None, 77: None, 88: None, 99: None})


Unique values in impsafea: [ 2  4  1  3 88 77  5  6 66 99]
Unique values in ipstrgva: [ 2  3  1  4 88 77  5  6 66 99]
Unique values in ipfrulea: [ 2  4  3  5  1 88  6 77 66 99]
Unique values in ipbhprpa: [ 2  3  1 88  4  6 77  5 66 99]
Unique values in ipmodsta: [ 2  3  4  1  5 88  6 77 66 99]
Unique values in imptrada: [ 3  4  2  1 88  5  6 77 66 99]
Unique values in ipeqopta: [ 2  1  3 88  4  5  6 77 66 99]
Unique values in ipudrsta: [ 2  1  3  4 88  5  6 77 66 99]
Unique values in impenva: [ 2  1  3  4 88  5 77 66  6 99]
Unique values in iphlppla: [ 2  1  3  4 88  5 77 66  6 99]
Unique values in iplylfra: [ 2  1 88  3  4  5 77 66  6 99]
Unique values in ipcrtiva: [ 3  2  1  4 88  6  5 77 66 99]
Unique values in impricha: [ 5  4  2  6  3 88 77  1 66 99]
Unique values in ipshabta: [ 2  4  3  1  5 88  6 77 66 99]
Unique values in impdiffa: [ 4  2  3  1 88  6  5 77 66 99]
Unique values in ipgdtima: [ 2  3  4  1 88  5 77  6 66 99]
Unique values in impfreea: [ 2  3  1  4 88  5 77  6 66 99

In [13]:
# ==============================================================
# agea - Age of respondent (calculated)
# ==============================================================
#numerical variable
#999* - Not available

df["agea"] = df["agea"].replace({999: None})
df = df.rename(columns={"agea": "Age"})
df["Age"].unique()

array([65, 21, 53, 78, 64, 59, 77, 69, 52, 75, 44, 49, 63, 76, 30, 67, 41,
       82, 26, 42, 85, 72, 31, 47, 50, 60, 43, 35, 36, 46, 81, 57, 38, 58,
       79, 25, 34, 74, 28, 16, 71, 20, 80, 51, 87, 61, 89, 68, 24, 32, 45,
       33, 27, 73, 62, 90, 70, 18, 83, 86, 39, 48, 54, 23, 55, 17, 29, 56,
       19, 66, 40, 84, 22, 88, 37, 15, None], dtype=object)

In [14]:
# hincfel - Feeling about household's income nowadays
# 1 = Living comfortably on present income
# 2 = Coping on present income
# 3 = Difficult on present income
# 4 = Very difficult on present income
# 7 = Refusal*
# 8 = Don't know*
# 9 = No answer*
df["hincfel"].unique()
# Replacing invalid codes (7, 8, 9) with None
df["hincfel"] = df["hincfel"].replace({7: None, 8: None, 9: None})
df["hincfel"] = df["hincfel"].map({1:4, 2:3, 3:2, 4:1})
df = df.rename(columns={"hincfel": "Household_income_feeling"})
print("Unique values in hincfel (after):", df["Household_income_feeling"].unique())




Unique values in hincfel (after): [ 4.  3.  1.  2. nan]


In [15]:
# lrscale - Placement on left-right political scale
# --------------------------------------------------------------
# 0 = Left
# 1–9 = Intermediate positions
# 10 = Right
# 77 = Refusal*
# 88 = Don't know*
# 99 = No answer*
print("Unique before:", df["lrscale"].unique())
# Replacing invalid codes (77, 88, 99) with None
df["lrscale"] = df["lrscale"].replace({77: None, 88: None, 99: None})
df = df.rename(columns={"lrscale": "Left–Right_self-placement"})


Unique before: [ 5  0  3  2  4 77  6 88  9  7  8 10  1 99]


In [None]:
# rlgdgr - How religious are you
# 0 = Not at all religious
# 1–9 = Intermediate positions
# 10 = Very religious
# 77 = Refusal*
# 88 = Don't know*
# 99 = No answer*
# ==============================================================

print("Unique before:", df["rlgdgr"].unique())

# Replacing invalid codes (77, 88, 99) with None
df["rlgdgr"] = df["rlgdgr"].replace({77: None, 88: None, 99: None})
df = df.rename(columns={"rlgdgr": "Religiosity"})




Unique before: [ 5  0  8  6  1  3 10  9  7  4  2 77 88 99]


In [17]:
#polintr - How interested would you say you are in politics - are you...
#1	Very interested
#2	Quite interested
#3	Hardly interested
#4	Not at all interested
#7	Refusal*
#8	Don't know*
#9	No answer*
#*) Missing Value

df["polintr"].unique()
# Replacing invalid codes (7, 8, 9) with None
df["polintr"] = df["polintr"].replace({7: None, 8: None, 9: None})
df["polintr"] = df["polintr"].map({1:4, 2:3, 3:2, 4:1})
df = df.rename(columns={"polintr": "Interest_in_politics"})
print("Unique values in polintr (after):", df["Interest_in_politics"].unique())

Unique values in polintr (after): [ 4.  3.  2.  1. nan]


In [None]:

# hhmmb - Number of people living in household
# Numeric value = total number of household members
# 77 = Refusal*
# 88 = Don't know*
# 99 = No answer*
print("Unique before:", df["hhmmb"].unique())

# Replacing invalid codes (77, 88, 99) with None
df["hhmmb"] = df["hhmmb"].replace({77: None, 88: None, 99: None})
df = df.rename(columns={"hhmmb": "Household size"})



Unique before: [ 2  1  3  4  5  7  0  6  8 77 12  9 88 99 10 11]


In [None]:
#since it is impossible that 0 people live in the household, instances containing these values are removed

zero_cnt = (df["Household size"] == 0).sum()
print("Number of instances with Household size == 0:", zero_cnt)
#only 6 instances with '0', so they are removed


df = df[df["Household size"] != 0].copy()


Number of instances with Household size == 0: 7


In [20]:
# ==============================================================
# TRUST VARIABLES
# --------------------------------------------------------------
# trstprl  - Trust in country's parliament
# trstlgl  - Trust in the legal system
# trstplc  - Trust in the police
# trstplt  - Trust in politicians
# trstprt  - Trust in political parties
# trstep   - Trust in the European Parliament
# trstun   - Trust in the United Nations
# ppltrst  - Most people can be trusted or you can't be too careful       
# --------------------------------------------------------------
# Invalid codes: 77 = Refusal*, 88 = Don't know*, 99 = No answer*
# ==============================================================

trust_cols = ["trstprl", "trstlgl", "trstplc", "trstplt","trstprt", "trstep", "trstun", "ppltrst"]
for col in trust_cols:
    df[col] = df[col].replace({77: None, 88: None, 99: None})

df = df.rename(columns={"trstprl": "Trust in country's parliament"})
df = df.rename(columns={"trstlgl": "Trust in the legal system"})
df = df.rename(columns={"trstplc": "Trust in the police"})
df = df.rename(columns={"trstplt": "Trust in politicians"})
df = df.rename(columns={"trstprt": "Trust in political parties"})
df = df.rename(columns={"trstep": "Trust in the European Parliament"})
df = df.rename(columns={"trstun": "Trust in the United Nations"})
df = df.rename(columns={"ppltrst": "Social trust"})


In [21]:

# feethngr - Feel part of same race or ethnic group as most people in country
# --------------------------------------------------------------
# 1 = Yes
# 2 = No
# 7 = Refusal*
# 8 = Don't know*
# 9 = No answer*
# Checking unique values before recoding
print("Unique values in feethngr before recoding:", df["feethngr"].unique())

# Renaming column to more descriptive name
df = df.rename(columns={"feethngr": "Ethnic_majority"})

# Replacing numeric codes with descriptive labels
df["Ethnic_majority"] = df["Ethnic_majority"].replace({1: "Yes",2: "No",7: None,8: None,9: None})
# Converting to categorical data type
df["Ethnic_majority"] = df["Ethnic_majority"].astype("category")
# Checking values after recoding
print("Unique values in Ethnic_majority after recoding:", df["Ethnic_majority"].unique())



Unique values in feethngr before recoding: [1 8 2 7 9]
Unique values in Ethnic_majority after recoding: ['Yes', NaN, 'No']
Categories (2, object): ['No', 'Yes']


In [22]:
# ==============================================================
# Missing values report (percentage per column)
# ==============================================================

missing_report = (df.isna().mean().round(3) * 100)

missing_report = missing_report.sort_values(ascending=False)

print("Percentage of missing values per column")
print(missing_report)


Percentage of missing values per column
Left–Right_self-placement           13.3
Trust in the United Nations          7.3
Trust in the European Parliament     6.2
ipfrulea                             2.4
ipstrgva                             2.3
impcntr                              2.2
imdfetn                              2.1
imsmetn                              2.1
Trust in the legal system            2.1
iprspota                             2.0
ipsucesa                             1.9
ipbhprpa                             1.9
ipudrsta                             1.9
Trust in country's parliament        1.9
ipcrtiva                             1.8
Trust in political parties           1.8
ipadvnta                             1.7
ipmodsta                             1.7
ipshabta                             1.7
impdiffa                             1.7
ipeqopta                             1.7
ipgdtima                             1.7
impfuna                              1.7
imptrada         

In [23]:
#Deleting invalid responsed incolumns required to compute the target feature and calculating target
#imsmetn: Now, using this card, to what extent do you think [country] should allow people of the same race or ethnic group as most [country]'s people to come and live here?
#imdfetn: How about people of a different race or ethnic group from most [country] people?
#impcntr: How about people from the poorer countries outside Europe?
#1	Allow many to come and live here
#2	Allow some
#3	Allow a few
#4	Allow none
#7	Refusal* - already as None
#8	Don't know* - already as None
#9	No answer* - already as None

cols = ["imsmetn", "imdfetn", "impcntr"]
df = df.dropna(subset=cols).copy()

# Target calculation
df["reject_mean"] = df[cols].mean(axis=1, skipna=False)
df["reject"] = (df["reject_mean"] > 2.5).astype(int)
# 1 - negative attitude towards immigration
# 2 - positive attitude towards immigration

# Deleting rows required to compute the target
df = df.drop(columns=["reject_mean"] + cols)

# Target proportions
print("Counts")
print(df["reject"].value_counts())
print("\n Proportions")
print(df["reject"].value_counts(normalize=True).round(3))


Counts
reject
0    26795
1    16951
Name: count, dtype: int64

 Proportions
reject
0    0.613
1    0.387
Name: proportion, dtype: float64


In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd

# 1. Prepare X (features) and y (target)
target = "reject"
X = df.drop(columns=[target])
y = df[target].astype(int)  # make sure target is integer (0/1)


# Lists to store country level splits
X_train_list = []
X_test_list = []
y_train_list = []
y_test_list = []


# 2. Split data inside each country separately
for country, idx in X.groupby("Country").groups.items():
    X_c = X.loc[idx]      # rows for one country (features)
    y_c = y.loc[idx]      # rows for one country (target)

    # stratified train/test split for this country
    X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(
        X_c,
        y_c,
        test_size=0.20,      # 20% test set inside each country
        stratify=y_c,        # keep class balance in train and test
        random_state=42
    )

    # store results in the lists
    X_train_list.append(X_train_c)
    X_test_list.append(X_test_c)
    y_train_list.append(y_train_c)
    y_test_list.append(y_test_c)


# 3. Combine all country splits into one global train and test set
#    (we keep original indices, no reset)
X_train = pd.concat(X_train_list, axis=0)
X_test = pd.concat(X_test_list, axis=0)
y_train = pd.concat(y_train_list, axis=0)
y_test = pd.concat(y_test_list, axis=0)


In [25]:
col_cat = ["Gender", "Born_in_Country", "Marital_Status", "Domicile", "Education_Level", "Working", "Ethnic_majority", "Country"]

col_num = ["Left–Right_self-placement","Trust in the United Nations","Trust in the European Parliament","ipfrulea","ipstrgva","Trust in the legal system","Trust in country's parliament","ipbhprpa","ipudrsta","ipmodsta","imptrada","Trust in political parties","ipeqopta","impenva","iphlppla","iplylfra","impsafea","Trust in politicians","Household_income_feeling","Trust in the police","Age","Religiosity","Social trust","Household size","Interest_in_politics","ipcrtiva","impricha","ipshabta","impdiffa","ipgdtima","impfreea","ipsucesa","ipadvnta","iprspota","impfuna"]


In [None]:
import pandas as pd

# ============================
# 1. NUMERICAL: median per country (computed on TRAIN)
# ============================
for col in col_num:
    # compute country specific medians in TRAIN
    medians = X_train.groupby("Country")[col].median()

    # impute in TRAIN: NA -> country median from TRAIN
    X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))

    # impute in TEST: NA -> country median from TRAIN
    X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))


# ============================
# 2. CATEGORICAL: mode per country (computed on TRAIN)
# ============================
for col in col_cat:
    # compute country specific mode in TRAIN
    modes = (
        X_train.groupby("Country")[col]
        .agg(lambda s: s.mode().iloc[0])  # take the first mode if there are several
    )

    # impute in TRAIN
    X_train[col] = X_train[col].fillna(X_train["Country"].map(modes))

    # impute in TEST (still using TRAIN modes)
    X_test[col] = X_test[col].fillna(X_test["Country"].map(modes))


  X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))
  X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))
  X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))
  X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))
  X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))
  X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))
  X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))
  X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))
  X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))
  X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))
  X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))
  X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))
  X_train[col] = X_train[col].fillna(X_train["Country"].map(medians))
  X_test[col] = X_test[col].fillna(X_test["Country"].map(medians))
  X_train[col] = X_train[col].fillna(X_tr

In [None]:
import pandas as pd

# ============================
# 1. PVQ ITEM LISTS (21 items)
# ============================
pvq_21 = [
    "ipcrtiva","impricha","ipeqopta","ipshabta","impsafea","impdiffa",
    "ipfrulea","ipudrsta","ipmodsta","ipgdtima","impfreea","iphlppla",
    "ipsucesa","ipstrgva","ipadvnta","ipbhprpa","iprspota","iplylfra",
    "impenva","imptrada","impfuna"
]

# 5 basic values that form ST and Conservation
universalism_items = ["ipeqopta", "ipudrsta", "impenva"]
benevolence_items  = ["iphlppla", "iplylfra"]
security_items     = ["impsafea", "ipstrgva"]
conformity_items   = ["ipfrulea", "ipbhprpa"]
tradition_items    = ["imptrada", "ipmodsta"]


# ============================
# 2. FUNCTION ADDING PVQ FEATURES
# ============================
def add_pvq_features(X: pd.DataFrame) -> pd.DataFrame:
    X = X.copy()

    # --- reverse scale 1–6 -> 6–1 ---
    X[pvq_21] = X[pvq_21].apply(pd.to_numeric, errors="coerce")
    X[pvq_21] = 7 - X[pvq_21]

    # --- raw basic values ---
    X["universalism_raw"] = X[universalism_items].mean(axis=1)
    X["benevolence_raw"]  = X[benevolence_items].mean(axis=1)
    X["security_raw"]     = X[security_items].mean(axis=1)
    X["conformity_raw"]   = X[conformity_items].mean(axis=1)
    X["tradition_raw"]    = X[tradition_items].mean(axis=1)

    # --- mean of all 21 items ---
    X["mean_all_items"] = X[pvq_21].mean(axis=1)

    # --- centering basic values ---
    X["universalism_c"] = X["universalism_raw"] - X["mean_all_items"]
    X["benevolence_c"]  = X["benevolence_raw"]  - X["mean_all_items"]
    X["security_c"]     = X["security_raw"]     - X["mean_all_items"]
    X["conformity_c"]   = X["conformity_raw"]   - X["mean_all_items"]
    X["tradition_c"]    = X["tradition_raw"]    - X["mean_all_items"]

    # --- two higher order dimensions ---
    X["Self_Transcendence"] = X[["universalism_c", "benevolence_c"]].mean(axis=1)
    X["Conservation"]       = X[["security_c", "conformity_c", "tradition_c"]].mean(axis=1)

    # --- cleanup: keep only ST and Conservation ---
    X = X.drop(
        columns=pvq_21 + [
            "universalism_raw","benevolence_raw","security_raw",
            "conformity_raw","tradition_raw",
            "universalism_c","benevolence_c","security_c",
            "conformity_c","tradition_c",
            "mean_all_items"
        ],
        errors="ignore"
    )

    return X


# ============================
# 3. APPLYING TO  DATASETS
# ============================


X_train = add_pvq_features(X_train)
X_test  = add_pvq_features(X_test)


In [None]:
#preparing household varaible from numerical to categorical

import numpy as np
import pandas as pd

def add_hhmmb_band(D):
    hh = D["Household size"]
    labels = np.empty(len(hh), dtype=object); labels[:] = None
    labels[hh == 1] = "1 person"
    labels[hh == 2] = "2 people"
    labels[(hh >= 3) & (hh <= 5)] = "3–5 people"
    labels[hh >= 6] = "6+ people"

    D["Household size"] = pd.Categorical(
        labels,
        categories=["1 person", "2 people", "3–5 people", "6+ people"],
        ordered=True)
    return D


X_train = add_hhmmb_band(X_train)
X_test  = add_hhmmb_band(X_test)


In [None]:
#preparing seperate test and train sets, with and without trust related columns --> from now on: X_train; X_test --> datasets with trust-related features;  X_train_notrust; X_test_notrust --> without trust data


trust_cols_named = [
    "Social trust",
    "Trust in the United Nations",
    "Trust in the European Parliament",
    "Trust in the legal system",
    "Trust in country's parliament",
    "Trust in political parties",
    "Trust in politicians",
    "Trust in the police",
]

X_train_notrust = X_train.drop(columns=[c for c in trust_cols_named if c in X_train.columns], errors="ignore").copy()
X_test_notrust  = X_test.drop(columns=[c for c in trust_cols_named if c in X_test.columns], errors="ignore").copy()


In [30]:
cat_columns = X_train.select_dtypes(include=["object", "category"]).columns
num_columns = X_train.select_dtypes(include=["int64", "float64"]).columns


In [None]:
# RANDOM FOREST – ROC AUC 
import numpy as np
import optuna
from optuna.samplers import TPESampler

import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import (
    f1_score,
    precision_score,
    recall_score,
    accuracy_score,
    roc_auc_score,
)
from joblib import dump

from sklearn.ensemble import RandomForestClassifier


# ============================================================
# RANDOM FOREST (NO TRUST) – "rf_auc"
# ============================================================

# 1. Column split
cat_cols = [
    c for c in X_train_notrust.columns
    if str(X_train_notrust[c].dtype) in ("object", "category")
]
num_cols = [c for c in X_train_notrust.columns if c not in cat_cols]

# 2. Preprocessing: OHE + MinMax
ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

preprocess = ColumnTransformer(
    transformers=[
        ("num", MinMaxScaler(), num_cols),
        ("cat", ohe, cat_cols),
    ],
    remainder="drop",
)

# 3. Seeds
seeds = [0, 7, 42, 123, 999]

# Aggregated lists for global statistics
all_outer_f1_rf = []
all_outer_prec_rf = []
all_outer_rec_rf = []
all_outer_acc_rf = []
all_outer_auc_rf = []

# Best model by ROC AUC
best_global_auc_rf = -1.0
best_global_params_rf = None

# Detailed results: seed + fold
rows_rf = []


# ============================================================
# NESTED CV – RANDOM FOREST (NO TRUST, ROC AUC)
# ============================================================
for seed in seeds:
    print("\n" + "=" * 70)
    print(f"=== NESTED CV – SEED: {seed} (RANDOM FOREST, NO TRUST, ROC AUC) ===")
    print("=" * 70)

    outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

    outer_f1_scores = []
    outer_precisions = []
    outer_recalls = []
    outer_accuracies = []
    outer_aucs = []

    for fold, (train_idx, val_idx) in enumerate(
        outer_cv.split(X_train_notrust, y_train),
        start=1
    ):
        X_tr = X_train_notrust.iloc[train_idx].copy()
        X_val = X_train_notrust.iloc[val_idx].copy()
        y_tr = y_train.iloc[train_idx]
        y_val = y_train.iloc[val_idx]

        inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)

        # ----------------- Optuna objective -----------------
        def objective_rf_nested(trial):
            params = {
                # tuned
                "n_estimators":      trial.suggest_int("n_estimators", 100, 1500),
                "max_depth":         trial.suggest_int("max_depth", 3, 30),
                "min_samples_split": trial.suggest_int("min_samples_split", 2, 40),
                "min_samples_leaf":  trial.suggest_int("min_samples_leaf", 1, 20),
                # fixed
                "class_weight":      "balanced",
                "n_jobs":            -1,
                "random_state":      seed,
            }

            clf = RandomForestClassifier(**params)
            pipe = Pipeline([
                ("pre", preprocess),
                ("clf", clf),
            ])

            score = cross_val_score(
                pipe,
                X_tr,
                y_tr,
                cv=inner_cv,
                scoring="roc_auc",   # ROC AUC
                n_jobs=-1,
            ).mean()

            return score
        # ----------------------------------------------------

        study_nested_rf = optuna.create_study(
            direction="maximize",
            sampler=TPESampler(seed=seed),
        )
        study_nested_rf.optimize(
            objective_rf_nested,
            n_trials=50,
            show_progress_bar=False
        )

        best_params_rf = study_nested_rf.best_params.copy()
        best_params_full_rf = best_params_rf.copy()
        best_params_full_rf.update({
            "class_weight": "balanced",
            "n_jobs":       -1,
            "random_state": seed,
        })

        # Model for this outer fold
        rf_fold_model = Pipeline([
            ("pre", preprocess),
            ("clf", RandomForestClassifier(**best_params_full_rf)),
        ])
        rf_fold_model.fit(X_tr, y_tr)

        y_val_pred = rf_fold_model.predict(X_val)
        y_val_proba = rf_fold_model.predict_proba(X_val)[:, 1]

        f1_val = f1_score(y_val, y_val_pred)
        prec_val = precision_score(y_val, y_val_pred, zero_division=0)
        rec_val = recall_score(y_val, y_val_pred, zero_division=0)
        acc_val = accuracy_score(y_val, y_val_pred)
        auc_val = roc_auc_score(y_val, y_val_proba)

        outer_f1_scores.append(f1_val)
        outer_precisions.append(prec_val)
        outer_recalls.append(rec_val)
        outer_accuracies.append(acc_val)
        outer_aucs.append(auc_val)

        all_outer_f1_rf.append(f1_val)
        all_outer_prec_rf.append(prec_val)
        all_outer_rec_rf.append(rec_val)
        all_outer_acc_rf.append(acc_val)
        all_outer_auc_rf.append(auc_val)

        rows_rf.append({
            "seed": seed,
            "fold": fold,
            "F1": f1_val,
            "Precision": prec_val,
            "Recall": rec_val,
            "Accuracy": acc_val,
            "ROC_AUC": auc_val,
        })

        if auc_val > best_global_auc_rf:
            best_global_auc_rf = auc_val
            best_global_params_rf = best_params_full_rf.copy()

        print(f"\nSeed {seed} | Outer fold {fold}")
        print(
            f"  F1={f1_val:.4f}, Prec={prec_val:.4f}, Rec={rec_val:.4f}, "
            f"Acc={acc_val:.4f}, ROC AUC={auc_val:.4f}"
        )

        param_str = []
        for k, v in best_params_full_rf.items():
            if isinstance(v, float):
                param_str.append(f"{k}={v:.4f}")
            else:
                param_str.append(f"{k}={v}")
        param_str = ", ".join(param_str)
        print("  Best params:", param_str)
        # =====================================================

    # Summary for this seed
    outer_f1_scores = np.array(outer_f1_scores)
    outer_precisions = np.array(outer_precisions)
    outer_recalls = np.array(outer_recalls)
    outer_accuracies = np.array(outer_accuracies)
    outer_aucs = np.array(outer_aucs)

    print("\n--- Summary for seed", seed, "(RANDOM FOREST, NO TRUST, ROC AUC) ---")
    print(f"  F1:        mean={outer_f1_scores.mean():.4f}, std={outer_f1_scores.std():.4f}")
    print(f"  Precision: mean={outer_precisions.mean():.4f}, std={outer_precisions.std():.4f}")
    print(f"  Recall:    mean={outer_recalls.mean():.4f}, std={outer_recalls.std():.4f}")
    print(f"  Accuracy:  mean={outer_accuracies.mean():.4f}, std={outer_accuracies.std():.4f}")
    print(f"  ROC AUC:   mean={outer_aucs.mean():.4f}, std={outer_aucs.std():.4f}")


# ============================================================
# GLOBAL SUMMARY – RANDOM FOREST (ROC AUC)
# ============================================================
all_outer_f1_rf = np.array(all_outer_f1_rf)
all_outer_prec_rf = np.array(all_outer_prec_rf)
all_outer_rec_rf = np.array(all_outer_rec_rf)
all_outer_acc_rf = np.array(all_outer_acc_rf)
all_outer_auc_rf = np.array(all_outer_auc_rf)

print("\n" + "=" * 70)
print("===== GLOBAL SUMMARY – RANDOM FOREST, NO TRUST, 5 SEEDS x 5 OUTER FOLDS (ROC AUC) =====")
print("=" * 70)
print(f"Global F1:        mean={all_outer_f1_rf.mean():.4f}, std={all_outer_f1_rf.std():.4f}")
print(f"Global Precision: mean={all_outer_prec_rf.mean():.4f}, std={all_outer_prec_rf.std():.4f}")
print(f"Global Recall:    mean={all_outer_rec_rf.mean():.4f}, std={all_outer_rec_rf.std():.4f}")
print(f"Global Accuracy:  mean={all_outer_acc_rf.mean():.4f}, std={all_outer_acc_rf.std():.4f}")
print(f"Global ROC AUC:   mean={all_outer_auc_rf.mean():.4f}, std={all_outer_auc_rf.std():.4f}")

print("\n===== BEST HYPERPARAMETERS ACROSS ALL SEEDS AND FOLDS (by ROC AUC) – RANDOM FOREST =====")
print(f"Best ROC AUC: {best_global_auc_rf:.4f}")
print("Best hyperparameters:")
for k, v in best_global_params_rf.items():
    print(f"{k}: {v}")

# Detailed table of results: seed + fold
results_rf_df = pd.DataFrame(rows_rf)
print("\n===== DETAILED RESULTS PER SEED AND FOLD (RANDOM FOREST, ROC AUC) =====")
print(results_rf_df)

results_rf_df.to_csv("random_forest_nested_cv_detailed_results_roc_auc.csv", index=False)


[I 2025-11-28 07:32:05,418] A new study created in memory with name: no-name-095037c3-cd95-4799-a4da-20ead3e941db



=== NESTED CV – SEED: 0 (RANDOM FOREST, NO TRUST, ROC AUC) ===


[I 2025-11-28 07:32:24,622] Trial 0 finished with value: 0.768210458461469 and parameters: {'n_estimators': 868, 'max_depth': 23, 'min_samples_split': 25, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.768210458461469.
[I 2025-11-28 07:32:37,626] Trial 1 finished with value: 0.7653980550222651 and parameters: {'n_estimators': 693, 'max_depth': 21, 'min_samples_split': 19, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.768210458461469.
[I 2025-11-28 07:32:59,064] Trial 2 finished with value: 0.7649744705350697 and parameters: {'n_estimators': 1450, 'max_depth': 13, 'min_samples_split': 32, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.768210458461469.
[I 2025-11-28 07:33:19,863] Trial 3 finished with value: 0.7710464860014513 and parameters: {'n_estimators': 895, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.7710464860014513.
[I 2025-11-28 07:33:22,146] Trial 4 finished with value: 0.7647286985651002 and parameter


Seed 0 | Outer fold 1
  F1=0.6360, Prec=0.5981, Rec=0.6792, Acc=0.6988, ROC AUC=0.7763
  Best params: n_estimators=986, max_depth=23, min_samples_split=36, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=0


[I 2025-11-28 07:42:34,070] Trial 0 finished with value: 0.7704901718716783 and parameters: {'n_estimators': 868, 'max_depth': 23, 'min_samples_split': 25, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7704901718716783.
[I 2025-11-28 07:42:44,999] Trial 1 finished with value: 0.7676554888710858 and parameters: {'n_estimators': 693, 'max_depth': 21, 'min_samples_split': 19, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7704901718716783.
[I 2025-11-28 07:43:05,649] Trial 2 finished with value: 0.7673917488706966 and parameters: {'n_estimators': 1450, 'max_depth': 13, 'min_samples_split': 32, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7704901718716783.
[I 2025-11-28 07:43:26,499] Trial 3 finished with value: 0.7745231102456632 and parameters: {'n_estimators': 895, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.7745231102456632.
[I 2025-11-28 07:43:28,770] Trial 4 finished with value: 0.7675521462823184 and param


Seed 0 | Outer fold 2
  F1=0.6223, Prec=0.6080, Rec=0.6372, Acc=0.7002, ROC AUC=0.7672
  Best params: n_estimators=908, max_depth=27, min_samples_split=15, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=0


[I 2025-11-28 07:55:33,760] Trial 0 finished with value: 0.7681682249672955 and parameters: {'n_estimators': 868, 'max_depth': 23, 'min_samples_split': 25, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7681682249672955.
[I 2025-11-28 07:55:44,488] Trial 1 finished with value: 0.7646908133570395 and parameters: {'n_estimators': 693, 'max_depth': 21, 'min_samples_split': 19, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7681682249672955.
[I 2025-11-28 07:56:04,842] Trial 2 finished with value: 0.764565606598914 and parameters: {'n_estimators': 1450, 'max_depth': 13, 'min_samples_split': 32, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7681682249672955.
[I 2025-11-28 07:56:25,417] Trial 3 finished with value: 0.7716951159136128 and parameters: {'n_estimators': 895, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.7716951159136128.
[I 2025-11-28 07:56:27,653] Trial 4 finished with value: 0.7640395801132239 and parame


Seed 0 | Outer fold 3
  F1=0.6414, Prec=0.6240, Rec=0.6599, Acc=0.7142, ROC AUC=0.7820
  Best params: n_estimators=1033, max_depth=30, min_samples_split=14, min_samples_leaf=2, class_weight=balanced, n_jobs=-1, random_state=0


[I 2025-11-28 08:08:35,238] Trial 0 finished with value: 0.7690061286124396 and parameters: {'n_estimators': 868, 'max_depth': 23, 'min_samples_split': 25, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7690061286124396.
[I 2025-11-28 08:08:45,909] Trial 1 finished with value: 0.7658874696385268 and parameters: {'n_estimators': 693, 'max_depth': 21, 'min_samples_split': 19, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7690061286124396.
[I 2025-11-28 08:09:06,305] Trial 2 finished with value: 0.7658079116591 and parameters: {'n_estimators': 1450, 'max_depth': 13, 'min_samples_split': 32, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7690061286124396.
[I 2025-11-28 08:09:27,036] Trial 3 finished with value: 0.7735785349205333 and parameters: {'n_estimators': 895, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.7735785349205333.
[I 2025-11-28 08:09:29,226] Trial 4 finished with value: 0.7651912453356405 and paramete


Seed 0 | Outer fold 4
  F1=0.6172, Prec=0.6129, Rec=0.6217, Acc=0.7012, ROC AUC=0.7678
  Best params: n_estimators=1121, max_depth=28, min_samples_split=11, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=0


[I 2025-11-28 08:22:18,654] Trial 0 finished with value: 0.7667685616552268 and parameters: {'n_estimators': 868, 'max_depth': 23, 'min_samples_split': 25, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7667685616552268.
[I 2025-11-28 08:22:29,318] Trial 1 finished with value: 0.764347995658618 and parameters: {'n_estimators': 693, 'max_depth': 21, 'min_samples_split': 19, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7667685616552268.
[I 2025-11-28 08:22:49,514] Trial 2 finished with value: 0.7637600355383337 and parameters: {'n_estimators': 1450, 'max_depth': 13, 'min_samples_split': 32, 'min_samples_leaf': 11}. Best is trial 0 with value: 0.7667685616552268.
[I 2025-11-28 08:23:10,101] Trial 3 finished with value: 0.7702304337672604 and parameters: {'n_estimators': 895, 'max_depth': 28, 'min_samples_split': 4, 'min_samples_leaf': 2}. Best is trial 3 with value: 0.7702304337672604.
[I 2025-11-28 08:23:12,293] Trial 4 finished with value: 0.7640296250297663 and parame


Seed 0 | Outer fold 5
  F1=0.6474, Prec=0.6263, Rec=0.6700, Acc=0.7172, ROC AUC=0.7820
  Best params: n_estimators=910, max_depth=28, min_samples_split=21, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=0

--- Summary for seed 0 (RANDOM FOREST, NO TRUST, ROC AUC) ---
  F1:        mean=0.6329, std=0.0114
  Precision: mean=0.6139, std=0.0104
  Recall:    mean=0.6536, std=0.0212
  Accuracy:  mean=0.7063, std=0.0077
  ROC AUC:   mean=0.7751, std=0.0065

=== NESTED CV – SEED: 7 (RANDOM FOREST, NO TRUST, ROC AUC) ===


[I 2025-11-28 08:35:11,462] Trial 0 finished with value: 0.7665587503072812 and parameters: {'n_estimators': 206, 'max_depth': 24, 'min_samples_split': 19, 'min_samples_leaf': 15}. Best is trial 0 with value: 0.7665587503072812.
[I 2025-11-28 08:35:37,379] Trial 1 finished with value: 0.771772067323016 and parameters: {'n_estimators': 1470, 'max_depth': 18, 'min_samples_split': 21, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.771772067323016.
[I 2025-11-28 08:35:44,555] Trial 2 finished with value: 0.7655580574187361 and parameters: {'n_estimators': 476, 'max_depth': 16, 'min_samples_split': 28, 'min_samples_leaf': 17}. Best is trial 1 with value: 0.771772067323016.
[I 2025-11-28 08:35:49,426] Trial 3 finished with value: 0.7407857775747665 and parameters: {'n_estimators': 633, 'max_depth': 4, 'min_samples_split': 13, 'min_samples_leaf': 19}. Best is trial 1 with value: 0.771772067323016.
[I 2025-11-28 08:35:55,686] Trial 4 finished with value: 0.769640379974196 and parameters


Seed 7 | Outer fold 1
  F1=0.6264, Prec=0.6170, Rec=0.6361, Acc=0.7059, ROC AUC=0.7730
  Best params: n_estimators=1111, max_depth=26, min_samples_split=12, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=7


[I 2025-11-28 08:49:24,961] Trial 0 finished with value: 0.7681630864009682 and parameters: {'n_estimators': 206, 'max_depth': 24, 'min_samples_split': 19, 'min_samples_leaf': 15}. Best is trial 0 with value: 0.7681630864009682.
[I 2025-11-28 08:49:50,742] Trial 1 finished with value: 0.7740906591513076 and parameters: {'n_estimators': 1470, 'max_depth': 18, 'min_samples_split': 21, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.7740906591513076.
[I 2025-11-28 08:49:57,890] Trial 2 finished with value: 0.767127033756705 and parameters: {'n_estimators': 476, 'max_depth': 16, 'min_samples_split': 28, 'min_samples_leaf': 17}. Best is trial 1 with value: 0.7740906591513076.
[I 2025-11-28 08:50:02,776] Trial 3 finished with value: 0.7420640523722332 and parameters: {'n_estimators': 633, 'max_depth': 4, 'min_samples_split': 13, 'min_samples_leaf': 19}. Best is trial 1 with value: 0.7740906591513076.
[I 2025-11-28 08:50:09,042] Trial 4 finished with value: 0.7718857439310112 and parame


Seed 7 | Outer fold 2
  F1=0.6239, Prec=0.6118, Rec=0.6364, Acc=0.7026, ROC AUC=0.7662
  Best params: n_estimators=1412, max_depth=28, min_samples_split=12, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=7


[I 2025-11-28 09:06:29,947] Trial 0 finished with value: 0.7663126288689067 and parameters: {'n_estimators': 206, 'max_depth': 24, 'min_samples_split': 19, 'min_samples_leaf': 15}. Best is trial 0 with value: 0.7663126288689067.
[I 2025-11-28 09:06:55,864] Trial 1 finished with value: 0.770962750755532 and parameters: {'n_estimators': 1470, 'max_depth': 18, 'min_samples_split': 21, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.770962750755532.
[I 2025-11-28 09:07:03,187] Trial 2 finished with value: 0.7645804810520761 and parameters: {'n_estimators': 476, 'max_depth': 16, 'min_samples_split': 28, 'min_samples_leaf': 17}. Best is trial 1 with value: 0.770962750755532.
[I 2025-11-28 09:07:08,023] Trial 3 finished with value: 0.739675371028876 and parameters: {'n_estimators': 633, 'max_depth': 4, 'min_samples_split': 13, 'min_samples_leaf': 19}. Best is trial 1 with value: 0.770962750755532.
[I 2025-11-28 09:07:14,303] Trial 4 finished with value: 0.7687485061015588 and parameters


Seed 7 | Outer fold 3
  F1=0.6390, Prec=0.6106, Rec=0.6702, Acc=0.7066, ROC AUC=0.7789
  Best params: n_estimators=1414, max_depth=28, min_samples_split=26, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=7


[I 2025-11-28 09:21:57,856] Trial 0 finished with value: 0.7655920998599428 and parameters: {'n_estimators': 206, 'max_depth': 24, 'min_samples_split': 19, 'min_samples_leaf': 15}. Best is trial 0 with value: 0.7655920998599428.
[I 2025-11-28 09:22:23,740] Trial 1 finished with value: 0.7706827756890898 and parameters: {'n_estimators': 1470, 'max_depth': 18, 'min_samples_split': 21, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.7706827756890898.
[I 2025-11-28 09:22:30,945] Trial 2 finished with value: 0.7642801438921398 and parameters: {'n_estimators': 476, 'max_depth': 16, 'min_samples_split': 28, 'min_samples_leaf': 17}. Best is trial 1 with value: 0.7706827756890898.
[I 2025-11-28 09:22:35,765] Trial 3 finished with value: 0.7394915239034323 and parameters: {'n_estimators': 633, 'max_depth': 4, 'min_samples_split': 13, 'min_samples_leaf': 19}. Best is trial 1 with value: 0.7706827756890898.
[I 2025-11-28 09:22:42,007] Trial 4 finished with value: 0.7682091408225539 and param


Seed 7 | Outer fold 4
  F1=0.6439, Prec=0.6197, Rec=0.6700, Acc=0.7127, ROC AUC=0.7823
  Best params: n_estimators=297, max_depth=30, min_samples_split=27, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=7


[I 2025-11-28 09:35:43,188] Trial 0 finished with value: 0.7657119824174407 and parameters: {'n_estimators': 206, 'max_depth': 24, 'min_samples_split': 19, 'min_samples_leaf': 15}. Best is trial 0 with value: 0.7657119824174407.
[I 2025-11-28 09:36:09,018] Trial 1 finished with value: 0.7713639758995572 and parameters: {'n_estimators': 1470, 'max_depth': 18, 'min_samples_split': 21, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.7713639758995572.
[I 2025-11-28 09:36:16,198] Trial 2 finished with value: 0.7649476027899462 and parameters: {'n_estimators': 476, 'max_depth': 16, 'min_samples_split': 28, 'min_samples_leaf': 17}. Best is trial 1 with value: 0.7713639758995572.
[I 2025-11-28 09:36:21,013] Trial 3 finished with value: 0.7404614970228917 and parameters: {'n_estimators': 633, 'max_depth': 4, 'min_samples_split': 13, 'min_samples_leaf': 19}. Best is trial 1 with value: 0.7713639758995572.
[I 2025-11-28 09:36:27,233] Trial 4 finished with value: 0.7689110079155129 and param


Seed 7 | Outer fold 5
  F1=0.6375, Prec=0.6260, Rec=0.6493, Acc=0.7137, ROC AUC=0.7810
  Best params: n_estimators=1230, max_depth=27, min_samples_split=13, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=7

--- Summary for seed 7 (RANDOM FOREST, NO TRUST, ROC AUC) ---
  F1:        mean=0.6341, std=0.0077
  Precision: mean=0.6170, std=0.0056
  Recall:    mean=0.6524, std=0.0152
  Accuracy:  mean=0.7083, std=0.0042
  ROC AUC:   mean=0.7763, std=0.0059

=== NESTED CV – SEED: 42 (RANDOM FOREST, NO TRUST, ROC AUC) ===


[I 2025-11-28 09:52:35,968] Trial 0 finished with value: 0.7660258876906633 and parameters: {'n_estimators': 624, 'max_depth': 29, 'min_samples_split': 30, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7660258876906633.
[I 2025-11-28 09:52:39,469] Trial 1 finished with value: 0.7507537489578837 and parameters: {'n_estimators': 318, 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7660258876906633.
[I 2025-11-28 09:52:54,024] Trial 2 finished with value: 0.7630534572697424 and parameters: {'n_estimators': 942, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 20}. Best is trial 0 with value: 0.7660258876906633.
[I 2025-11-28 09:53:08,281] Trial 3 finished with value: 0.7557460618070367 and parameters: {'n_estimators': 1266, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7660258876906633.
[I 2025-11-28 09:53:17,145] Trial 4 finished with value: 0.7674480170683218 and parameter


Seed 42 | Outer fold 1
  F1=0.6454, Prec=0.6092, Rec=0.6862, Acc=0.7078, ROC AUC=0.7784
  Best params: n_estimators=1313, max_depth=29, min_samples_split=29, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=42


[I 2025-11-28 10:07:15,101] Trial 0 finished with value: 0.7674790123694128 and parameters: {'n_estimators': 624, 'max_depth': 29, 'min_samples_split': 30, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7674790123694128.
[I 2025-11-28 10:07:18,587] Trial 1 finished with value: 0.7527966232323763 and parameters: {'n_estimators': 318, 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7674790123694128.
[I 2025-11-28 10:07:33,031] Trial 2 finished with value: 0.7649017188069314 and parameters: {'n_estimators': 942, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 20}. Best is trial 0 with value: 0.7674790123694128.
[I 2025-11-28 10:07:47,049] Trial 3 finished with value: 0.7579161237794505 and parameters: {'n_estimators': 1266, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7674790123694128.
[I 2025-11-28 10:07:55,905] Trial 4 finished with value: 0.7692412241800078 and parameter


Seed 42 | Outer fold 2
  F1=0.6343, Prec=0.5992, Rec=0.6737, Acc=0.6989, ROC AUC=0.7703
  Best params: n_estimators=727, max_depth=23, min_samples_split=24, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=42


[I 2025-11-28 10:19:48,193] Trial 0 finished with value: 0.7686581300857078 and parameters: {'n_estimators': 624, 'max_depth': 29, 'min_samples_split': 30, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7686581300857078.
[I 2025-11-28 10:19:51,562] Trial 1 finished with value: 0.7533941017662977 and parameters: {'n_estimators': 318, 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7686581300857078.
[I 2025-11-28 10:20:05,933] Trial 2 finished with value: 0.7658039935551351 and parameters: {'n_estimators': 942, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 20}. Best is trial 0 with value: 0.7686581300857078.
[I 2025-11-28 10:20:19,890] Trial 3 finished with value: 0.7581888543173472 and parameters: {'n_estimators': 1266, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7686581300857078.
[I 2025-11-28 10:20:28,690] Trial 4 finished with value: 0.7700301935266379 and parameter


Seed 42 | Outer fold 3
  F1=0.6307, Prec=0.6091, Rec=0.6540, Acc=0.7033, ROC AUC=0.7731
  Best params: n_estimators=462, max_depth=28, min_samples_split=20, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=42


[I 2025-11-28 10:31:34,598] Trial 0 finished with value: 0.769188957011 and parameters: {'n_estimators': 624, 'max_depth': 29, 'min_samples_split': 30, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.769188957011.
[I 2025-11-28 10:31:38,152] Trial 1 finished with value: 0.7533837766032135 and parameters: {'n_estimators': 318, 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.769188957011.
[I 2025-11-28 10:31:52,858] Trial 2 finished with value: 0.7660168910424167 and parameters: {'n_estimators': 942, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 20}. Best is trial 0 with value: 0.769188957011.
[I 2025-11-28 10:32:07,083] Trial 3 finished with value: 0.7586482823478319 and parameters: {'n_estimators': 1266, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.769188957011.
[I 2025-11-28 10:32:15,969] Trial 4 finished with value: 0.7700161025105289 and parameters: {'n_estimators': 


Seed 42 | Outer fold 4
  F1=0.6367, Prec=0.6143, Rec=0.6608, Acc=0.7077, ROC AUC=0.7738
  Best params: n_estimators=1313, max_depth=30, min_samples_split=26, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=42


[I 2025-11-28 10:46:26,267] Trial 0 finished with value: 0.7661735058927653 and parameters: {'n_estimators': 624, 'max_depth': 29, 'min_samples_split': 30, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7661735058927653.
[I 2025-11-28 10:46:29,698] Trial 1 finished with value: 0.7508181411265817 and parameters: {'n_estimators': 318, 'max_depth': 7, 'min_samples_split': 4, 'min_samples_leaf': 18}. Best is trial 0 with value: 0.7661735058927653.
[I 2025-11-28 10:46:44,198] Trial 2 finished with value: 0.7631934669801795 and parameters: {'n_estimators': 942, 'max_depth': 22, 'min_samples_split': 2, 'min_samples_leaf': 20}. Best is trial 0 with value: 0.7661735058927653.
[I 2025-11-28 10:46:58,478] Trial 3 finished with value: 0.7552539546453493 and parameters: {'n_estimators': 1266, 'max_depth': 8, 'min_samples_split': 9, 'min_samples_leaf': 4}. Best is trial 0 with value: 0.7661735058927653.
[I 2025-11-28 10:47:07,453] Trial 4 finished with value: 0.767275207150092 and parameters


Seed 42 | Outer fold 5
  F1=0.6490, Prec=0.6207, Rec=0.6799, Acc=0.7149, ROC AUC=0.7842
  Best params: n_estimators=1122, max_depth=30, min_samples_split=31, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=42

--- Summary for seed 42 (RANDOM FOREST, NO TRUST, ROC AUC) ---
  F1:        mean=0.6392, std=0.0069
  Precision: mean=0.6105, std=0.0071
  Recall:    mean=0.6709, std=0.0119
  Accuracy:  mean=0.7065, std=0.0053
  ROC AUC:   mean=0.7760, std=0.0049

=== NESTED CV – SEED: 123 (RANDOM FOREST, NO TRUST, ROC AUC) ===


[I 2025-11-28 10:58:53,698] Trial 0 finished with value: 0.7617997913172546 and parameters: {'n_estimators': 1075, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7617997913172546.
[I 2025-11-28 10:59:09,618] Trial 1 finished with value: 0.7642103483126048 and parameters: {'n_estimators': 1107, 'max_depth': 14, 'min_samples_split': 40, 'min_samples_leaf': 14}. Best is trial 1 with value: 0.7642103483126048.
[I 2025-11-28 10:59:20,514] Trial 2 finished with value: 0.7631646429937949 and parameters: {'n_estimators': 773, 'max_depth': 13, 'min_samples_split': 15, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7642103483126048.
[I 2025-11-28 10:59:25,842] Trial 3 finished with value: 0.7401654721018293 and parameters: {'n_estimators': 714, 'max_depth': 4, 'min_samples_split': 17, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7642103483126048.
[I 2025-11-28 10:59:29,704] Trial 4 finished with value: 0.7520110349349961 and par


Seed 123 | Outer fold 1
  F1=0.6355, Prec=0.6159, Rec=0.6563, Acc=0.7082, ROC AUC=0.7775
  Best params: n_estimators=1027, max_depth=28, min_samples_split=19, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=123


[I 2025-11-28 11:14:59,333] Trial 0 finished with value: 0.7626088594795902 and parameters: {'n_estimators': 1075, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7626088594795902.
[I 2025-11-28 11:15:15,338] Trial 1 finished with value: 0.7648832253020226 and parameters: {'n_estimators': 1107, 'max_depth': 14, 'min_samples_split': 40, 'min_samples_leaf': 14}. Best is trial 1 with value: 0.7648832253020226.
[I 2025-11-28 11:15:26,274] Trial 2 finished with value: 0.7640411412339488 and parameters: {'n_estimators': 773, 'max_depth': 13, 'min_samples_split': 15, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7648832253020226.
[I 2025-11-28 11:15:31,761] Trial 3 finished with value: 0.7402091574911154 and parameters: {'n_estimators': 714, 'max_depth': 4, 'min_samples_split': 17, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7648832253020226.
[I 2025-11-28 11:15:35,559] Trial 4 finished with value: 0.7534182558176958 and par


Seed 123 | Outer fold 2
  F1=0.6339, Prec=0.6254, Rec=0.6427, Acc=0.7123, ROC AUC=0.7787
  Best params: n_estimators=1038, max_depth=29, min_samples_split=15, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=123


[I 2025-11-28 11:29:44,260] Trial 0 finished with value: 0.762729176281823 and parameters: {'n_estimators': 1075, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.762729176281823.
[I 2025-11-28 11:30:00,149] Trial 1 finished with value: 0.7652791124991354 and parameters: {'n_estimators': 1107, 'max_depth': 14, 'min_samples_split': 40, 'min_samples_leaf': 14}. Best is trial 1 with value: 0.7652791124991354.
[I 2025-11-28 11:30:12,106] Trial 2 finished with value: 0.7643667374187292 and parameters: {'n_estimators': 773, 'max_depth': 13, 'min_samples_split': 15, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7652791124991354.
[I 2025-11-28 11:30:17,465] Trial 3 finished with value: 0.7406889149767223 and parameters: {'n_estimators': 714, 'max_depth': 4, 'min_samples_split': 17, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7652791124991354.
[I 2025-11-28 11:30:21,292] Trial 4 finished with value: 0.7533954569225512 and param


Seed 123 | Outer fold 3
  F1=0.6387, Prec=0.6097, Rec=0.6706, Acc=0.7060, ROC AUC=0.7721
  Best params: n_estimators=1183, max_depth=30, min_samples_split=20, min_samples_leaf=2, class_weight=balanced, n_jobs=-1, random_state=123


[I 2025-11-28 11:43:43,465] Trial 0 finished with value: 0.7621115736694074 and parameters: {'n_estimators': 1075, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7621115736694074.
[I 2025-11-28 11:43:59,279] Trial 1 finished with value: 0.7644964667617211 and parameters: {'n_estimators': 1107, 'max_depth': 14, 'min_samples_split': 40, 'min_samples_leaf': 14}. Best is trial 1 with value: 0.7644964667617211.
[I 2025-11-28 11:44:10,171] Trial 2 finished with value: 0.7636432186321144 and parameters: {'n_estimators': 773, 'max_depth': 13, 'min_samples_split': 15, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7644964667617211.
[I 2025-11-28 11:44:15,645] Trial 3 finished with value: 0.7401201670668995 and parameters: {'n_estimators': 714, 'max_depth': 4, 'min_samples_split': 17, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7644964667617211.
[I 2025-11-28 11:44:19,465] Trial 4 finished with value: 0.7527457379907054 and par


Seed 123 | Outer fold 4
  F1=0.6375, Prec=0.6155, Rec=0.6611, Acc=0.7086, ROC AUC=0.7788
  Best params: n_estimators=868, max_depth=30, min_samples_split=19, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=123


[I 2025-11-28 11:58:09,324] Trial 0 finished with value: 0.7646105121120103 and parameters: {'n_estimators': 1075, 'max_depth': 11, 'min_samples_split': 10, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7646105121120103.
[I 2025-11-28 11:58:25,246] Trial 1 finished with value: 0.7668811162910446 and parameters: {'n_estimators': 1107, 'max_depth': 14, 'min_samples_split': 40, 'min_samples_leaf': 14}. Best is trial 1 with value: 0.7668811162910446.
[I 2025-11-28 11:58:36,232] Trial 2 finished with value: 0.7659854763581494 and parameters: {'n_estimators': 773, 'max_depth': 13, 'min_samples_split': 15, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7668811162910446.
[I 2025-11-28 11:58:41,803] Trial 3 finished with value: 0.7427838116604616 and parameters: {'n_estimators': 714, 'max_depth': 4, 'min_samples_split': 17, 'min_samples_leaf': 15}. Best is trial 1 with value: 0.7668811162910446.
[I 2025-11-28 11:58:45,643] Trial 4 finished with value: 0.7557007917435405 and par


Seed 123 | Outer fold 5
  F1=0.6210, Prec=0.6264, Rec=0.6158, Acc=0.7087, ROC AUC=0.7711
  Best params: n_estimators=1492, max_depth=30, min_samples_split=10, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=123

--- Summary for seed 123 (RANDOM FOREST, NO TRUST, ROC AUC) ---
  F1:        mean=0.6333, std=0.0064
  Precision: mean=0.6186, std=0.0064
  Recall:    mean=0.6493, std=0.0190
  Accuracy:  mean=0.7088, std=0.0020
  ROC AUC:   mean=0.7756, std=0.0034

=== NESTED CV – SEED: 999 (RANDOM FOREST, NO TRUST, ROC AUC) ===


[I 2025-11-28 12:12:02,828] Trial 0 finished with value: 0.7673233628735296 and parameters: {'n_estimators': 1225, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 13}. Best is trial 0 with value: 0.7673233628735296.
[I 2025-11-28 12:12:06,215] Trial 1 finished with value: 0.7646426008595664 and parameters: {'n_estimators': 227, 'max_depth': 12, 'min_samples_split': 18, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7673233628735296.
[I 2025-11-28 12:12:23,690] Trial 2 finished with value: 0.7723880705385623 and parameters: {'n_estimators': 980, 'max_depth': 22, 'min_samples_split': 32, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.7723880705385623.
[I 2025-11-28 12:12:30,211] Trial 3 finished with value: 0.7576496403173066 and parameters: {'n_estimators': 580, 'max_depth': 8, 'min_samples_split': 29, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.7723880705385623.
[I 2025-11-28 12:12:50,583] Trial 4 finished with value: 0.7671137361018806 and paramet


Seed 999 | Outer fold 1
  F1=0.6461, Prec=0.6036, Rec=0.6951, Acc=0.7049, ROC AUC=0.7753
  Best params: n_estimators=1347, max_depth=29, min_samples_split=28, min_samples_leaf=2, class_weight=balanced, n_jobs=-1, random_state=999


[I 2025-11-28 12:26:04,730] Trial 0 finished with value: 0.7654082879246831 and parameters: {'n_estimators': 1225, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 13}. Best is trial 0 with value: 0.7654082879246831.
[I 2025-11-28 12:26:08,074] Trial 1 finished with value: 0.762315694634891 and parameters: {'n_estimators': 227, 'max_depth': 12, 'min_samples_split': 18, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7654082879246831.
[I 2025-11-28 12:26:25,546] Trial 2 finished with value: 0.7708971347421958 and parameters: {'n_estimators': 980, 'max_depth': 22, 'min_samples_split': 32, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.7708971347421958.
[I 2025-11-28 12:26:32,068] Trial 3 finished with value: 0.7559516666216609 and parameters: {'n_estimators': 580, 'max_depth': 8, 'min_samples_split': 29, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.7708971347421958.
[I 2025-11-28 12:26:52,453] Trial 4 finished with value: 0.764840105913386 and parameter


Seed 999 | Outer fold 2
  F1=0.6411, Prec=0.6109, Rec=0.6744, Acc=0.7073, ROC AUC=0.7784
  Best params: n_estimators=848, max_depth=28, min_samples_split=27, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=999


[I 2025-11-28 12:37:22,537] Trial 0 finished with value: 0.7679745021543113 and parameters: {'n_estimators': 1225, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 13}. Best is trial 0 with value: 0.7679745021543113.
[I 2025-11-28 12:37:25,950] Trial 1 finished with value: 0.7645744473801862 and parameters: {'n_estimators': 227, 'max_depth': 12, 'min_samples_split': 18, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7679745021543113.
[I 2025-11-28 12:37:43,429] Trial 2 finished with value: 0.7731219663859298 and parameters: {'n_estimators': 980, 'max_depth': 22, 'min_samples_split': 32, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.7731219663859298.
[I 2025-11-28 12:37:50,071] Trial 3 finished with value: 0.7583212724426782 and parameters: {'n_estimators': 580, 'max_depth': 8, 'min_samples_split': 29, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.7731219663859298.
[I 2025-11-28 12:38:10,760] Trial 4 finished with value: 0.7675712625048657 and paramet


Seed 999 | Outer fold 3
  F1=0.6279, Prec=0.6138, Rec=0.6426, Acc=0.7049, ROC AUC=0.7734
  Best params: n_estimators=1242, max_depth=28, min_samples_split=13, min_samples_leaf=2, class_weight=balanced, n_jobs=-1, random_state=999


[I 2025-11-28 12:51:16,484] Trial 0 finished with value: 0.7661785781297392 and parameters: {'n_estimators': 1225, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 13}. Best is trial 0 with value: 0.7661785781297392.
[I 2025-11-28 12:51:19,858] Trial 1 finished with value: 0.7622393996626848 and parameters: {'n_estimators': 227, 'max_depth': 12, 'min_samples_split': 18, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7661785781297392.
[I 2025-11-28 12:51:37,298] Trial 2 finished with value: 0.771344902586573 and parameters: {'n_estimators': 980, 'max_depth': 22, 'min_samples_split': 32, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.771344902586573.
[I 2025-11-28 12:51:43,816] Trial 3 finished with value: 0.756181441515074 and parameters: {'n_estimators': 580, 'max_depth': 8, 'min_samples_split': 29, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.771344902586573.
[I 2025-11-28 12:52:04,130] Trial 4 finished with value: 0.7656217122753004 and parameters:


Seed 999 | Outer fold 4
  F1=0.6369, Prec=0.6131, Rec=0.6626, Acc=0.7072, ROC AUC=0.7729
  Best params: n_estimators=900, max_depth=30, min_samples_split=30, min_samples_leaf=1, class_weight=balanced, n_jobs=-1, random_state=999


[I 2025-11-28 13:03:36,446] Trial 0 finished with value: 0.7663089008808533 and parameters: {'n_estimators': 1225, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 13}. Best is trial 0 with value: 0.7663089008808533.
[I 2025-11-28 13:03:39,828] Trial 1 finished with value: 0.7628000261030808 and parameters: {'n_estimators': 227, 'max_depth': 12, 'min_samples_split': 18, 'min_samples_leaf': 12}. Best is trial 0 with value: 0.7663089008808533.
[I 2025-11-28 13:03:57,328] Trial 2 finished with value: 0.7715661127622925 and parameters: {'n_estimators': 980, 'max_depth': 22, 'min_samples_split': 32, 'min_samples_leaf': 3}. Best is trial 2 with value: 0.7715661127622925.
[I 2025-11-28 13:04:03,953] Trial 3 finished with value: 0.7569573649380562 and parameters: {'n_estimators': 580, 'max_depth': 8, 'min_samples_split': 29, 'min_samples_leaf': 1}. Best is trial 2 with value: 0.7715661127622925.
[I 2025-11-28 13:04:24,705] Trial 4 finished with value: 0.7658811188832716 and paramet


Seed 999 | Outer fold 5
  F1=0.6350, Prec=0.6069, Rec=0.6659, Acc=0.7033, ROC AUC=0.7773
  Best params: n_estimators=959, max_depth=29, min_samples_split=29, min_samples_leaf=2, class_weight=balanced, n_jobs=-1, random_state=999

--- Summary for seed 999 (RANDOM FOREST, NO TRUST, ROC AUC) ---
  F1:        mean=0.6374, std=0.0061
  Precision: mean=0.6096, std=0.0039
  Recall:    mean=0.6681, std=0.0170
  Accuracy:  mean=0.7055, std=0.0015
  ROC AUC:   mean=0.7754, std=0.0021

===== GLOBAL SUMMARY – RANDOM FOREST, NO TRUST, 5 SEEDS x 5 OUTER FOLDS (ROC AUC) =====
Global F1:        mean=0.6354, std=0.0083
Global Precision: mean=0.6139, std=0.0078
Global Recall:    mean=0.6589, std=0.0193
Global Accuracy:  mean=0.7071, std=0.0049
Global ROC AUC:   mean=0.7757, std=0.0049

===== BEST HYPERPARAMETERS ACROSS ALL SEEDS AND FOLDS (by ROC AUC) – RANDOM FOREST =====
Best ROC AUC: 0.7842
Best hyperparameters:
n_estimators: 1122
max_depth: 30
min_samples_split: 31
min_samples_leaf: 1
class_weight:

In [None]:
# CATBOOST (NO TRUST, OHE) – ROC AUC       
import numpy as np
import optuna
from optuna.samplers import TPESampler

import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import (
    f1_score,
    precision_score,
    recall_score,
    accuracy_score,
    roc_auc_score,
)
from joblib import dump

from catboost import CatBoostClassifier


# ============================================================
# CATBOOST (NO TRUST, OHE) – "catboost_ohe_auc"
# ============================================================

# 1. Column split
cat_cols = [
    c for c in X_train_notrust.columns
    if str(X_train_notrust[c].dtype) in ("object", "category")
]
num_cols = [c for c in X_train_notrust.columns if c not in cat_cols]

# 2. Preprocessing: OHE + MinMax
ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

preprocess = ColumnTransformer(
    transformers=[
        ("num", MinMaxScaler(), num_cols),
        ("cat", ohe, cat_cols),
    ],
    remainder="drop",
)

# 3. scale_pos_weight (neg/pos)
pos = int((y_train == 1).sum())
neg = int((y_train == 0).sum())
base_spw = neg / max(pos, 1)

# 4. Seeds
seeds = [0, 7, 42, 123, 999]

# Aggregated lists for global statistics
all_outer_f1_catboost = []
all_outer_prec_catboost = []
all_outer_rec_catboost = []
all_outer_acc_catboost = []
all_outer_auc_catboost = []

# Best model by ROC AUC (across all seeds and folds)
best_global_auc_catboost = -1.0
best_global_params_catboost = None

# Detailed results: seed + fold
rows_catboost = []


# ============================================================
# NESTED CV – CATBOOST_OHE (ROC AUC)
# ============================================================
for seed in seeds:
    print("\n" + "=" * 70)
    print(f"=== NESTED CV – SEED: {seed} (CATBOOST_OHE, NO TRUST, ROC AUC) ===")
    print("=" * 70)

    outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

    outer_f1_scores = []
    outer_precisions = []
    outer_recalls = []
    outer_accuracies = []
    outer_aucs = []

    for fold, (train_idx, val_idx) in enumerate(
        outer_cv.split(X_train_notrust, y_train),
        start=1
    ):
        X_tr = X_train_notrust.iloc[train_idx].copy()
        X_val = X_train_notrust.iloc[val_idx].copy()
        y_tr = y_train.iloc[train_idx]
        y_val = y_train.iloc[val_idx]

        inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=seed)

        # ----------------- Optuna objective -----------------
        def objective_catboost_nested(trial):
            tune_params_catboost = {
                "iterations":        trial.suggest_int("iterations", 100, 1500),
                "learning_rate":     trial.suggest_float("learning_rate", 0.01, 0.5, log=True),
                "depth":             trial.suggest_int("depth", 4, 10),
                "l2_leaf_reg":       trial.suggest_float("l2_leaf_reg", 1e-4, 1.0, log=True),
                "subsample":         trial.suggest_float("subsample", 0.5, 1.0),
                "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.5, 1.0),
            }

            fixed_params_catboost = {
                "scale_pos_weight": base_spw,
                "random_state":     seed,
                "verbose":          0,
                "loss_function":    "Logloss",
                "eval_metric":      "Logloss",
            }

            model = CatBoostClassifier(
                **tune_params_catboost,
                **fixed_params_catboost
            )

            pipe = Pipeline([
                ("pre", preprocess),
                ("clf", model),
            ])

            # ROC AUC as scoring metric
            score = cross_val_score(
                pipe,
                X_tr,
                y_tr,
                cv=inner_cv,
                scoring="roc_auc",
                n_jobs=-1,
            ).mean()

            return score
        # ----------------------------------------------------

        study_nested_catboost = optuna.create_study(
            direction="maximize",
            sampler=TPESampler(seed=seed),
        )
        study_nested_catboost.optimize(
            objective_catboost_nested,
            n_trials=50,
            show_progress_bar=False
        )

        best_params_catboost = study_nested_catboost.best_params.copy()
        best_params_full_catboost = best_params_catboost.copy()
        best_params_full_catboost.update({
            "scale_pos_weight": base_spw,
            "random_state":     seed,
            "verbose":          0,
            "loss_function":    "Logloss",
            "eval_metric":      "Logloss",
        })

        # Model for this outer fold
        catboost_fold_model = Pipeline([
            ("pre", preprocess),
            ("clf", CatBoostClassifier(**best_params_full_catboost)),
        ])
        catboost_fold_model.fit(X_tr, y_tr)

        y_val_pred = catboost_fold_model.predict(X_val)
        y_val_proba = catboost_fold_model.predict_proba(X_val)[:, 1]

        f1_val = f1_score(y_val, y_val_pred)  # class 1 is the positive class by default
        prec_val = precision_score(y_val, y_val_pred, zero_division=0)
        rec_val = recall_score(y_val, y_val_pred, zero_division=0)
        acc_val = accuracy_score(y_val, y_val_pred)
        auc_val = roc_auc_score(y_val, y_val_proba)

        outer_f1_scores.append(f1_val)
        outer_precisions.append(prec_val)
        outer_recalls.append(rec_val)
        outer_accuracies.append(acc_val)
        outer_aucs.append(auc_val)

        all_outer_f1_catboost.append(f1_val)
        all_outer_prec_catboost.append(prec_val)
        all_outer_rec_catboost.append(rec_val)
        all_outer_acc_catboost.append(acc_val)
        all_outer_auc_catboost.append(auc_val)

        rows_catboost.append({
            "seed": seed,
            "fold": fold,
            "F1": f1_val,
            "Precision": prec_val,
            "Recall": rec_val,
            "Accuracy": acc_val,
            "ROC_AUC": auc_val,
        })

        if auc_val > best_global_auc_catboost:
            best_global_auc_catboost = auc_val
            best_global_params_catboost = best_params_full_catboost.copy()

        print(f"\nSeed {seed} | Outer fold {fold}")
        print(
            f"  F1={f1_val:.4f}, Prec={prec_val:.4f}, Rec={rec_val:.4f}, "
            f"Acc={acc_val:.4f}, ROC AUC={auc_val:.4f}"
        )
        print(
            "  Params: "
            f"iterations={best_params_catboost['iterations']}, "
            f"depth={best_params_catboost['depth']}, "
            f"lr={best_params_catboost['learning_rate']:.4f}, "
            f"l2_leaf_reg={best_params_catboost['l2_leaf_reg']:.4f}, "
            f"subsample={best_params_catboost['subsample']:.3f}, "
            f"colsample_bylevel={best_params_catboost['colsample_bylevel']:.3f}"
        )

    # Summary for this seed
    outer_f1_scores = np.array(outer_f1_scores)
    outer_precisions = np.array(outer_precisions)
    outer_recalls = np.array(outer_recalls)
    outer_accuracies = np.array(outer_accuracies)
    outer_aucs = np.array(outer_aucs)

    print("\n--- Summary for seed", seed, "(CATBOOST_OHE, NO TRUST, ROC AUC) ---")
    print(f"  F1:        mean={outer_f1_scores.mean():.4f}, std={outer_f1_scores.std():.4f}")
    print(f"  Precision: mean={outer_precisions.mean():.4f}, std={outer_precisions.std():.4f}")
    print(f"  Recall:    mean={outer_recalls.mean():.4f}, std={outer_recalls.std():.4f}")
    print(f"  Accuracy:  mean={outer_accuracies.mean():.4f}, std={outer_accuracies.std():.4f}")
    print(f"  ROC AUC:   mean={outer_aucs.mean():.4f}, std={outer_aucs.std():.4f}")


# ============================================================
# GLOBAL SUMMARY – CATBOOST_OHE (ROC AUC)
# ============================================================
all_outer_f1_catboost = np.array(all_outer_f1_catboost)
all_outer_prec_catboost = np.array(all_outer_prec_catboost)
all_outer_rec_catboost = np.array(all_outer_rec_catboost)
all_outer_acc_catboost = np.array(all_outer_acc_catboost)
all_outer_auc_catboost = np.array(all_outer_auc_catboost)

print("\n" + "=" * 70)
print("===== GLOBAL SUMMARY – CATBOOST_OHE, NO TRUST, 5 SEEDS x 5 OUTER FOLDS (ROC AUC) =====")
print("=" * 70)
print(f"Global F1:        mean={all_outer_f1_catboost.mean():.4f}, std={all_outer_f1_catboost.std():.4f}")
print(f"Global Precision: mean={all_outer_prec_catboost.mean():.4f}, std={all_outer_prec_catboost.std():.4f}")
print(f"Global Recall:    mean={all_outer_rec_catboost.mean():.4f}, std={all_outer_rec_catboost.std():.4f}")
print(f"Global Accuracy:  mean={all_outer_acc_catboost.mean():.4f}, std={all_outer_acc_catboost.std():.4f}")
print(f"Global ROC AUC:   mean={all_outer_auc_catboost.mean():.4f}, std={all_outer_auc_catboost.std():.4f}")

print("\n===== BEST HYPERPARAMETERS ACROSS ALL SEEDS AND FOLDS (by ROC AUC) – CATBOOST_OHE =====")
print(f"Best ROC AUC: {best_global_auc_catboost:.4f}")
print("Best hyperparameters:")
for k, v in best_global_params_catboost.items():
    print(f"{k}: {v}")

# Detailed table of results: seed + fold
results_catboost_df = pd.DataFrame(rows_catboost)
print("\n===== DETAILED RESULTS PER SEED AND FOLD (CATBOOST_OHE, ROC AUC) =====")
print(results_catboost_df)

results_catboost_df.to_csv("catboost_ohe_nested_cv_detailed_results_roc_auc.csv", index=False)


[I 2025-11-28 13:13:50,906] A new study created in memory with name: no-name-19759dd5-d48d-4c60-80bd-dfea97a9fbe3



=== NESTED CV – SEED: 0 (CATBOOST_OHE, NO TRUST, ROC AUC) ===


[I 2025-11-28 13:14:06,260] Trial 0 finished with value: 0.7385129456458696 and parameters: {'iterations': 868, 'learning_rate': 0.16409119617016144, 'depth': 8, 'l2_leaf_reg': 0.01511933646764101, 'subsample': 0.7118273996694524, 'colsample_bylevel': 0.8229470565333281}. Best is trial 0 with value: 0.7385129456458696.
[I 2025-11-28 13:14:31,505] Trial 1 finished with value: 0.7183431691909457 and parameters: {'iterations': 713, 'learning_rate': 0.3274127435518369, 'depth': 10, 'l2_leaf_reg': 0.0034179529120610124, 'subsample': 0.8958625190413323, 'colsample_bylevel': 0.7644474598764522}. Best is trial 0 with value: 0.7385129456458696.
[I 2025-11-28 13:14:38,817] Trial 2 finished with value: 0.7365847505522266 and parameters: {'iterations': 895, 'learning_rate': 0.37373255040743125, 'depth': 4, 'l2_leaf_reg': 0.00022310905607443037, 'subsample': 0.5101091987201629, 'colsample_bylevel': 0.916309922773969}. Best is trial 0 with value: 0.7385129456458696.
[I 2025-11-28 13:15:21,511] Trial


Seed 0 | Outer fold 1
  F1=0.6495, Prec=0.5961, Rec=0.7135, Acc=0.7016, ROC AUC=0.7810
  Params: iterations=1349, depth=5, lr=0.0272, l2_leaf_reg=0.9690, subsample=0.641, colsample_bylevel=0.659


[I 2025-11-28 13:26:00,997] Trial 0 finished with value: 0.7366876383965039 and parameters: {'iterations': 868, 'learning_rate': 0.16409119617016144, 'depth': 8, 'l2_leaf_reg': 0.01511933646764101, 'subsample': 0.7118273996694524, 'colsample_bylevel': 0.8229470565333281}. Best is trial 0 with value: 0.7366876383965039.
[I 2025-11-28 13:26:26,959] Trial 1 finished with value: 0.7229725651395006 and parameters: {'iterations': 713, 'learning_rate': 0.3274127435518369, 'depth': 10, 'l2_leaf_reg': 0.0034179529120610124, 'subsample': 0.8958625190413323, 'colsample_bylevel': 0.7644474598764522}. Best is trial 0 with value: 0.7366876383965039.
[I 2025-11-28 13:26:34,676] Trial 2 finished with value: 0.7326344253301489 and parameters: {'iterations': 895, 'learning_rate': 0.37373255040743125, 'depth': 4, 'l2_leaf_reg': 0.00022310905607443037, 'subsample': 0.5101091987201629, 'colsample_bylevel': 0.916309922773969}. Best is trial 0 with value: 0.7366876383965039.
[I 2025-11-28 13:27:18,900] Trial


Seed 0 | Outer fold 2
  F1=0.6398, Prec=0.5861, Rec=0.7043, Acc=0.6926, ROC AUC=0.7703
  Params: iterations=1350, depth=5, lr=0.0243, l2_leaf_reg=0.1475, subsample=0.695, colsample_bylevel=0.659


[I 2025-11-28 13:37:54,094] Trial 0 finished with value: 0.7327345672224616 and parameters: {'iterations': 868, 'learning_rate': 0.16409119617016144, 'depth': 8, 'l2_leaf_reg': 0.01511933646764101, 'subsample': 0.7118273996694524, 'colsample_bylevel': 0.8229470565333281}. Best is trial 0 with value: 0.7327345672224616.
[I 2025-11-28 13:38:20,351] Trial 1 finished with value: 0.7163144454235987 and parameters: {'iterations': 713, 'learning_rate': 0.3274127435518369, 'depth': 10, 'l2_leaf_reg': 0.0034179529120610124, 'subsample': 0.8958625190413323, 'colsample_bylevel': 0.7644474598764522}. Best is trial 0 with value: 0.7327345672224616.
[I 2025-11-28 13:38:28,250] Trial 2 finished with value: 0.7344253021095009 and parameters: {'iterations': 895, 'learning_rate': 0.37373255040743125, 'depth': 4, 'l2_leaf_reg': 0.00022310905607443037, 'subsample': 0.5101091987201629, 'colsample_bylevel': 0.916309922773969}. Best is trial 2 with value: 0.7344253021095009.
[I 2025-11-28 13:39:13,513] Trial


Seed 0 | Outer fold 3
  F1=0.6536, Prec=0.5991, Rec=0.7189, Acc=0.7047, ROC AUC=0.7843
  Params: iterations=1451, depth=6, lr=0.0174, l2_leaf_reg=0.0567, subsample=0.623, colsample_bylevel=0.572


[I 2025-11-28 13:50:11,939] Trial 0 finished with value: 0.7376664008613759 and parameters: {'iterations': 868, 'learning_rate': 0.16409119617016144, 'depth': 8, 'l2_leaf_reg': 0.01511933646764101, 'subsample': 0.7118273996694524, 'colsample_bylevel': 0.8229470565333281}. Best is trial 0 with value: 0.7376664008613759.
[I 2025-11-28 13:50:37,585] Trial 1 finished with value: 0.7167604703703917 and parameters: {'iterations': 713, 'learning_rate': 0.3274127435518369, 'depth': 10, 'l2_leaf_reg': 0.0034179529120610124, 'subsample': 0.8958625190413323, 'colsample_bylevel': 0.7644474598764522}. Best is trial 0 with value: 0.7376664008613759.
[I 2025-11-28 13:50:45,313] Trial 2 finished with value: 0.7372171990139907 and parameters: {'iterations': 895, 'learning_rate': 0.37373255040743125, 'depth': 4, 'l2_leaf_reg': 0.00022310905607443037, 'subsample': 0.5101091987201629, 'colsample_bylevel': 0.916309922773969}. Best is trial 0 with value: 0.7376664008613759.
[I 2025-11-28 13:51:29,175] Trial


Seed 0 | Outer fold 4
  F1=0.6460, Prec=0.5854, Rec=0.7205, Acc=0.6939, ROC AUC=0.7764
  Params: iterations=1428, depth=6, lr=0.0137, l2_leaf_reg=0.9455, subsample=0.606, colsample_bylevel=0.515


[I 2025-11-28 14:02:42,674] Trial 0 finished with value: 0.7346648526248368 and parameters: {'iterations': 868, 'learning_rate': 0.16409119617016144, 'depth': 8, 'l2_leaf_reg': 0.01511933646764101, 'subsample': 0.7118273996694524, 'colsample_bylevel': 0.8229470565333281}. Best is trial 0 with value: 0.7346648526248368.
[I 2025-11-28 14:03:08,275] Trial 1 finished with value: 0.7243270609026213 and parameters: {'iterations': 713, 'learning_rate': 0.3274127435518369, 'depth': 10, 'l2_leaf_reg': 0.0034179529120610124, 'subsample': 0.8958625190413323, 'colsample_bylevel': 0.7644474598764522}. Best is trial 0 with value: 0.7346648526248368.
[I 2025-11-28 14:03:16,118] Trial 2 finished with value: 0.7343415107516075 and parameters: {'iterations': 895, 'learning_rate': 0.37373255040743125, 'depth': 4, 'l2_leaf_reg': 0.00022310905607443037, 'subsample': 0.5101091987201629, 'colsample_bylevel': 0.916309922773969}. Best is trial 0 with value: 0.7346648526248368.
[I 2025-11-28 14:04:00,885] Trial


Seed 0 | Outer fold 5
  F1=0.6602, Prec=0.6043, Rec=0.7275, Acc=0.7097, ROC AUC=0.7851
  Params: iterations=1495, depth=7, lr=0.0128, l2_leaf_reg=0.6536, subsample=0.586, colsample_bylevel=0.530

--- Summary for seed 0 (CATBOOST_OHE, NO TRUST, ROC AUC) ---
  F1:        mean=0.6498, std=0.0069
  Precision: mean=0.5942, std=0.0074
  Recall:    mean=0.7169, std=0.0078
  Accuracy:  mean=0.7005, std=0.0065
  ROC AUC:   mean=0.7794, std=0.0055

=== NESTED CV – SEED: 7 (CATBOOST_OHE, NO TRUST, ROC AUC) ===


[I 2025-11-28 14:15:51,206] Trial 0 finished with value: 0.7503553985369038 and parameters: {'iterations': 206, 'learning_rate': 0.2113774399969529, 'depth': 7, 'l2_leaf_reg': 0.0783178418343345, 'subsample': 0.9889947559983013, 'colsample_bylevel': 0.7692479352052168}. Best is trial 0 with value: 0.7503553985369038.
[I 2025-11-28 14:15:59,554] Trial 1 finished with value: 0.7705577012544328 and parameters: {'iterations': 802, 'learning_rate': 0.013256006682886073, 'depth': 5, 'l2_leaf_reg': 0.00998918377985214, 'subsample': 0.8396149980604702, 'colsample_bylevel': 0.9018695180521877}. Best is trial 1 with value: 0.7705577012544328.
[I 2025-11-28 14:16:06,982] Trial 2 finished with value: 0.7697807037310519 and parameters: {'iterations': 633, 'learning_rate': 0.012942669925656009, 'depth': 6, 'l2_leaf_reg': 0.434884682155233, 'subsample': 0.6066926767899578, 'colsample_bylevel': 0.7260619809088416}. Best is trial 1 with value: 0.7705577012544328.
[I 2025-11-28 14:16:30,597] Trial 3 fin


Seed 7 | Outer fold 1
  F1=0.6477, Prec=0.5918, Rec=0.7153, Acc=0.6985, ROC AUC=0.7789
  Params: iterations=996, depth=6, lr=0.0212, l2_leaf_reg=0.6112, subsample=0.911, colsample_bylevel=0.543


[I 2025-11-28 14:28:16,762] Trial 0 finished with value: 0.7511921729237317 and parameters: {'iterations': 206, 'learning_rate': 0.2113774399969529, 'depth': 7, 'l2_leaf_reg': 0.0783178418343345, 'subsample': 0.9889947559983013, 'colsample_bylevel': 0.7692479352052168}. Best is trial 0 with value: 0.7511921729237317.
[I 2025-11-28 14:28:25,168] Trial 1 finished with value: 0.771031424626338 and parameters: {'iterations': 802, 'learning_rate': 0.013256006682886073, 'depth': 5, 'l2_leaf_reg': 0.00998918377985214, 'subsample': 0.8396149980604702, 'colsample_bylevel': 0.9018695180521877}. Best is trial 1 with value: 0.771031424626338.
[I 2025-11-28 14:28:32,593] Trial 2 finished with value: 0.7705360582005557 and parameters: {'iterations': 633, 'learning_rate': 0.012942669925656009, 'depth': 6, 'l2_leaf_reg': 0.434884682155233, 'subsample': 0.6066926767899578, 'colsample_bylevel': 0.7260619809088416}. Best is trial 1 with value: 0.771031424626338.
[I 2025-11-28 14:28:56,216] Trial 3 finish


Seed 7 | Outer fold 2
  F1=0.6446, Prec=0.5912, Rec=0.7087, Acc=0.6972, ROC AUC=0.7736
  Params: iterations=1423, depth=7, lr=0.0150, l2_leaf_reg=0.1782, subsample=0.826, colsample_bylevel=0.659


[I 2025-11-28 14:41:49,756] Trial 0 finished with value: 0.7494183798422959 and parameters: {'iterations': 206, 'learning_rate': 0.2113774399969529, 'depth': 7, 'l2_leaf_reg': 0.0783178418343345, 'subsample': 0.9889947559983013, 'colsample_bylevel': 0.7692479352052168}. Best is trial 0 with value: 0.7494183798422959.
[I 2025-11-28 14:41:58,003] Trial 1 finished with value: 0.7692517530559417 and parameters: {'iterations': 802, 'learning_rate': 0.013256006682886073, 'depth': 5, 'l2_leaf_reg': 0.00998918377985214, 'subsample': 0.8396149980604702, 'colsample_bylevel': 0.9018695180521877}. Best is trial 1 with value: 0.7692517530559417.
[I 2025-11-28 14:42:05,475] Trial 2 finished with value: 0.7685235501634965 and parameters: {'iterations': 633, 'learning_rate': 0.012942669925656009, 'depth': 6, 'l2_leaf_reg': 0.434884682155233, 'subsample': 0.6066926767899578, 'colsample_bylevel': 0.7260619809088416}. Best is trial 1 with value: 0.7692517530559417.
[I 2025-11-28 14:42:28,850] Trial 3 fin


Seed 7 | Outer fold 3
  F1=0.6514, Prec=0.5967, Rec=0.7171, Acc=0.7026, ROC AUC=0.7802
  Params: iterations=1395, depth=6, lr=0.0151, l2_leaf_reg=0.1484, subsample=0.807, colsample_bylevel=0.523


[I 2025-11-28 14:54:32,405] Trial 0 finished with value: 0.7496133291240864 and parameters: {'iterations': 206, 'learning_rate': 0.2113774399969529, 'depth': 7, 'l2_leaf_reg': 0.0783178418343345, 'subsample': 0.9889947559983013, 'colsample_bylevel': 0.7692479352052168}. Best is trial 0 with value: 0.7496133291240864.
[I 2025-11-28 14:54:40,874] Trial 1 finished with value: 0.7694577731552044 and parameters: {'iterations': 802, 'learning_rate': 0.013256006682886073, 'depth': 5, 'l2_leaf_reg': 0.00998918377985214, 'subsample': 0.8396149980604702, 'colsample_bylevel': 0.9018695180521877}. Best is trial 1 with value: 0.7694577731552044.
[I 2025-11-28 14:54:48,319] Trial 2 finished with value: 0.7692144729340858 and parameters: {'iterations': 633, 'learning_rate': 0.012942669925656009, 'depth': 6, 'l2_leaf_reg': 0.434884682155233, 'subsample': 0.6066926767899578, 'colsample_bylevel': 0.7260619809088416}. Best is trial 1 with value: 0.7694577731552044.
[I 2025-11-28 14:55:12,472] Trial 3 fin


Seed 7 | Outer fold 4
  F1=0.6518, Prec=0.6007, Rec=0.7124, Acc=0.7050, ROC AUC=0.7855
  Params: iterations=1483, depth=6, lr=0.0151, l2_leaf_reg=0.5377, subsample=0.846, colsample_bylevel=0.648


[I 2025-11-28 15:08:04,097] Trial 0 finished with value: 0.7492782665775207 and parameters: {'iterations': 206, 'learning_rate': 0.2113774399969529, 'depth': 7, 'l2_leaf_reg': 0.0783178418343345, 'subsample': 0.9889947559983013, 'colsample_bylevel': 0.7692479352052168}. Best is trial 0 with value: 0.7492782665775207.
[I 2025-11-28 15:08:12,310] Trial 1 finished with value: 0.7700090243450516 and parameters: {'iterations': 802, 'learning_rate': 0.013256006682886073, 'depth': 5, 'l2_leaf_reg': 0.00998918377985214, 'subsample': 0.8396149980604702, 'colsample_bylevel': 0.9018695180521877}. Best is trial 1 with value: 0.7700090243450516.
[I 2025-11-28 15:08:19,626] Trial 2 finished with value: 0.7693325764073865 and parameters: {'iterations': 633, 'learning_rate': 0.012942669925656009, 'depth': 6, 'l2_leaf_reg': 0.434884682155233, 'subsample': 0.6066926767899578, 'colsample_bylevel': 0.7260619809088416}. Best is trial 1 with value: 0.7700090243450516.
[I 2025-11-28 15:08:43,146] Trial 3 fin


Seed 7 | Outer fold 5
  F1=0.6538, Prec=0.5994, Rec=0.7190, Acc=0.7049, ROC AUC=0.7820
  Params: iterations=451, depth=5, lr=0.0579, l2_leaf_reg=0.2770, subsample=0.814, colsample_bylevel=0.505

--- Summary for seed 7 (CATBOOST_OHE, NO TRUST, ROC AUC) ---
  F1:        mean=0.6499, std=0.0033
  Precision: mean=0.5960, std=0.0039
  Recall:    mean=0.7145, std=0.0036
  Accuracy:  mean=0.7016, std=0.0032
  ROC AUC:   mean=0.7800, std=0.0039

=== NESTED CV – SEED: 42 (CATBOOST_OHE, NO TRUST, ROC AUC) ===


[I 2025-11-28 15:18:07,857] Trial 0 finished with value: 0.7161758431204014 and parameters: {'iterations': 624, 'learning_rate': 0.4123206532618726, 'depth': 9, 'l2_leaf_reg': 0.024810409748678097, 'subsample': 0.5780093202212182, 'colsample_bylevel': 0.5779972601681014}. Best is trial 0 with value: 0.7161758431204014.
[I 2025-11-28 15:18:11,488] Trial 1 finished with value: 0.7286092999500892 and parameters: {'iterations': 181, 'learning_rate': 0.29621516588303487, 'depth': 8, 'l2_leaf_reg': 0.06796578090758151, 'subsample': 0.5102922471479012, 'colsample_bylevel': 0.9849549260809971}. Best is trial 1 with value: 0.7286092999500892.
[I 2025-11-28 15:18:24,069] Trial 2 finished with value: 0.7732662312036855 and parameters: {'iterations': 1266, 'learning_rate': 0.022948683681130556, 'depth': 5, 'l2_leaf_reg': 0.0005415244119402539, 'subsample': 0.6521211214797689, 'colsample_bylevel': 0.762378215816119}. Best is trial 2 with value: 0.7732662312036855.
[I 2025-11-28 15:18:35,975] Trial 


Seed 42 | Outer fold 1
  F1=0.6495, Prec=0.5931, Rec=0.7179, Acc=0.6998, ROC AUC=0.7853
  Params: iterations=1496, depth=6, lr=0.0192, l2_leaf_reg=0.4773, subsample=0.631, colsample_bylevel=0.526


[I 2025-11-28 15:29:30,036] Trial 0 finished with value: 0.7217616265594611 and parameters: {'iterations': 624, 'learning_rate': 0.4123206532618726, 'depth': 9, 'l2_leaf_reg': 0.024810409748678097, 'subsample': 0.5780093202212182, 'colsample_bylevel': 0.5779972601681014}. Best is trial 0 with value: 0.7217616265594611.
[I 2025-11-28 15:29:33,675] Trial 1 finished with value: 0.7277044101912354 and parameters: {'iterations': 181, 'learning_rate': 0.29621516588303487, 'depth': 8, 'l2_leaf_reg': 0.06796578090758151, 'subsample': 0.5102922471479012, 'colsample_bylevel': 0.9849549260809971}. Best is trial 1 with value: 0.7277044101912354.
[I 2025-11-28 15:29:45,677] Trial 2 finished with value: 0.7745864466368033 and parameters: {'iterations': 1266, 'learning_rate': 0.022948683681130556, 'depth': 5, 'l2_leaf_reg': 0.0005415244119402539, 'subsample': 0.6521211214797689, 'colsample_bylevel': 0.762378215816119}. Best is trial 2 with value: 0.7745864466368033.
[I 2025-11-28 15:29:57,826] Trial 


Seed 42 | Outer fold 2
  F1=0.6418, Prec=0.5874, Rec=0.7072, Acc=0.6941, ROC AUC=0.7744
  Params: iterations=1206, depth=6, lr=0.0185, l2_leaf_reg=0.1815, subsample=0.892, colsample_bylevel=0.549


[I 2025-11-28 15:40:27,153] Trial 0 finished with value: 0.7180471094904304 and parameters: {'iterations': 624, 'learning_rate': 0.4123206532618726, 'depth': 9, 'l2_leaf_reg': 0.024810409748678097, 'subsample': 0.5780093202212182, 'colsample_bylevel': 0.5779972601681014}. Best is trial 0 with value: 0.7180471094904304.
[I 2025-11-28 15:40:30,901] Trial 1 finished with value: 0.7289169952597926 and parameters: {'iterations': 181, 'learning_rate': 0.29621516588303487, 'depth': 8, 'l2_leaf_reg': 0.06796578090758151, 'subsample': 0.5102922471479012, 'colsample_bylevel': 0.9849549260809971}. Best is trial 1 with value: 0.7289169952597926.
[I 2025-11-28 15:40:43,089] Trial 2 finished with value: 0.7743704363499887 and parameters: {'iterations': 1266, 'learning_rate': 0.022948683681130556, 'depth': 5, 'l2_leaf_reg': 0.0005415244119402539, 'subsample': 0.6521211214797689, 'colsample_bylevel': 0.762378215816119}. Best is trial 2 with value: 0.7743704363499887.
[I 2025-11-28 15:40:55,205] Trial 


Seed 42 | Outer fold 3
  F1=0.6453, Prec=0.5889, Rec=0.7138, Acc=0.6960, ROC AUC=0.7763
  Params: iterations=1074, depth=6, lr=0.0179, l2_leaf_reg=0.8439, subsample=0.522, colsample_bylevel=0.638


[I 2025-11-28 15:50:17,902] Trial 0 finished with value: 0.7215382435681207 and parameters: {'iterations': 624, 'learning_rate': 0.4123206532618726, 'depth': 9, 'l2_leaf_reg': 0.024810409748678097, 'subsample': 0.5780093202212182, 'colsample_bylevel': 0.5779972601681014}. Best is trial 0 with value: 0.7215382435681207.
[I 2025-11-28 15:50:21,580] Trial 1 finished with value: 0.7280939907276635 and parameters: {'iterations': 181, 'learning_rate': 0.29621516588303487, 'depth': 8, 'l2_leaf_reg': 0.06796578090758151, 'subsample': 0.5102922471479012, 'colsample_bylevel': 0.9849549260809971}. Best is trial 1 with value: 0.7280939907276635.
[I 2025-11-28 15:50:33,914] Trial 2 finished with value: 0.7759038159467536 and parameters: {'iterations': 1266, 'learning_rate': 0.022948683681130556, 'depth': 5, 'l2_leaf_reg': 0.0005415244119402539, 'subsample': 0.6521211214797689, 'colsample_bylevel': 0.762378215816119}. Best is trial 2 with value: 0.7759038159467536.
[I 2025-11-28 15:50:46,160] Trial 


Seed 42 | Outer fold 4
  F1=0.6491, Prec=0.5954, Rec=0.7135, Acc=0.7010, ROC AUC=0.7760
  Params: iterations=1121, depth=5, lr=0.0240, l2_leaf_reg=0.1654, subsample=0.655, colsample_bylevel=0.513


[I 2025-11-28 16:01:06,910] Trial 0 finished with value: 0.7157735631841753 and parameters: {'iterations': 624, 'learning_rate': 0.4123206532618726, 'depth': 9, 'l2_leaf_reg': 0.024810409748678097, 'subsample': 0.5780093202212182, 'colsample_bylevel': 0.5779972601681014}. Best is trial 0 with value: 0.7157735631841753.
[I 2025-11-28 16:01:10,593] Trial 1 finished with value: 0.7267362419151695 and parameters: {'iterations': 181, 'learning_rate': 0.29621516588303487, 'depth': 8, 'l2_leaf_reg': 0.06796578090758151, 'subsample': 0.5102922471479012, 'colsample_bylevel': 0.9849549260809971}. Best is trial 1 with value: 0.7267362419151695.
[I 2025-11-28 16:01:22,722] Trial 2 finished with value: 0.773144264113971 and parameters: {'iterations': 1266, 'learning_rate': 0.022948683681130556, 'depth': 5, 'l2_leaf_reg': 0.0005415244119402539, 'subsample': 0.6521211214797689, 'colsample_bylevel': 0.762378215816119}. Best is trial 2 with value: 0.773144264113971.
[I 2025-11-28 16:01:34,723] Trial 3 


Seed 42 | Outer fold 5
  F1=0.6516, Prec=0.5953, Rec=0.7198, Acc=0.7017, ROC AUC=0.7858
  Params: iterations=1018, depth=4, lr=0.0485, l2_leaf_reg=0.3787, subsample=0.768, colsample_bylevel=0.715

--- Summary for seed 42 (CATBOOST_OHE, NO TRUST, ROC AUC) ---
  F1:        mean=0.6475, std=0.0035
  Precision: mean=0.5920, std=0.0033
  Recall:    mean=0.7144, std=0.0043
  Accuracy:  mean=0.6985, std=0.0030
  ROC AUC:   mean=0.7796, std=0.0049

=== NESTED CV – SEED: 123 (CATBOOST_OHE, NO TRUST, ROC AUC) ===


[I 2025-11-28 16:10:31,050] Trial 0 finished with value: 0.7742792540951573 and parameters: {'iterations': 1075, 'learning_rate': 0.030629657963841296, 'depth': 5, 'l2_leaf_reg': 0.01604202084518248, 'subsample': 0.8597344848927815, 'colsample_bylevel': 0.7115532300622305}. Best is trial 0 with value: 0.7742792540951573.
[I 2025-11-28 16:10:50,787] Trial 1 finished with value: 0.7345074842319291 and parameters: {'iterations': 1474, 'learning_rate': 0.1457153097856983, 'depth': 7, 'l2_leaf_reg': 0.00370228692069884, 'subsample': 0.6715890080754348, 'colsample_bylevel': 0.8645248536920208}. Best is trial 0 with value: 0.7742792540951573.
[I 2025-11-28 16:10:58,842] Trial 2 finished with value: 0.7694497526395973 and parameters: {'iterations': 714, 'learning_rate': 0.012629639558632224, 'depth': 6, 'l2_leaf_reg': 0.08953268791955236, 'subsample': 0.59124586522675, 'colsample_bylevel': 0.5877258780737462}. Best is trial 0 with value: 0.7742792540951573.
[I 2025-11-28 16:11:13,803] Trial 3 


Seed 123 | Outer fold 1
  F1=0.6486, Prec=0.5947, Rec=0.7131, Acc=0.7005, ROC AUC=0.7813
  Params: iterations=1492, depth=5, lr=0.0195, l2_leaf_reg=0.5615, subsample=0.930, colsample_bylevel=0.503


[I 2025-11-28 16:22:01,355] Trial 0 finished with value: 0.7733621556173179 and parameters: {'iterations': 1075, 'learning_rate': 0.030629657963841296, 'depth': 5, 'l2_leaf_reg': 0.01604202084518248, 'subsample': 0.8597344848927815, 'colsample_bylevel': 0.7115532300622305}. Best is trial 0 with value: 0.7733621556173179.
[I 2025-11-28 16:22:21,190] Trial 1 finished with value: 0.7347338179925119 and parameters: {'iterations': 1474, 'learning_rate': 0.1457153097856983, 'depth': 7, 'l2_leaf_reg': 0.00370228692069884, 'subsample': 0.6715890080754348, 'colsample_bylevel': 0.8645248536920208}. Best is trial 0 with value: 0.7733621556173179.
[I 2025-11-28 16:22:29,365] Trial 2 finished with value: 0.7692877575930953 and parameters: {'iterations': 714, 'learning_rate': 0.012629639558632224, 'depth': 6, 'l2_leaf_reg': 0.08953268791955236, 'subsample': 0.59124586522675, 'colsample_bylevel': 0.5877258780737462}. Best is trial 0 with value: 0.7733621556173179.
[I 2025-11-28 16:22:44,416] Trial 3 


Seed 123 | Outer fold 2
  F1=0.6537, Prec=0.6032, Rec=0.7135, Acc=0.7071, ROC AUC=0.7842
  Params: iterations=1268, depth=5, lr=0.0220, l2_leaf_reg=0.6146, subsample=0.561, colsample_bylevel=0.530


[I 2025-11-28 16:32:56,360] Trial 0 finished with value: 0.7741066971305792 and parameters: {'iterations': 1075, 'learning_rate': 0.030629657963841296, 'depth': 5, 'l2_leaf_reg': 0.01604202084518248, 'subsample': 0.8597344848927815, 'colsample_bylevel': 0.7115532300622305}. Best is trial 0 with value: 0.7741066971305792.
[I 2025-11-28 16:33:16,267] Trial 1 finished with value: 0.7345117578519044 and parameters: {'iterations': 1474, 'learning_rate': 0.1457153097856983, 'depth': 7, 'l2_leaf_reg': 0.00370228692069884, 'subsample': 0.6715890080754348, 'colsample_bylevel': 0.8645248536920208}. Best is trial 0 with value: 0.7741066971305792.
[I 2025-11-28 16:33:24,289] Trial 2 finished with value: 0.7703518172774421 and parameters: {'iterations': 714, 'learning_rate': 0.012629639558632224, 'depth': 6, 'l2_leaf_reg': 0.08953268791955236, 'subsample': 0.59124586522675, 'colsample_bylevel': 0.5877258780737462}. Best is trial 0 with value: 0.7741066971305792.
[I 2025-11-28 16:33:39,478] Trial 3 


Seed 123 | Outer fold 3
  F1=0.6451, Prec=0.5862, Rec=0.7171, Acc=0.6943, ROC AUC=0.7768
  Params: iterations=1334, depth=6, lr=0.0183, l2_leaf_reg=0.5938, subsample=0.889, colsample_bylevel=0.684


[I 2025-11-28 16:43:55,398] Trial 0 finished with value: 0.7748995294446726 and parameters: {'iterations': 1075, 'learning_rate': 0.030629657963841296, 'depth': 5, 'l2_leaf_reg': 0.01604202084518248, 'subsample': 0.8597344848927815, 'colsample_bylevel': 0.7115532300622305}. Best is trial 0 with value: 0.7748995294446726.
[I 2025-11-28 16:44:15,107] Trial 1 finished with value: 0.7347742613142559 and parameters: {'iterations': 1474, 'learning_rate': 0.1457153097856983, 'depth': 7, 'l2_leaf_reg': 0.00370228692069884, 'subsample': 0.6715890080754348, 'colsample_bylevel': 0.8645248536920208}. Best is trial 0 with value: 0.7748995294446726.
[I 2025-11-28 16:44:23,244] Trial 2 finished with value: 0.7698146881251665 and parameters: {'iterations': 714, 'learning_rate': 0.012629639558632224, 'depth': 6, 'l2_leaf_reg': 0.08953268791955236, 'subsample': 0.59124586522675, 'colsample_bylevel': 0.5877258780737462}. Best is trial 0 with value: 0.7748995294446726.
[I 2025-11-28 16:44:38,376] Trial 3 


Seed 123 | Outer fold 4
  F1=0.6552, Prec=0.5940, Rec=0.7305, Acc=0.7020, ROC AUC=0.7822
  Params: iterations=1390, depth=7, lr=0.0128, l2_leaf_reg=0.4997, subsample=0.621, colsample_bylevel=0.779


[I 2025-11-28 16:55:13,382] Trial 0 finished with value: 0.7760609777345712 and parameters: {'iterations': 1075, 'learning_rate': 0.030629657963841296, 'depth': 5, 'l2_leaf_reg': 0.01604202084518248, 'subsample': 0.8597344848927815, 'colsample_bylevel': 0.7115532300622305}. Best is trial 0 with value: 0.7760609777345712.
[I 2025-11-28 16:55:33,008] Trial 1 finished with value: 0.737068082909162 and parameters: {'iterations': 1474, 'learning_rate': 0.1457153097856983, 'depth': 7, 'l2_leaf_reg': 0.00370228692069884, 'subsample': 0.6715890080754348, 'colsample_bylevel': 0.8645248536920208}. Best is trial 0 with value: 0.7760609777345712.
[I 2025-11-28 16:55:41,154] Trial 2 finished with value: 0.7728288505920675 and parameters: {'iterations': 714, 'learning_rate': 0.012629639558632224, 'depth': 6, 'l2_leaf_reg': 0.08953268791955236, 'subsample': 0.59124586522675, 'colsample_bylevel': 0.5877258780737462}. Best is trial 0 with value: 0.7760609777345712.
[I 2025-11-28 16:55:56,272] Trial 3 f


Seed 123 | Outer fold 5
  F1=0.6387, Prec=0.5830, Rec=0.7061, Acc=0.6903, ROC AUC=0.7706
  Params: iterations=1343, depth=5, lr=0.0191, l2_leaf_reg=0.3210, subsample=0.885, colsample_bylevel=0.503

--- Summary for seed 123 (CATBOOST_OHE, NO TRUST, ROC AUC) ---
  F1:        mean=0.6482, std=0.0060
  Precision: mean=0.5922, std=0.0071
  Recall:    mean=0.7161, std=0.0080
  Accuracy:  mean=0.6988, std=0.0059
  ROC AUC:   mean=0.7790, std=0.0049

=== NESTED CV – SEED: 999 (CATBOOST_OHE, NO TRUST, ROC AUC) ===


[I 2025-11-28 17:06:18,545] Trial 0 finished with value: 0.7722735156986319 and parameters: {'iterations': 1225, 'learning_rate': 0.07874891104269156, 'depth': 4, 'l2_leaf_reg': 0.03620143424647891, 'subsample': 0.54546263138704, 'colsample_bylevel': 0.6661128403965062}. Best is trial 0 with value: 0.7722735156986319.
[I 2025-11-28 17:06:30,847] Trial 1 finished with value: 0.7520084982186925 and parameters: {'iterations': 698, 'learning_rate': 0.08747518429707113, 'depth': 8, 'l2_leaf_reg': 0.06159873262451665, 'subsample': 0.8949748444406902, 'colsample_bylevel': 0.5659451764473047}. Best is trial 0 with value: 0.7722735156986319.
[I 2025-11-28 17:06:41,544] Trial 2 finished with value: 0.7690216716943276 and parameters: {'iterations': 580, 'learning_rate': 0.022001066456294845, 'depth': 8, 'l2_leaf_reg': 0.00013601821068960908, 'subsample': 0.9546250218040291, 'colsample_bylevel': 0.7025803324412201}. Best is trial 0 with value: 0.7722735156986319.
[I 2025-11-28 17:06:54,563] Trial 


Seed 999 | Outer fold 1
  F1=0.6537, Prec=0.5908, Rec=0.7316, Acc=0.6996, ROC AUC=0.7779
  Params: iterations=830, depth=7, lr=0.0208, l2_leaf_reg=0.1113, subsample=0.550, colsample_bylevel=0.664


[I 2025-11-28 17:19:08,882] Trial 0 finished with value: 0.773725931359237 and parameters: {'iterations': 1225, 'learning_rate': 0.07874891104269156, 'depth': 4, 'l2_leaf_reg': 0.03620143424647891, 'subsample': 0.54546263138704, 'colsample_bylevel': 0.6661128403965062}. Best is trial 0 with value: 0.773725931359237.
[I 2025-11-28 17:19:21,119] Trial 1 finished with value: 0.7540088093764208 and parameters: {'iterations': 698, 'learning_rate': 0.08747518429707113, 'depth': 8, 'l2_leaf_reg': 0.06159873262451665, 'subsample': 0.8949748444406902, 'colsample_bylevel': 0.5659451764473047}. Best is trial 0 with value: 0.773725931359237.
[I 2025-11-28 17:19:31,665] Trial 2 finished with value: 0.7664115204392394 and parameters: {'iterations': 580, 'learning_rate': 0.022001066456294845, 'depth': 8, 'l2_leaf_reg': 0.00013601821068960908, 'subsample': 0.9546250218040291, 'colsample_bylevel': 0.7025803324412201}. Best is trial 0 with value: 0.773725931359237.
[I 2025-11-28 17:19:44,511] Trial 3 fi


Seed 999 | Outer fold 2
  F1=0.6505, Prec=0.5924, Rec=0.7212, Acc=0.6996, ROC AUC=0.7837
  Params: iterations=1156, depth=6, lr=0.0208, l2_leaf_reg=0.4105, subsample=0.594, colsample_bylevel=0.632


[I 2025-11-28 17:32:09,506] Trial 0 finished with value: 0.7745591418582728 and parameters: {'iterations': 1225, 'learning_rate': 0.07874891104269156, 'depth': 4, 'l2_leaf_reg': 0.03620143424647891, 'subsample': 0.54546263138704, 'colsample_bylevel': 0.6661128403965062}. Best is trial 0 with value: 0.7745591418582728.
[I 2025-11-28 17:32:21,863] Trial 1 finished with value: 0.752591575006479 and parameters: {'iterations': 698, 'learning_rate': 0.08747518429707113, 'depth': 8, 'l2_leaf_reg': 0.06159873262451665, 'subsample': 0.8949748444406902, 'colsample_bylevel': 0.5659451764473047}. Best is trial 0 with value: 0.7745591418582728.
[I 2025-11-28 17:32:32,399] Trial 2 finished with value: 0.7682370152799676 and parameters: {'iterations': 580, 'learning_rate': 0.022001066456294845, 'depth': 8, 'l2_leaf_reg': 0.00013601821068960908, 'subsample': 0.9546250218040291, 'colsample_bylevel': 0.7025803324412201}. Best is trial 0 with value: 0.7745591418582728.
[I 2025-11-28 17:32:45,314] Trial 3


Seed 999 | Outer fold 3
  F1=0.6453, Prec=0.5940, Rec=0.7064, Acc=0.6992, ROC AUC=0.7755
  Params: iterations=1371, depth=7, lr=0.0170, l2_leaf_reg=0.2166, subsample=0.621, colsample_bylevel=0.922


[I 2025-11-28 17:44:47,796] Trial 0 finished with value: 0.7718235824773313 and parameters: {'iterations': 1225, 'learning_rate': 0.07874891104269156, 'depth': 4, 'l2_leaf_reg': 0.03620143424647891, 'subsample': 0.54546263138704, 'colsample_bylevel': 0.6661128403965062}. Best is trial 0 with value: 0.7718235824773313.
[I 2025-11-28 17:45:00,287] Trial 1 finished with value: 0.7516283619793267 and parameters: {'iterations': 698, 'learning_rate': 0.08747518429707113, 'depth': 8, 'l2_leaf_reg': 0.06159873262451665, 'subsample': 0.8949748444406902, 'colsample_bylevel': 0.5659451764473047}. Best is trial 0 with value: 0.7718235824773313.
[I 2025-11-28 17:45:10,996] Trial 2 finished with value: 0.7652179513527457 and parameters: {'iterations': 580, 'learning_rate': 0.022001066456294845, 'depth': 8, 'l2_leaf_reg': 0.00013601821068960908, 'subsample': 0.9546250218040291, 'colsample_bylevel': 0.7025803324412201}. Best is trial 0 with value: 0.7718235824773313.
[I 2025-11-28 17:45:23,923] Trial 


Seed 999 | Outer fold 4
  F1=0.6497, Prec=0.6026, Rec=0.7046, Acc=0.7054, ROC AUC=0.7788
  Params: iterations=1371, depth=7, lr=0.0170, l2_leaf_reg=0.2166, subsample=0.621, colsample_bylevel=0.922


[I 2025-11-28 17:57:57,672] Trial 0 finished with value: 0.7728015072622746 and parameters: {'iterations': 1225, 'learning_rate': 0.07874891104269156, 'depth': 4, 'l2_leaf_reg': 0.03620143424647891, 'subsample': 0.54546263138704, 'colsample_bylevel': 0.6661128403965062}. Best is trial 0 with value: 0.7728015072622746.
[I 2025-11-28 17:58:10,009] Trial 1 finished with value: 0.7533617927605586 and parameters: {'iterations': 698, 'learning_rate': 0.08747518429707113, 'depth': 8, 'l2_leaf_reg': 0.06159873262451665, 'subsample': 0.8949748444406902, 'colsample_bylevel': 0.5659451764473047}. Best is trial 0 with value: 0.7728015072622746.
[I 2025-11-28 17:58:20,711] Trial 2 finished with value: 0.7673589516407248 and parameters: {'iterations': 580, 'learning_rate': 0.022001066456294845, 'depth': 8, 'l2_leaf_reg': 0.00013601821068960908, 'subsample': 0.9546250218040291, 'colsample_bylevel': 0.7025803324412201}. Best is trial 0 with value: 0.7728015072622746.
[I 2025-11-28 17:58:33,459] Trial 


Seed 999 | Outer fold 5
  F1=0.6478, Prec=0.5970, Rec=0.7080, Acc=0.7016, ROC AUC=0.7811
  Params: iterations=1214, depth=6, lr=0.0244, l2_leaf_reg=0.5213, subsample=0.550, colsample_bylevel=0.801

--- Summary for seed 999 (CATBOOST_OHE, NO TRUST, ROC AUC) ---
  F1:        mean=0.6494, std=0.0028
  Precision: mean=0.5954, std=0.0042
  Recall:    mean=0.7144, std=0.0104
  Accuracy:  mean=0.7011, std=0.0023
  ROC AUC:   mean=0.7794, std=0.0028

===== GLOBAL SUMMARY – CATBOOST_OHE, NO TRUST, 5 SEEDS x 5 OUTER FOLDS (ROC AUC) =====
Global F1:        mean=0.6490, std=0.0049
Global Precision: mean=0.5940, std=0.0057
Global Recall:    mean=0.7153, std=0.0074
Global Accuracy:  mean=0.7001, std=0.0047
Global ROC AUC:   mean=0.7795, std=0.0045

===== BEST HYPERPARAMETERS ACROSS ALL SEEDS AND FOLDS (by ROC AUC) – CATBOOST_OHE =====
Best ROC AUC: 0.7858
Best hyperparameters:
iterations: 1018
learning_rate: 0.04850224175248138
depth: 4
l2_leaf_reg: 0.37874196345953415
subsample: 0.7682694233957235