In [None]:
# версия MLP
# ─────────────────────────────────────────────────────────────────

# 1. библиотеки
!pip install openpyxl

import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, roc_auc_score

# Load the three source workbooks: the main response table, the education
# table and the work-experience table.
main, educ, exp = (
    pd.read_excel(workbook)
    for workbook in (
        'общая таблица.xlsx',
        'Образование соискателя.xlsx',
        'experience_data_finished.xlsx',
    )
)

# 2) preprocessing

# education: attach the `year` column to every response by response ID
# NOTE(review): a left merge repeats a response row once per matching
# education row — confirm the education table has one row per response ID.
educ_year = educ.loc[:, ['Идентификатор отклика', 'year']]
data = pd.merge(main, educ_year, how='left', on='Идентификатор отклика')

# responses without an education match get the median `year` of the merged table
year_median = data['year'].median()
data['year'] = data['year'].fillna(year_median)

# experience (from `exp`): compute per-record durations, then aggregate
exp['start'] = pd.to_datetime(exp['start'])
# missing or unparseable end dates are replaced with the current timestamp
parsed_end = pd.to_datetime(exp['end'], errors='coerce')
exp['end'] = parsed_end.fillna(pd.Timestamp.today())
# duration in months, approximating a month as 30 days
exp['duration_months'] = (exp['end'] - exp['start']).dt.days / 30.0

# one row per response ID with summary statistics of its experience records
exp_agg = (
    exp.groupby('Идентификатор отклика')
    .agg(**{
        'num_positions': ('position', 'nunique'),
        'avg_duration': ('duration_months', 'mean'),
        'max_duration': ('duration_months', 'max'),
        # total number of characters across all descriptions of the response
        'total_desc_length': ('description', lambda texts: texts.str.len().sum()),
    })
    .reset_index()
)

data = pd.merge(data, exp_agg, how='left', on='Идентификатор отклика')
# responses with no experience records get zeros for every aggregate
exp_feature_cols = ['num_positions', 'avg_duration', 'max_duration', 'total_desc_length']
data[exp_feature_cols] = data[exp_feature_cols].fillna(0)

# target variable: 1 when the response status equals 'Приглашение', otherwise 0
status = data['Текущее состояние отклика.Название статуса']
data['target'] = status.eq('Приглашение').astype(int)

# 3. feature selection
features = [
    'Общий опыт работы.months',
    'year',
    'num_positions',
    'avg_duration',
    'max_duration',
    'total_desc_length',
]
X = data.loc[:, features]
y = data['target']

# stratified 80/20 hold-out split with a fixed seed
split_options = dict(stratify=y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# 4. Grid search
# Standardise the features, then fit an MLP; both steps are refit inside
# every cross-validation fold because they live in one pipeline.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('mlp', MLPClassifier(max_iter=500, random_state=42)),
])

# MLP hyper-parameters to search; the `mlp__` prefix routes each one to the
# pipeline step named 'mlp'.
mlp_search_space = {
    'hidden_layer_sizes': [(64, 32), (32, 16), (32,)],
    'alpha': [1e-4, 1e-3, 1e-2],
    'learning_rate_init': [1e-3, 1e-2],
    'activation': ['relu', 'tanh'],
}
param_grid = {
    f'mlp__{param}': candidates
    for param, candidates in mlp_search_space.items()
}

# 5-fold cross-validated search scored by ROC AUC, using all available cores
grid = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,
    verbose=2,
)
grid.fit(X_train, y_train)

print("Лучшие параметры:", grid.best_params_)

# 5. evaluate the final model on the held-out test set
best_model = grid.best_estimator_

# hard labels at the default decision rule, plus probability of class 1
y_pred = best_model.predict(X_test)
test_probabilities = best_model.predict_proba(X_test)
y_proba = test_probabilities[:, 1]

report = classification_report(
    y_test,
    y_pred,
    target_names=['Не приглашён','Приглашён'],
)
print("\nClassification Report:")
print(report)

auc = roc_auc_score(y_test, y_proba)
print(f"\nFinal ROC AUC: {auc:.3f}")

Fitting 5 folds for each of 36 candidates, totalling 180 fits
Лучшие параметры: {'mlp__activation': 'relu', 'mlp__alpha': 0.01, 'mlp__hidden_layer_sizes': (32, 16), 'mlp__learning_rate_init': 0.001}

Classification Report:
              precision    recall  f1-score   support

Не приглашён       0.69      0.13      0.21      1297
   Приглашён       0.69      0.97      0.81      2608

    accuracy                           0.69      3905
   macro avg       0.69      0.55      0.51      3905
weighted avg       0.69      0.69      0.61      3905


Final ROC AUC: 0.617


In [None]:
# эмбеддинги + стекинг LogisticRegression + MLP
# ─────────────────────────────────────────────────────────────────

# 1. библиотеки
!pip install --quiet openpyxl sentence-transformers scikit-learn

import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import roc_auc_score, classification_report, precision_recall_curve

from sentence_transformers import SentenceTransformer

# 2. load data (main response table, education table, experience table)
main = pd.read_excel('общая таблица.xlsx')
educ = pd.read_excel('Образование соискателя.xlsx')
exp = pd.read_excel('experience_data_finished.xlsx')

# 3. preprocessing and target
response_id = 'Идентификатор отклика'
df = pd.merge(main, educ[[response_id, 'year']], how='left', on=response_id)
# rows without an education match get the median `year` of the merged table
median_year = df['year'].median()
df['year'] = df['year'].fillna(median_year)
# target: 1 when the response status equals 'Приглашение', otherwise 0
is_invited = df['Текущее состояние отклика.Название статуса'].eq('Приглашение')
df['target'] = is_invited.astype(int)

# 4. aggregate experience features
exp['start'] = pd.to_datetime(exp['start'])
# missing or unparseable end dates are replaced with the current timestamp
end_parsed = pd.to_datetime(exp['end'], errors='coerce')
exp['end'] = end_parsed.fillna(pd.Timestamp.today())
# duration in months, approximating a month as 30 days
exp['dur_mo'] = (exp['end'] - exp['start']).dt.days / 30.0

# per-response count / mean / max of the record durations
dur_stats = {'dur_count': 'count', 'dur_mean': 'mean', 'dur_max': 'max'}
dur_agg = (
    exp.groupby(response_id)['dur_mo']
    .agg(**dur_stats)
    .reset_index()
    .fillna(0)
)

df = pd.merge(df, dur_agg, how='left', on=response_id)
# responses with no experience records get zeros for every aggregate
dur_cols = ['dur_count', 'dur_mean', 'dur_max']
df[dur_cols] = df[dur_cols].fillna(0)

# 5. text embeddings (CPU)
embedder = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')

# one vacancy description per row of df; missing text becomes ''
job_texts = df['Описание вакансии'].fillna('').tolist()

# all experience descriptions of a response joined into one string, then
# aligned to the row order of df (responses without experience get '')
joined_descriptions = (
    exp.groupby('Идентификатор отклика')['description']
    .apply(lambda parts: ' '.join(parts.fillna('')))
)
aligned_descriptions = joined_descriptions.reindex(df['Идентификатор отклика'])
exp_texts = aligned_descriptions.fillna('').tolist()

job_emb = embedder.encode(job_texts, show_progress_bar=True)
exp_emb = embedder.encode(exp_texts, show_progress_bar=True)

# concatenate both embeddings per row and compress them to 50 components
# NOTE(review): the SVD is fitted on all rows, before the train/test split.
emb_combined = np.concatenate([job_emb, exp_emb], axis=1)
svd = TruncatedSVD(n_components=50, random_state=42)
emb_reduced = svd.fit_transform(emb_combined)

# final feature matrix: tabular columns followed by the reduced embeddings
tabular_cols = ['Общий опыт работы.months','year','dur_count','dur_mean','dur_max']
X_tab = df[tabular_cols].values
X = np.concatenate([X_tab, emb_reduced], axis=1)
y = df['target'].values

# 6. train/test split
# Split row positions (not the arrays) so X and y are sliced by the same indices.
idx = np.arange(y.shape[0])
train_idx, test_idx = train_test_split(
    idx,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

# 7. model stacking
# Local imports: this cell does not import the scaling utilities itself.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Logistic regression and the MLP are sensitive to feature scale, and the
# recorded run of this cell emitted an lbfgs convergence warning that
# recommends scaling. Each one is therefore wrapped in a pipeline with its own
# StandardScaler, which is refit inside every CV fold (as the MLP-only cell
# already does).
base_models = [
    ('lr', make_pipeline(
        StandardScaler(),
        LogisticRegression(max_iter=500, class_weight='balanced'),
    )),
    ('mlp', make_pipeline(
        StandardScaler(),
        MLPClassifier(
            hidden_layer_sizes=(64, 32),
            max_iter=300,
            early_stopping=True,
            tol=1e-4,
            random_state=42,
        ),
    )),
]

# passthrough=True feeds the original features to the meta-learner alongside
# the base-model predictions, so the meta-learner input is scaled as well.
stack = StackingClassifier(
    estimators=base_models,
    final_estimator=make_pipeline(
        StandardScaler(),
        LogisticRegression(max_iter=500, class_weight='balanced'),
    ),
    cv=5,
    n_jobs=-1,
    passthrough=True,
)
stack.fit(X_train, y_train)

# 8. quality evaluation
# probability of class 1 for every test row
y_proba = stack.predict_proba(X_test)[:, 1]
print(f"Test ROC AUC: {roc_auc_score(y_test, y_proba):.3f}")

# 9. pick the decision threshold that maximises F1
# NOTE(review): the threshold is selected on the same test set that the report
# below is computed on, so the reported metrics are optimistic.
prec, rec, thr = precision_recall_curve(y_test, y_proba)
# the small constant keeps the ratio finite where precision + recall == 0
f1_denominator = prec + rec + 1e-8
f1 = 2 * prec * rec / f1_denominator
best_i = np.nanargmax(f1)
opt_thresh = thr[best_i]
print(f"Optimal threshold (F1): {opt_thresh:.2f}")

# hard labels at the selected threshold
y_pred = np.greater_equal(y_proba, opt_thresh).astype(int)
report = classification_report(
    y_test,
    y_pred,
    target_names=['Не приглашён','Приглашён'],
)
print("\nClassification Report:")
print(report)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/611 [00:00<?, ?it/s]

Batches:   0%|          | 0/611 [00:00<?, ?it/s]

Test ROC AUC: 0.777
Optimal threshold (F1): 0.26

Classification Report:
              precision    recall  f1-score   support

Не приглашён       0.69      0.44      0.54      1297
   Приглашён       0.76      0.90      0.83      2608

    accuracy                           0.75      3905
   macro avg       0.73      0.67      0.68      3905
weighted avg       0.74      0.75      0.73      3905



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# библиотеки
!pip install --quiet openpyxl sentence-transformers scikit-learn lightgbm xgboost

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import roc_auc_score, classification_report, precision_recall_curve
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Optional dependency: LightGBM. LGBM_AVAILABLE records whether the import
# succeeded so later code can skip LightGBM-based models instead of failing.
try:
    import lightgbm as lgb
    LGBM_AVAILABLE = True
except ImportError:
    LGBM_AVAILABLE = False
    print("LightGBM not installed. Skipping LGBM models.")

# Optional dependency: XGBoost, handled the same way via XGB_AVAILABLE.
try:
    import xgboost as xgb
    XGB_AVAILABLE = True
except ImportError:
    XGB_AVAILABLE = False
    print("XGBoost not installed. Skipping XGB models.")

# 2. load data
# The three workbooks are read from the working directory. A missing file
# makes pd.read_excel raise FileNotFoundError, which propagates unchanged;
# the previous try/except only re-raised it, so it has been removed.
main_df = pd.read_excel('общая таблица.xlsx')
educ_df = pd.read_excel('Образование соискателя.xlsx')
exp_df = pd.read_excel('experience_data_finished.xlsx')

# 3. preprocessing and target definition
id_column = 'Идентификатор отклика'
education_year = educ_df.loc[:, [id_column, 'year']]
df = pd.merge(main_df, education_year, how='left', on=id_column)
# rows without an education match get the median `year` of the merged table
df['year'] = df['year'].fillna(df['year'].median())
# target: 1 when the response status equals 'Приглашение', otherwise 0
status_column = df['Текущее состояние отклика.Название статуса']
df['target'] = status_column.eq('Приглашение').astype(int)

# 4. aggregate work-experience features
exp_df['start'] = pd.to_datetime(exp_df['start'])
# missing or unparseable end dates are replaced with the current timestamp
coerced_end = pd.to_datetime(exp_df['end'], errors='coerce')
exp_df['end'] = coerced_end.fillna(pd.Timestamp.today())
# duration in months, approximating a month as 30 days
elapsed = exp_df['end'] - exp_df['start']
exp_df['dur_mo'] = elapsed.dt.days / 30.0

# per-response count / mean / max of the record durations
dur_agg = (
    exp_df.groupby(id_column)['dur_mo']
    .agg(**{'dur_count': 'count', 'dur_mean': 'mean', 'dur_max': 'max'})
    .reset_index()
    .fillna(0)
)

df = pd.merge(df, dur_agg, how='left', on=id_column)
# responses with no experience records get zeros for every aggregate
duration_columns = ['dur_count', 'dur_mean', 'dur_max']
df[duration_columns] = df[duration_columns].fillna(0)

# 5. embeddings
embedder = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')

# one vacancy description per row of df; missing text becomes ''
job_texts = df['Описание вакансии'].fillna('').tolist()
# all experience descriptions of a response joined into one string, aligned to
# the row order of df (responses without experience records get '')
exp_texts_agg = exp_df.groupby('Идентификатор отклика')['description'] \
    .apply(lambda x: ' '.join(x.fillna(''))) \
    .reindex(df['Идентификатор отклика']).fillna('').tolist()

print("Генерация эмбеддингов для описаний вакансий...")
job_emb = embedder.encode(job_texts, show_progress_bar=True, convert_to_numpy=True)
print("Генерация эмбеддингов для описаний опыта работы кандидатов...")
exp_emb = embedder.encode(exp_texts_agg, show_progress_bar=True, convert_to_numpy=True)

# Row-wise cosine similarity between the vacancy and experience embeddings,
# computed in one vectorised pass instead of one sklearn call per row.
# Both text lists are built from df, so the two matrices always have the same
# number of rows; the former length-mismatch fallback could never help because
# the np.hstack below would fail on a mismatch anyway.
pair_dot = np.einsum('ij,ij->i', job_emb, exp_emb)
pair_norm = np.linalg.norm(job_emb, axis=1) * np.linalg.norm(exp_emb, axis=1)
# a zero-norm embedding yields similarity 0 rather than a division by zero
pair_norm[pair_norm == 0] = 1.0
cos_sim = (pair_dot / pair_norm).reshape(-1, 1)


# concatenate both embeddings per row and compress them to 50 components
# NOTE(review): the SVD is fitted on all rows, before the train/test split.
svd = TruncatedSVD(n_components=50, random_state=42)
emb_combined = np.hstack([job_emb, exp_emb])
emb_reduced = svd.fit_transform(emb_combined)

# tabular features, with the cosine similarity appended as the last column
X_tab = df[['Общий опыт работы.months','year','dur_count','dur_mean','dur_max']].values
X_tab = np.hstack([X_tab, cos_sim])

# final feature matrix: tabular columns followed by the reduced embeddings
X = np.hstack([X_tab, emb_reduced])
y = df['target'].values

# 6. training and test sets
# Split row positions (not the arrays) so X and y are sliced by the same indices.
idx = np.arange(y.shape[0])
train_idx, test_idx = train_test_split(
    idx,
    test_size=0.2,
    stratify=y,
    random_state=42,
)
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

# report the shapes and the class proportions of both parts
print(f"Размерность обучающей выборки X_train: {X_train.shape}, y_train: {y_train.shape}")
print(f"Размерность тестовой выборки X_test: {X_test.shape}, y_test: {y_test.shape}")
print(f"Пропорции классов в y_train: {np.bincount(y_train) / len(y_train)}")
print(f"Пропорции классов в y_test: {np.bincount(y_test) / len(y_test)}")


# 7. comparison of different Stacking Classifier configurations
# Local imports: this cell does not import the scaling utilities itself.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

print("\n--- Сравнительный анализ различных конфигураций Stacking Classifier ---")

# Pool of base estimators, keyed by a short name.
# Logistic regression and the MLP are sensitive to feature scale, so each is
# wrapped in a pipeline with its own StandardScaler (refit inside every CV
# fold), consistent with the MLP-only cell. The tree-based models are left on
# the raw features.
base_model_options = {
    'lr': make_pipeline(
        StandardScaler(),
        LogisticRegression(max_iter=500, class_weight='balanced', random_state=42),
    ),
    'mlp': make_pipeline(
        StandardScaler(),
        MLPClassifier(
            hidden_layer_sizes=(64, 32),
            max_iter=300,
            early_stopping=True,
            tol=1e-4,
            random_state=42,
            solver='adam',
        ),
    ),
    'rf': RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        class_weight='balanced',  # account for class imbalance
        n_jobs=-1,
    ),
    'gb': GradientBoostingClassifier(
        n_estimators=100,
        random_state=42,
    ),
}

# optional boosters are added only when their packages were importable
if LGBM_AVAILABLE:
    base_model_options['lgbm'] = lgb.LGBMClassifier(
        random_state=42,
        class_weight='balanced',
    )
if XGB_AVAILABLE:
    # ratio of negative to positive training samples, used as the class weight
    scale_pos_weight_val = (len(y_train) - y_train.sum()) / y_train.sum()
    base_model_options['xgb'] = xgb.XGBClassifier(
        random_state=42,
        # NOTE(review): confirm the installed xgboost still accepts this flag
        use_label_encoder=False,
        eval_metric='logloss',
        scale_pos_weight=scale_pos_weight_val,
        n_jobs=-1,
    )

# Named combinations of base models to feed into StackingClassifier.
# Each label maps to the keys of `base_model_options` that the combination uses.
_combo_keys = {
    'LR + MLP': ('lr', 'mlp'),
    'LR + RF': ('lr', 'rf'),
    'LR + GB': ('lr', 'gb'),
    'MLP + RF': ('mlp', 'rf'),
    'MLP + GB': ('mlp', 'gb'),
    'RF + GB': ('rf', 'gb'),
    'LR + MLP + RF': ('lr', 'mlp', 'rf'),
    'LR + MLP + GB': ('lr', 'mlp', 'gb'),
}

# One extra "everything" combination, whose membership depends on which
# optional boosters were importable.
_core_keys = ('lr', 'mlp', 'rf', 'gb')
if LGBM_AVAILABLE and XGB_AVAILABLE:
    _combo_keys['LR + MLP + RF + GB + LGBM + XGB'] = _core_keys + ('lgbm', 'xgb')
elif LGBM_AVAILABLE:
    _combo_keys['LR + MLP + RF + GB + LGBM'] = _core_keys + ('lgbm',)
elif XGB_AVAILABLE:
    _combo_keys['LR + MLP + RF + GB + XGB'] = _core_keys + ('xgb',)

# Resolve the keys into the (name, estimator) pairs StackingClassifier expects.
stacking_configs = {
    label: [(key, base_model_options[key]) for key in keys]
    for label, keys in _combo_keys.items()
}

# Local imports: this cell does not import the scaling utilities itself.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# dictionaries collecting the summary metrics and the test probabilities
# of every stacking configuration
results = {}
y_proba_collection = {}

# With passthrough=True the meta-learner sees the raw features next to the
# base-model predictions. The recorded run emitted an lbfgs convergence warning
# for every configuration, including those without a logistic-regression base
# model, so the meta-learner input is standardised before the regression.
final_estimator = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=500, class_weight='balanced', random_state=42),
)

for name, base_estimators in stacking_configs.items():
    print(f"\nОбучение Stacking Classifier с базовыми моделями: {name}")
    stack = StackingClassifier(
        estimators=base_estimators,
        final_estimator=final_estimator,
        cv=5,
        n_jobs=-1,
        passthrough=True
    )

    # fit the stacked model
    stack.fit(X_train, y_train)

    # probability of class 1 for every test row
    y_proba = stack.predict_proba(X_test)[:, 1]
    y_proba_collection[name] = y_proba

    # ROC AUC on the test set
    roc_auc = roc_auc_score(y_test, y_proba)

    # Threshold that maximises F1 for the positive class.
    # NOTE(review): the threshold is selected on the same test set the F1 below
    # is reported on, so that F1 is optimistic.
    prec_pos, rec_pos, thr_pos = precision_recall_curve(y_test, y_proba)
    f1_pos = 2 * prec_pos * rec_pos / (prec_pos + rec_pos + 1e-8)
    # precision/recall have one more entry than thresholds; the final point has
    # no threshold, so it is excluded to keep the index valid for thr_pos
    best_i_pos = np.nanargmax(f1_pos[:-1])
    opt_thresh_f1_pos = thr_pos[best_i_pos]

    y_pred_at_f1_pos_thresh = (y_proba >= opt_thresh_f1_pos).astype(int)

    # full classification report at the selected threshold
    report_at_f1_pos_thresh = classification_report(y_test, y_pred_at_f1_pos_thresh,
                                                  target_names=['Не приглашён','Приглашён'],
                                                  output_dict=True)
    f1_score_positive_class = report_at_f1_pos_thresh['Приглашён']['f1-score']

    results[name] = {
        'ROC AUC': roc_auc,
        'Оптимальный Порог (F1 Приглашён)': opt_thresh_f1_pos,
        'F1-мера (Приглашён) при этом пороге': f1_score_positive_class
    }

    print(f"Test ROC AUC: {roc_auc:.3f}")
    print(f"Оптимальный порог (F1 Приглашён): {opt_thresh_f1_pos:.2f}")
    print(f"F1-мера (Приглашён): {f1_score_positive_class:.3f}")

# summary table of all configurations, best ROC AUC first
print("\n--- Сводка производительности Stacking Classifier ---")
results_df = pd.DataFrame.from_dict(results, orient='index')
print(results_df.sort_values(by='ROC AUC', ascending=False).to_markdown(numalign="left", stralign="left"))

Все необходимые файлы доступны.
Генерация эмбеддингов для описаний вакансий...


Batches:   0%|          | 0/611 [00:00<?, ?it/s]

Генерация эмбеддингов для описаний опыта работы кандидатов...


Batches:   0%|          | 0/611 [00:00<?, ?it/s]

Добавлена фича: Косинусное сходство между описанием вакансии и опытом.
Размерность обучающей выборки X_train: (15617, 56), y_train: (15617,)
Размерность тестовой выборки X_test: (3905, 56), y_test: (3905,)
Пропорции классов в y_train: [0.33207402 0.66792598]
Пропорции классов в y_test: [0.33213828 0.66786172]

--- Сравнительный анализ различных конфигураций Stacking Classifier ---

Обучение Stacking Classifier с базовыми моделями: LR + MLP


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.776
Оптимальный порог (F1 Приглашён): 0.23
F1-мера (Приглашён): 0.825

Обучение Stacking Classifier с базовыми моделями: LR + RF


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.817
Оптимальный порог (F1 Приглашён): 0.30
F1-мера (Приглашён): 0.844

Обучение Stacking Classifier с базовыми моделями: LR + GB


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.833
Оптимальный порог (F1 Приглашён): 0.31
F1-мера (Приглашён): 0.852

Обучение Stacking Classifier с базовыми моделями: MLP + RF


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.818
Оптимальный порог (F1 Приглашён): 0.26
F1-мера (Приглашён): 0.846

Обучение Stacking Classifier с базовыми моделями: MLP + GB


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.834
Оптимальный порог (F1 Приглашён): 0.29
F1-мера (Приглашён): 0.851

Обучение Stacking Classifier с базовыми моделями: RF + GB


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.836
Оптимальный порог (F1 Приглашён): 0.32
F1-мера (Приглашён): 0.853

Обучение Stacking Classifier с базовыми моделями: LR + MLP + RF


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.819
Оптимальный порог (F1 Приглашён): 0.28
F1-мера (Приглашён): 0.844

Обучение Stacking Classifier с базовыми моделями: LR + MLP + GB


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.833
Оптимальный порог (F1 Приглашён): 0.31
F1-мера (Приглашён): 0.851

Обучение Stacking Classifier с базовыми моделями: LR + MLP + RF + GB + LGBM + XGB


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Test ROC AUC: 0.838
Оптимальный порог (F1 Приглашён): 0.23
F1-мера (Приглашён): 0.853

--- Сводка производительности Stacking Classifier ---
|                                 | ROC AUC   | Оптимальный Порог (F1 Приглашён)   | F1-мера (Приглашён) при этом пороге   |
|:--------------------------------|:----------|:-----------------------------------|:--------------------------------------|
| LR + MLP + RF + GB + LGBM + XGB | 0.837905  | 0.233345                           | 0.852813                              |
| RF + GB                         | 0.836089  | 0.324209                           | 0.853082                              |
| MLP + GB                        | 0.833938  | 0.289335                           | 0.850651                              |
| LR + GB                         | 0.833399  | 0.313238                           | 0.851763                              |
| LR + MLP + GB                   | 0.832738  | 0.307638                           | 0.851009                