In [None]:
# Candidate classifiers, keyed by the display name that the rest of the
# notebook uses to look them up. Each estimator gets a fixed random_state.
models = {
    'LogisticRegression': LogisticRegression(
        max_iter=1000,
        random_state=42,
    ),
    'RidgeClassifier': RidgeClassifier(
        random_state=42,
    ),
    'RandomForest': RandomForestClassifier(
        random_state=42,
    ),
    'GradientBoosting': GradientBoostingClassifier(
        random_state=42,
    ),
    # CatBoost is kept silent and is told not to write its training files.
    'CatBoost': CatBoostClassifier(
        allow_writing_files=False,
        verbose=0,
        random_state=42,
    ),
}

In [None]:
def get_model_params(trial, model_name):
    """Sample a hyperparameter set for ``model_name`` from an Optuna trial.

    Args:
        trial: Object exposing Optuna's ``suggest_float`` / ``suggest_int`` /
            ``suggest_categorical`` methods.
        model_name: One of 'LogisticRegression', 'RidgeClassifier',
            'RandomForest', 'GradientBoosting' or 'CatBoost'.

    Returns:
        dict mapping estimator keyword arguments to the sampled values.
        Unknown model names yield an empty dict, i.e. the estimator keeps
        its constructor defaults.
    """
    if model_name == 'LogisticRegression':
        # Without a search space every trial would score the very same
        # default model; tune the inverse regularisation strength instead.
        return {
            'C': trial.suggest_float('C', 1e-3, 100.0, log=True)
        }
    if model_name == 'RidgeClassifier':
        return {
            'alpha': trial.suggest_float('alpha', 0.01, 100.0, log=True),
            'solver': trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg'])
        }
    if model_name == 'RandomForest':
        return {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'max_depth': trial.suggest_int('max_depth', 3, 15),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
            'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None])
        }
    if model_name == 'GradientBoosting':
        return {
            'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10)
        }
    if model_name == 'CatBoost':
        return {
            'iterations': trial.suggest_int('iterations', 100, 500),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'depth': trial.suggest_int('depth', 3, 10),
            'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
            'border_count': trial.suggest_int('border_count', 32, 255),
            'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 1)
        }
    # Unrecognised model: nothing to tune.
    return {}

In [None]:
def _weighted_average(score_fn):
    """Return a two-argument scorer that calls ``score_fn`` with average='weighted'."""
    def scorer(y_true, y_pred):
        return score_fn(y_true, y_pred, average='weighted')
    return scorer


# Metric name -> callable(y_true, y_pred). Precision, recall and F1 use
# weighted averaging; accuracy is passed through unchanged.
metrics = {
    'Accuracy': accuracy_score,
    'Precision': _weighted_average(precision_score),
    'Recall': _weighted_average(recall_score),
    'F1': _weighted_average(f1_score),
}

In [None]:
# Hyperparameter search. For every candidate model an Optuna study is run on
# each PCA-transformed training set; the data version with the best
# cross-validated accuracy wins and the tuned model is refit on it.
best_models = {}
best_scores = {}
pca_versions = {
    'std': X_train_pca_std,
    'minmax': X_train_pca_minmax,
    'robust': X_train_pca_robust
}

# The splitter does not depend on the model or the trial, so build it once.
# With shuffle=True and an integer random_state each split() call produces the
# same folds, so every trial is scored on identical splits.
cv_splitter = KFold(n_splits=2, shuffle=True, random_state=42)


def make_objective(model_name, X_train_data):
    """Build an Optuna objective bound to one model and one training matrix.

    Passing the values in explicitly avoids a closure over the loop variables,
    which would only be resolved at call time.
    """
    def objective(trial):
        params = get_model_params(trial, model_name)
        model = clone(models[model_name])
        model.set_params(**params)
        scores = cross_val_score(model, X_train_data, y_train, cv=cv_splitter, scoring='accuracy')
        return scores.mean()
    return objective


for model_name in models:
    # Start below any attainable accuracy so the first finished study is
    # always recorded, even if its score is 0.
    model_best_score = float('-inf')
    model_best_version = None
    model_best_params = None

    print(f"\nПодбор гиперпараметров для {model_name}")

    for version_name, X_train_data in pca_versions.items():
        print(f"Версия данных: {version_name}")

        # Seed the sampler: the estimators and the CV splitter are already
        # seeded, an unseeded study would make the search non-reproducible.
        study = optuna.create_study(
            direction='maximize',
            sampler=optuna.samplers.TPESampler(seed=42),
        )
        study.optimize(make_objective(model_name, X_train_data), n_trials=3, show_progress_bar=True)

        current_accuracy = study.best_value

        print(f"Лучшие параметры: {study.best_params}")
        print(f"Лучшая Accuracy: {current_accuracy:.4f}")

        if current_accuracy > model_best_score:
            model_best_score = current_accuracy
            model_best_version = version_name
            model_best_params = study.best_params

    # Fit a single final model on the winning data version instead of
    # refitting every time an intermediate version takes the lead.
    model_best_instance = None
    if model_best_version is not None:
        model_best_instance = clone(models[model_name])
        model_best_instance.set_params(**model_best_params)
        model_best_instance.fit(pca_versions[model_best_version], y_train)

    print(f"Лучшая версия для {model_name}: {model_best_version} с Accuracy: {model_best_score:.4f}")

    best_models[model_name] = {
        'model': model_best_instance,
        'version': model_best_version,
        'params': model_best_params
    }
    best_scores[model_name] = model_best_score

In [None]:
# Evaluate every tuned model on the train and test split of the PCA version it
# was selected with, then print a per-model table and the best test result.
pca_data_mapping = {
    'std': (X_train_pca_std, X_test_pca_std),
    'minmax': (X_train_pca_minmax, X_test_pca_minmax),
    'robust': (X_train_pca_robust, X_test_pca_robust)
}

results = []

for model_name, model_info in best_models.items():
    model = model_info['model']
    best_version = model_info['version']

    if model is None or best_version is None:
        # No fitted estimator was stored for this model, so there is nothing
        # to score; say so instead of failing on model.predict.
        print(f"Пропуск {model_name}: нет обученной модели")
        continue

    X_train_data, X_test_data = pca_data_mapping[best_version]

    # One row per split; the metric columns come from the shared `metrics`
    # mapping so train and test are always scored the same way.
    for dataset_name, features, y_true in (
        ('Train', X_train_data, y_train),
        ('Test', X_test_data, y_test),
    ):
        y_pred = model.predict(features)
        row = {
            'Model': model_name,
            'Dataset': dataset_name,
            'PCA_Version': best_version,
        }
        for metric_name, score_fn in metrics.items():
            row[metric_name] = score_fn(y_true, y_pred)
        results.append(row)

# Explicit columns keep the frame well-formed even when no rows were collected.
results_df = pd.DataFrame(results, columns=['Model', 'Dataset', 'PCA_Version', *metrics])

print("\nТаблица метрик лучших версий PCA по моделям:")
print(f"{'Модель':<20} {'Версия PCA':<12} {'Выборка':<8} {'Accuracy':<10} {'Precision':<10} {'Recall':<10} {'F1':<10}")

results_sorted = results_df.sort_values(['Model', 'Dataset'])

for _, row in results_sorted.iterrows():
    print(f"{row['Model']:<20} {row['PCA_Version']:<12} {row['Dataset']:<8} "
          f"{row['Accuracy']:9.4f}  {row['Precision']:9.4f}  {row['Recall']:9.4f}  {row['F1']:9.4f}")

test_results = results_df[results_df['Dataset'] == 'Test']

# NOTE(review): the winner below is picked by its test accuracy, so the
# reported test score of that winner is an optimistic estimate.
if not test_results.empty:
    overall_best = test_results.loc[test_results['Accuracy'].idxmax()]

    print("\nЛучшая модель на тестовой выборке:")
    print(f"Модель: {overall_best['Model']}")
    print(f"Версия PCA: {overall_best['PCA_Version']}")
    for metric_name in ('Accuracy', 'Precision', 'Recall', 'F1'):
        print(f"{metric_name}: {overall_best[metric_name]:.4f}")