In [4]:
import numpy as np
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report

def train_best_model(X, y=None, groups=None):
    """
    Train several classifiers, select the best one by leave-one-group-out
    cross-validation, refit it on all data and return it.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of shape (n_samples,), optional
        Class labels. When omitted, the first half of the rows is labelled 0
        and the second half 1 (for 34 rows this equals the previously
        hard-coded ``[0]*17 + [1]*17``).
    groups : array-like of shape (n_samples,), optional
        Group id of every row, used by ``LeaveOneGroupOut``. When omitted,
        each consecutive pair of rows shares a group
        (``np.repeat(np.arange(n_samples // 2), 2)``), which for 34 rows
        equals the previously hard-coded grouping.

    Returns
    -------
    estimator
        The candidate with the highest mean cross-validated accuracy, refit
        on the full ``X`` / ``y``. On ties the candidate listed first wins.

    Raises
    ------
    ValueError
        If a default for ``y`` or ``groups`` is needed but ``n_samples`` is
        odd, or if ``y`` / ``groups`` do not have ``n_samples`` entries.
    """
    X = np.asarray(X)
    n_samples = X.shape[0]
    half = n_samples // 2

    if (y is None or groups is None) and n_samples % 2 != 0:
        raise ValueError(
            f'Default labels/groups need an even number of samples, got {n_samples}'
        )

    # Default labels: first half class 0, second half class 1.
    y = np.array([0] * half + [1] * half) if y is None else np.asarray(y)

    # Default groups: rows (0, 1), (2, 3), ... share a group.
    # NOTE(review): combined with the default labels this puts the two middle
    # rows (different classes) into one group and makes every other group
    # single-class. If rows i and i + n_samples/2 belong to the same subject,
    # np.tile(np.arange(half), 2) would be the matching grouping - confirm
    # against the data layout.
    groups = np.repeat(np.arange(half), 2) if groups is None else np.asarray(groups)

    if len(y) != n_samples or len(groups) != n_samples:
        raise ValueError(
            f'X, y and groups must have the same length, got '
            f'{n_samples}, {len(y)} and {len(groups)}'
        )

    # Candidate models; scale-sensitive ones are wrapped in a pipeline so the
    # scaler is fit on the training part of each fold only.
    models = {
        'Logistic Regression (L1)': make_pipeline(
            StandardScaler(),
            LogisticRegression(penalty='l1', solver='saga', max_iter=10000, random_state=42)
        ),
        'Linear SVM': make_pipeline(
            StandardScaler(),
            SVC(kernel='linear', random_state=42)
        ),
        'Random Forest': RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42),
        'Gradient Boosting': GradientBoostingClassifier(n_estimators=50, learning_rate=0.1, max_depth=2, random_state=42),
        'SVM (RBF)': make_pipeline(
            StandardScaler(),
            SVC(kernel='rbf', random_state=42)
        ),
        'MLP': make_pipeline(
            StandardScaler(),
            MLPClassifier(hidden_layer_sizes=(50,), alpha=0.01, max_iter=1000, random_state=42)
        )
    }

    logo = LeaveOneGroupOut()
    # Start below any reachable accuracy so a model is always selected,
    # even when every candidate scores 0.
    best_score = -np.inf
    best_model = None
    best_model_name = ''

    # Evaluate every candidate and remember the best one.
    for model_name, model in models.items():
        scores = []
        for train_idx, test_idx in logo.split(X, y, groups):
            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            scores.append(accuracy_score(y_test, y_pred))

        mean_score = np.mean(scores)
        print(f'{model_name} - Средняя точность: {mean_score:.3f}')

        # Strict comparison: on equal scores the earlier candidate is kept.
        if mean_score > best_score:
            best_score = mean_score
            best_model = model
            best_model_name = model_name

    # Final fit of the winning model on all data.
    print(f'\nЛучшая модель: {best_model_name} с точностью {best_score:.3f}')
    best_model.fit(X, y)

    # Return the selected model (previously the RBF SVM was always returned,
    # still fitted on the last CV fold only).
    return best_model

# Example usage:
# X = ...  # your feature matrix, shape (n_samples, n_features)
# best_model = train_best_model(X)

In [5]:
# Directory prefixes for the saved .npy matrices; file names are appended
# directly to these strings, so the trailing slash is required.
# NOTE(review): absolute, machine-specific paths - adjust when running elsewhere.
train_path = '/home/aaanpilov/diploma/project/numpy_matrixes/average_matrix/HC/'
test_path = '/home/aaanpilov/diploma/project/numpy_matrixes/average_matrix/test/'

In [6]:
# Base names (without extension) of the .npy matrices to evaluate.
options = ['auc', 'max', 'min', 'max_min']

for option in options:
    print(option)
    # Load the training and the held-out matrix stored under the same name.
    matrix = np.load(train_path + option + '.npy')
    matrix_test = np.load(test_path + option + '.npy')
    model = train_best_model(matrix)

    n_test = matrix_test.shape[0]
    # The expected labels below are built as two equal halves; fail early with
    # a clear message instead of a length mismatch inside the report.
    if n_test % 2 != 0:
        raise ValueError(
            f'Expected an even number of test samples for {option!r}, got {n_test}'
        )
    # Expected labels: first half class 0, second half class 1
    # (assumes the test rows are stored in that order - TODO confirm).
    labels_test = np.array([0] * (n_test//2) + [1] * (n_test//2))

    # Predict once and reuse the result for both the report and the raw dump.
    predictions_test = model.predict(matrix_test)
    print(classification_report(labels_test, predictions_test))
    print(predictions_test)
    # NOTE(review): this unconditional break stops after the first option, so
    # only 'auc' is ever evaluated. Remove it to run all options, or drop the
    # loop if a single option is intended.
    break

auc
Logistic Regression (L1) - Средняя точность: 0.971
Linear SVM - Средняя точность: 0.941
Random Forest - Средняя точность: 0.882
Gradient Boosting - Средняя точность: 0.912
SVM (RBF) - Средняя точность: 0.971
MLP - Средняя точность: 0.882

Лучшая модель: Logistic Regression (L1) с точностью 0.971
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

[0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
