In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import recall_score, classification_report
from sklearn.model_selection import GroupShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GroupShuffleSplit
import numpy as np
from fmri_processing.utils import draw_heat_map
from fmri_processing.functions import funcs
import os

In [19]:
# train_matrix  = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/HC/max.npy'
# test_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/test/max.npy'

# train_matrix, test_matrix = test_matrix, train_matrix
def draw_all_subjects(matrix):
    N = matrix.shape[0]  # Длина массива
    sub_num = N // 5

    subjects = np.array_split(matrix, sub_num)
    for idx, sub in enumerate(subjects):
        print(f'sub-{idx:02d}')
        draw_heat_map(subjects[idx])

In [20]:
import numpy as np

def z_score_matrix(matrix):
    shape = matrix.shape

    # 1. Разбиваем матрицу на группы по 5 элементов
    flattened = matrix.flatten()  # Преобразуем в 1D-массив
    num_groups = len(flattened) // 5
    groups = flattened[:num_groups * 5].reshape(-1, 5)  # Группы по 5 элементов

    # 2. Вычисляем z-показатели для каждой группы
    z_scores = np.zeros_like(groups)
    for i in range(groups.shape[0]):
        group = groups[i]
        mean = np.mean(group)
        std = np.std(group)
        if std != 0:
            z_scores[i] = (group - mean) / std
        else:
            z_scores[i] = 0  # Если все элементы одинаковые

    # 3. Собираем обратно в матрицу
    flattened_z = z_scores.flatten()
    # Если исходная длина не делилась на 5, добавляем оставшиеся элементы без изменений
    if len(flattened) % 5 != 0:
        remaining = flattened[num_groups * 5:]
        flattened_z = np.concatenate([flattened_z, remaining])

    # Преобразуем обратно в исходную размерность
    result_matrix = flattened_z.reshape(shape)
    return result_matrix

In [21]:
def prepare_data(train_matrix):
    matrix = np.load(train_matrix)
    N = matrix.shape[0]  # Длина массива
    sub_num = N // 5    # Количество испытуемых

    labels = np.zeros(N, dtype=int)  # Создаем массив из нулей
    labels[3::5] = 1  # Каждый 4-й элемен
    print(matrix.shape)

    X = matrix
    y = labels


    # Группы для разделения
    groups = np.repeat(np.arange(sub_num), 5)  # [0,0,0,0,0, 1,1,1,1,1,...]

    splitter = GroupShuffleSplit(n_splits=1, test_size=0.3, random_state=30)
    train_idx, test_idx = next(splitter.split(X, y, groups))

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]


In [22]:
def train_best_model_by_recall_(train_matrix, target_class=1, test_size=0.3, random_state=42, verbose=True):
    """
    Обучает модели и возвращает лучшую по recall для указанного класса
    
    Параметры:
    X - признаки
    y - целевая переменная
    groups - группы для кросс-валидации
    target_class - класс, для которого оптимизируем recall (по умолчанию 1)
    test_size - доля тестовой выборки
    random_state - для воспроизводимости
    verbose - вывод информации о процессе обучения
    """
    
    matrix = np.load(train_matrix)
    N = matrix.shape[0]  # Длина массива
    sub_num = N // 5    # Количество испытуемых

    labels = np.zeros(N, dtype=int)  # Создаем массив из нулей
    labels[3::5] = 1  # Каждый 4-й элемен
    print(matrix.shape)

    X = matrix
    y = labels


    # Группы для разделения
    groups = np.repeat(np.arange(sub_num), 5)  # [0,0,0,0,0, 1,1,1,1,1,...]

    splitter = GroupShuffleSplit(n_splits=1, test_size=0.3, random_state=30)
    train_idx, test_idx = next(splitter.split(X, y, groups))

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    
    
    # 1. Разделение на train/test с сохранением групп
    splitter = GroupShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
    train_idx, test_idx = next(splitter.split(X, y, groups))
    
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    groups_train = groups[train_idx]
    
    # 2. Инициализация моделей
    models = {
        "Logistic Regression": Pipeline([
            ('scaler', StandardScaler()),
            ('model', LogisticRegression(class_weight='balanced', 
                                       max_iter=1000, 
                                       random_state=random_state))
        ]),
        "Random Forest": RandomForestClassifier(class_weight='balanced_subsample', 
                                              random_state=random_state),
        "SVM": Pipeline([
            ('scaler', StandardScaler()),
            ('model', SVC(kernel='rbf', 
                         class_weight='balanced', 
                         probability=True, 
                         random_state=random_state))
        ]),
        "XGBoost": XGBClassifier(
            scale_pos_weight=4,  # Автоматический расчет
            random_state=random_state
        )
    }

    # 3. Кросс-валидация по группам
    gss = GroupShuffleSplit(n_splits=5, test_size=0.3, random_state=random_state)
    model_recalls = []

    for name, model in models.items():
        recall_scores = []
        
        for fold, (train_idx_fold, val_idx_fold) in enumerate(gss.split(X_train, y_train, groups_train)):
            model.fit(X_train[train_idx_fold], y_train[train_idx_fold])
            y_pred = model.predict(X_train[val_idx_fold])
            recall = recall_score(y_train[val_idx_fold], y_pred, pos_label=target_class)
            recall_scores.append(recall)
        
        print(recall_scores)
        mean_recall = np.mean(recall_scores)
        std_recall = np.std(recall_scores)
        model_recalls.append((name, mean_recall, std_recall, model))
        
        if verbose:
            print(f"{name:<20} | Recall (class {target_class}): {mean_recall:.3f} ± {std_recall:.3f}")

        model.fit(X_train, y_train)
    # # 4. Выбор лучшей модели
    # best_name, best_recall, best_std, best_model = max(model_recalls, key=lambda x: x[1])
    
    # # 5. Финальное обучение на полном train наборе
    # best_model.fit(X_train, y_train)
    
    # # 6. Оценка на тестовом наборе
    # test_recall = recall_score(y_test, best_model.predict(X_test), pos_label=target_class)
    
    # if verbose:
    #     print(f"\n{'='*50}")
    #     print(f"BEST MODEL: {best_name}")
    #     print(f"CV Recall (class {target_class}): {best_recall:.3f} ± {best_std:.3f}")
    #     print(f"Test Recall (class {target_class}): {test_recall:.3f}")
    #     print("="*50)

    return models

In [23]:
def train_best_model_by_recall(train_matrix, target_class=1, test_size=0.3, random_state=42, verbose=True):
    """
    Обучает модели и возвращает лучшую по recall для указанного класса
    
    Параметры:
    train_matrix - путь к файлу с матрицей признаков
    target_class - класс, для которого оптимизируем recall
    test_size - доля валидационной выборки в кросс-валидации
    random_state - для воспроизводимости
    verbose - вывод информации о процессе обучения
    
    Возвращает:
    Лучшую модель (по recall на валидации), метрики моделей
    """
    
    # 1. Загрузка данных и подготовка
    matrix = np.load(train_matrix)
    N = matrix.shape[0]
    sub_num = N // 5

    labels = np.zeros(N, dtype=int)
    labels[3::5] = 1
    
    X = matrix
    y = labels
    groups = np.repeat(np.arange(sub_num), 5)

    # 2. Инициализация моделей
    models = {
        "Logistic Regression": Pipeline([
            ('scaler', StandardScaler()),
            ('model', LogisticRegression(class_weight='balanced', 
                                       max_iter=1000, 
                                       random_state=random_state))
        ]),



    }
    # from sklearn.model_selection import RandomizedSearchCV
    # from scipy.stats import loguniform  # для логарифмического распределения C

    # param_dist = {
    #     'model__C': loguniform(1e-4, 100),  # C в диапазоне [0.0001, 100]
    #     'model__penalty': ['l1', 'l2', 'elasticnet', None],
    #     'model__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    #     'model__l1_ratio': [0, 0.25, 0.5, 0.75, 1]  # для elasticnet
    # }

    # random_search = RandomizedSearchCV(
    #     estimator=models['Logistic Regression'],
    #     param_distributions=param_dist,
    #     n_iter=50,  # количество случайных комбинаций
    #     scoring='accuracy',
    #     cv=5,
    #     n_jobs=-1,
    #     verbose=1,
    #     random_state=random_state
    # )

    # models['Logistic Regression'] = random_search

    # 3. Кросс-валидация по группам
    gss = GroupShuffleSplit(n_splits=5, test_size=test_size, random_state=random_state)
    model_recalls = []

    for name, model in models.items():
        recall_scores = []
        
        for train_idx, val_idx in gss.split(X, y, groups):
            model.fit(X[train_idx], y[train_idx])
            y_pred = model.predict(X[val_idx])
            recall = recall_score(y[val_idx], y_pred, pos_label=target_class)
            recall_scores.append(recall)
        
        mean_recall = np.mean(recall_scores)
        std_recall = np.std(recall_scores)
        model_recalls.append((name, mean_recall, std_recall, model))
        
        if verbose:
            print(f"{name:<20} | Recall (class {target_class}): {mean_recall:.3f} ± {std_recall:.3f}")
        model.fit(X, y)
    # # 4. Выбор лучшей модели
    # best_model_info = max(model_recalls, key=lambda x: x[1])
    # best_model = best_model_info[3]
    
    # # 5. Финальное обучение на всех данных
    # best_model.fit(X, y)
    
    return models

In [24]:
import numpy as np
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import recall_score

def train_best_model_by_recall(train_matrix, target_class=1, random_state=42, verbose=True):
    """
    Обучает модели и возвращает лучшую по recall для указанного класса
    
    Параметры:
    train_matrix - путь к файлу с матрицей признаков
    target_class - класс, для которого оптимизируем recall
    random_state - для воспроизводимости
    verbose - вывод информации о процессе обучения
    
    Возвращает:
    Лучшую модель (по recall на валидации), метрики моделей
    """
    
    # 1. Загрузка данных и подготовка
    matrix = np.load(train_matrix)
    N = matrix.shape[0]
    sub_num = N // 5

    labels = np.zeros(N, dtype=int)
    labels[3::5] = 1
    
    X = matrix
    y = labels
    groups = np.repeat(np.arange(sub_num), 5)

    # 2. Инициализация моделей
    models = {
        "Logistic Regression": Pipeline([
            ('scaler', StandardScaler()),
            ('model', LogisticRegression(class_weight='balanced', 
                                       max_iter=1000, 
                                       random_state=random_state))
        ]),
        "Random Forest": RandomForestClassifier(
            class_weight='balanced_subsample', 
            random_state=random_state
        ),
        "SVM": Pipeline([
            ('scaler', StandardScaler()),
            ('model', SVC(kernel='rbf', 
                         class_weight='balanced', 
                         probability=True, 
                         random_state=random_state))
        ]),
        "XGBoost": XGBClassifier(
            scale_pos_weight=(len(y) - sum(y)) / sum(y),
            random_state=random_state,
            eval_metric='logloss'
        )
    }

    # 3. Кросс-валидация с LeaveOneGroupOut
    logo = LeaveOneGroupOut()
    model_recalls = []

    for name, model in models.items():
        recall_scores = []
        
        for train_idx, val_idx in logo.split(X, y, groups=groups):
            model.fit(X[train_idx], y[train_idx])
            y_pred = model.predict(X[val_idx])
            recall = recall_score(y[val_idx], y_pred, pos_label=target_class)
            recall_scores.append(recall)
        
        mean_recall = np.mean(recall_scores)
        std_recall = np.std(recall_scores)
        model_recalls.append((name, mean_recall, std_recall, model))
        
        if verbose:
            print(f"{name:<20} | Recall (class {target_class}): {mean_recall:.3f} ± {std_recall:.3f}")

        model.fit(X,y)
    # # 4. Выбор и финальное обучение лучшей модели
    # best_model_info = max(model_recalls, key=lambda x: x[1])
    # best_model = best_model_info[3].fit(X, y)  # Обучаем на всех данных
    
    return models

In [25]:
def train_and_predict_on_test(train_matrix, test_matrix):
    models = train_best_model_by_recall(train_matrix)
    for name, model in models.items():
        print(f'Model: {name}')
        matrix_test = np.load(test_matrix)
        N_test = matrix_test.shape[0]  # Длина массива
        sub_num_test = N_test // 5

        labels_test = np.zeros(N_test, dtype=int)  # Создаем массив из нулей
        labels_test[3::5] = 1  # Каждый 4-й элемен
        print(classification_report(labels_test, model.predict(matrix_test)))

In [26]:
proportional_train_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/proportional/raw_HC/auc.npy'
proportional_test_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/proportional/raw_test/auc.npy'

reduced_train_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/reduced_ranks/auc_raw_HC.npy'
reduced_test_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/reduced_ranks/auc_test.npy'

ranks_train_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/raw_HC/auc.npy'
ranks_test_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/raw_test/auc.npy'

# train_matrix  = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/HC/max.npy'
# test_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/test/max.npy'

train_matrix = proportional_train_matrix
test_matrix = proportional_test_matrix 

# train_matrix = ranks_train_matrix
# test_matrix = ranks_test_matrix

# train_matrix = reduced_train_matrix
# test_matrix = reduced_test_matrix

train_matrix, test_matrix = test_matrix, train_matrix

In [27]:
def train_different_params(train_matrix_base, test_matrix_base):
    for func_name in funcs.keys():
        if func_name in ('max_min', 'min'):
            continue
        train_matrix = os.path.join(train_matrix_base, func_name + '.npy')
        test_matrix = os.path.join(test_matrix_base, func_name + '.npy')

        print('-'*10 + func_name + '-'*10)
        train_and_predict_on_test(train_matrix, test_matrix)
        print('-' * 100, sep='\n\n\n\n\n\n')

        train_matrix, test_matrix = test_matrix, train_matrix

        print('TEST AND TRAIN DATA REVERT' * 10)
        print('-'*10 + func_name + '-'*10)
        train_and_predict_on_test(train_matrix, test_matrix)
        print('-' * 100, sep='\n\n\n\n\n')

# Пропорциональные баллы

In [35]:
train_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/proportional/raw_hc_data'
test_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/proportional/raw_test_data'

schz_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/proportional/raw_schz_data'

cards_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/proportional/raw_card_hc_data'

# train_different_params(train_matrix_base, test_matrix_base)

train_different_params(train_matrix_base, cards_matrix)


----------auc----------
Logistic Regression  | Recall (class 1): 0.765 ± 0.424
Random Forest        | Recall (class 1): 0.235 ± 0.424
SVM                  | Recall (class 1): 0.353 ± 0.478
XGBoost              | Recall (class 1): 0.647 ± 0.478
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        68
           1       0.20      1.00      0.33        17

    accuracy                           0.20        85
   macro avg       0.10      0.50      0.17        85
weighted avg       0.04      0.20      0.07        85

Model: Random Forest
              precision    recall  f1-score   support

           0       0.80      1.00      0.89        68
           1       0.00      0.00      0.00        17

    accuracy                           0.80        85
   macro avg       0.40      0.50      0.44        85
weighted avg       0.64      0.80      0.71        85

Model: SVM
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Logistic Regression  | Recall (class 1): 0.412 ± 0.492
Random Forest        | Recall (class 1): 0.000 ± 0.000
SVM                  | Recall (class 1): 0.059 ± 0.235
XGBoost              | Recall (class 1): 0.294 ± 0.456
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.80      1.00      0.89        68
           1       0.00      0.00      0.00        17

    accuracy                           0.80        85
   macro avg       0.40      0.50      0.44        85
weighted avg       0.64      0.80      0.71        85

Model: Random Forest
              precision    recall  f1-score   support

           0       0.80      1.00      0.89        68
           1       0.00      0.00      0.00        17

    accuracy                           0.80        85
   macro avg       0.40      0.50      0.44        85
weighted avg       0.64      0.80      0.71        85

Model: SVM
              precision    recall  f1-score   support

           0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Logistic Regression  | Recall (class 1): 0.529 ± 0.499
Random Forest        | Recall (class 1): 0.353 ± 0.478
SVM                  | Recall (class 1): 0.529 ± 0.499
XGBoost              | Recall (class 1): 0.588 ± 0.492
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        68
           1       0.20      1.00      0.33        17

    accuracy                           0.20        85
   macro avg       0.10      0.50      0.17        85
weighted avg       0.04      0.20      0.07        85

Model: Random Forest
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        68
           1       0.20      1.00      0.33        17

    accuracy                           0.20        85
   macro avg       0.10      0.50      0.17        85
weighted avg       0.04      0.20      0.07        85

Model: SVM
              precision    recall  f1-score   support

           0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Logistic Regression  | Recall (class 1): 0.353 ± 0.478
Random Forest        | Recall (class 1): 0.000 ± 0.000
SVM                  | Recall (class 1): 0.118 ± 0.322
XGBoost              | Recall (class 1): 0.118 ± 0.322
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.80      1.00      0.89        68
           1       0.00      0.00      0.00        17

    accuracy                           0.80        85
   macro avg       0.40      0.50      0.44        85
weighted avg       0.64      0.80      0.71        85

Model: Random Forest
              precision    recall  f1-score   support

           0       0.80      1.00      0.89        68
           1       0.00      0.00      0.00        17

    accuracy                           0.80        85
   macro avg       0.40      0.50      0.44        85
weighted avg       0.64      0.80      0.71        85

Model: SVM
              precision    recall  f1-score   support

           0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Баллы по 1,2 

In [30]:
reduced_train_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/reduced_ranks/raw_hc_data'
reduced_test_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/reduced_ranks/raw_test_data'


reduced_schz_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/reduced_ranks/raw_schz_data'

# train_different_params(reduced_train_matrix_base, reduced_test_matrix_base)

train_different_params(reduced_train_matrix_base, reduced_schz_matrix_base)

----------auc----------
Logistic Regression  | Recall (class 1): 0.647 ± 0.478
Random Forest        | Recall (class 1): 0.294 ± 0.456
SVM                  | Recall (class 1): 0.471 ± 0.499
XGBoost              | Recall (class 1): 0.588 ± 0.492
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.81      0.91      0.86        56
           1       0.29      0.14      0.19        14

    accuracy                           0.76        70
   macro avg       0.55      0.53      0.52        70
weighted avg       0.70      0.76      0.72        70

Model: Random Forest
              precision    recall  f1-score   support

           0       0.80      0.98      0.88        56
           1       0.00      0.00      0.00        14

    accuracy                           0.79        70
   macro avg       0.40      0.49      0.44        70
weighted avg       0.64      0.79      0.70        70

Model: SVM
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Logistic Regression  | Recall (class 1): 0.588 ± 0.492
Random Forest        | Recall (class 1): 0.471 ± 0.499
SVM                  | Recall (class 1): 0.588 ± 0.492
XGBoost              | Recall (class 1): 0.529 ± 0.499
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.79      0.88      0.83        56
           1       0.12      0.07      0.09        14

    accuracy                           0.71        70
   macro avg       0.46      0.47      0.46        70
weighted avg       0.66      0.71      0.68        70

Model: Random Forest
              precision    recall  f1-score   support

           0       0.80      0.98      0.88        56
           1       0.00      0.00      0.00        14

    accuracy                           0.79        70
   macro avg       0.40      0.49      0.44        70
weighted avg       0.64      0.79      0.70        70

Model: SVM
              precision    recall  f1-score   support

           0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Просто баллы

In [32]:
ranks_train_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/raw_hc_data'
ranks_test_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/raw_test_data'

ranks_schz_matrix = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/raw_schz_data'

# train_different_params(ranks_train_matrix, ranks_test_matrix)

train_different_params(ranks_train_matrix, ranks_schz_matrix)

----------auc----------
Logistic Regression  | Recall (class 1): 0.647 ± 0.478
Random Forest        | Recall (class 1): 0.235 ± 0.424
SVM                  | Recall (class 1): 0.706 ± 0.456
XGBoost              | Recall (class 1): 0.471 ± 0.499
Model: Logistic Regression


ValueError: Object arrays cannot be loaded when allow_pickle=False

In [33]:
train_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/hc_data'
test_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/test_data'

schz_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/schz_data'
train_different_params(train_matrix_base, schz_matrix_base)

----------auc----------
Logistic Regression  | Recall (class 1): 0.588 ± 0.492
Random Forest        | Recall (class 1): 0.471 ± 0.499
SVM                  | Recall (class 1): 0.588 ± 0.492
XGBoost              | Recall (class 1): 0.412 ± 0.492
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.83      0.89      0.86        56
           1       0.40      0.29      0.33        14

    accuracy                           0.77        70
   macro avg       0.62      0.59      0.60        70
weighted avg       0.75      0.77      0.76        70

Model: Random Forest
              precision    recall  f1-score   support

           0       0.82      0.95      0.88        56
           1       0.40      0.14      0.21        14

    accuracy                           0.79        70
   macro avg       0.61      0.54      0.54        70
weighted avg       0.73      0.79      0.74        70

Model: SVM
              precision    recall  f1-score

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Logistic Regression  | Recall (class 1): 0.647 ± 0.478
Random Forest        | Recall (class 1): 0.529 ± 0.499
SVM                  | Recall (class 1): 0.647 ± 0.478
XGBoost              | Recall (class 1): 0.588 ± 0.492
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.84      0.77      0.80        56
           1       0.32      0.43      0.36        14

    accuracy                           0.70        70
   macro avg       0.58      0.60      0.58        70
weighted avg       0.74      0.70      0.72        70

Model: Random Forest
              precision    recall  f1-score   support

           0       0.83      0.95      0.88        56
           1       0.50      0.21      0.30        14

    accuracy                           0.80        70
   macro avg       0.66      0.58      0.59        70
weighted avg       0.76      0.80      0.77        70

Model: SVM
              precision    recall  f1-score   support

           0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
train_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/prizes/HC'
test_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/ranks_matrix/prizes/test'

train_different_params(train_matrix_base, test_matrix_base)

----------auc----------
Logistic Regression  | Recall (class 1): 0.533 ± 0.125
Random Forest        | Recall (class 1): 0.133 ± 0.125
SVM                  | Recall (class 1): 0.300 ± 0.221
XGBoost              | Recall (class 1): 0.300 ± 0.194
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.86      0.95      0.90        40
           1       0.67      0.40      0.50        10

    accuracy                           0.84        50
   macro avg       0.77      0.68      0.70        50
weighted avg       0.82      0.84      0.82        50

Model: Random Forest
              precision    recall  f1-score   support

           0       0.82      1.00      0.90        40
           1       1.00      0.10      0.18        10

    accuracy                           0.82        50
   macro avg       0.91      0.55      0.54        50
weighted avg       0.85      0.82      0.76        50

Model: SVM
              precision    recall  f1-score

In [16]:
train_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/HC'
test_matrix_base = '/home/aaanpilov/diploma/project/numpy_matrixes/average_stimulus/card_hc'
for func_name in funcs.keys():
    if func_name in ('max_min', 'min'):
        continue
    train_matrix = os.path.join(train_matrix_base, func_name + '.npy')
    test_matrix = os.path.join(test_matrix_base, func_name + '.npy')

    print('-'*10 + func_name + '-'*10)
    train_and_predict_on_test(train_matrix, test_matrix)
    print('-' * 100, sep='\n\n\n\n\n\n')

----------auc----------
Logistic Regression  | Recall (class 1): 0.706 ± 0.456
Random Forest        | Recall (class 1): 0.353 ± 0.478
SVM                  | Recall (class 1): 0.588 ± 0.492
XGBoost              | Recall (class 1): 0.647 ± 0.478
Model: Logistic Regression
              precision    recall  f1-score   support

           0       0.80      0.82      0.81        68
           1       0.20      0.18      0.19        17

    accuracy                           0.69        85
   macro avg       0.50      0.50      0.50        85
weighted avg       0.68      0.69      0.69        85

Model: Random Forest
              precision    recall  f1-score   support

           0       0.81      0.87      0.84        68
           1       0.25      0.18      0.21        17

    accuracy                           0.73        85
   macro avg       0.53      0.52      0.52        85
weighted avg       0.70      0.73      0.71        85

Model: SVM
              precision    recall  f1-score