In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score, recall_score, precision_score
from sklearn.model_selection import GridSearchCV
import joblib

In [29]:
def prepare_dataset(filepath): #подготовка данных для последующего обучения
  balanced_data = pd.read_parquet(filepath)
  X = balanced_data.drop(['target'], axis=1)
  y = balanced_data['target']
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

  scaler = joblib.load('scaler_80_20_new.joblib')
  pca = joblib.load('pca_80_20_new.joblib')

  X_train_scaled = scaler.transform(X_train)
  X_train_pca = pca.transform(X_train_scaled)

  X_test_scaled = scaler.transform(X_test)
  X_test_pca = pca.transform(X_test_scaled)
  return X_train_pca, y_train, X_test_pca, y_test

In [3]:
def get_metrics(y_test, y_pred): #подсчет метрик
  roc_auc = roc_auc_score(y_test, y_pred)
  f1 = f1_score(y_test, y_pred)
  recall = recall_score(y_test, y_pred)
  precision = precision_score(y_test, y_pred)

  print(f"ROC AUC: {roc_auc}")
  print(f"F1 Score: {f1}")
  print(f"Recall: {recall}")
  print(f"Precision: {precision}")

In [30]:
X_train, y_train, X_test, y_test = prepare_dataset('balanced_data_80_20_new.parquet')

In [31]:
X_train.shape

(64879, 100)

In [32]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import f1_score

# Создание и обучение нейронной сети
model = Sequential()
model.add(Dense(units=128, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(Dense(units=64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(units=32, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_accuracy', patience=20, restore_best_weights=True)
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Сохранение модели
model.save('neuron_model_80_20_new.h5')

# Загрузка сохраненной модели
loaded_model = tf.keras.models.load_model('neuron_model_80_20_new.h5')

# Предсказание с загруженной моделью
y_pred_loaded = loaded_model.predict(X_test)
y_pred_binary = (y_pred_loaded > 0.5).astype(int)
get_metrics(y_test, y_pred_binary)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100


  saving_api.save_model(


ROC AUC: 0.5138049784236572
F1 Score: 0.06086510514617883
Recall: 0.03192825112107623
Precision: 0.6496350364963503


In [33]:
y_pred_loaded = loaded_model.predict(X_test)
best_threshold = 0.5
best_f1_score = 0.0

for threshold in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    y_pred_binary = (y_pred_loaded > threshold).astype(int)
    f1 = roc_auc_score(y_test, y_pred_binary)
    if f1 > best_f1_score:
        best_f1_score = f1
        best_threshold = threshold

print(f"Best F1 Score: {best_f1_score} at threshold {best_threshold}")
y_pred_binary = (y_pred_loaded > best_threshold).astype(int)
get_metrics(y_test, y_pred_binary)

Best F1 Score: 0.6516305050536428 at threshold 0.2
ROC AUC: 0.6516305050536428
F1 Score: 0.4305283757338551
Recall: 0.611659192825112
Precision: 0.33216442626144554
