In [1]:
# Load IPython's autoreload extension; its reload mode is selected in the next cell.
%load_ext autoreload

In [2]:
# Mode 2: re-import all modules before each execution, so edits to the
# project's helper modules are picked up without restarting the kernel.
%autoreload 2

In [3]:
import os
import sys

colab = True
if os.getenv("COLAB_RELEASE_TAG"):
   colab = True
else:
   colab = False

if colab:
    module_path = os.path.abspath(os.path.join('./real-hdd-failure/code/'))
    if module_path not in sys.path:
        sys.path.append(module_path)

    helper_path = os.path.abspath(os.path.join('./real-hdd-failure/code/helper/'))
    if helper_path not in sys.path:
        sys.path.append(helper_path)
    !{sys.executable} -m pip install -r ./real-hdd-failure/requirements.txt
else:
    module_path = os.path.abspath(os.path.join('..'))
    if module_path not in sys.path:
        sys.path.append(module_path)

    helper_path = os.path.abspath(os.path.join('../helper'))
    if helper_path not in sys.path:
        sys.path.append(helper_path)
# !{sys.executable} -m pip install -r ../../requirements.txt

In [4]:
from helper.dataclass import HDDDataset
from helper.algorithms.Blending import Blending
from helper.algorithms.Stacking import Stacking
from helper.algorithms.Stacking_sklearn import StackingSK
from helper.preprocessing import *
from helper.metrics import *
from helper.eda import *
from sklearn.model_selection import train_test_split
from helper.metrics import *
from helper.saver import Saver

In [5]:
import numpy as np
from scipy.stats import loguniform, randint, uniform
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier

In [6]:
# Load the dataset from 'ST14000NM001G.csv' through the project's HDDDataset helper.
# NOTE(review): the path is relative and identical for Colab and local runs —
# confirm the file is present in the working directory in both environments.
hdd_dataset = HDDDataset.read_csv('ST14000NM001G.csv')

In [None]:
# Preprocessing pipeline: a sequence of order-dependent calls on one stateful
# Preprocessing object (presumably provided by the helper.preprocessing star
# import). Per-step notes are inferred from method names and arguments —
# TODO confirm against the helper implementation.
preprocessor = Preprocessing(hdd_dataset)
preprocessor.clear_unused_data()
preprocessor.add_target_column()
preprocessor.prepare_train_df()
preprocessor.add_time_features()
# Split fractions sum to 1.0 (80% train / 10% test / 10% validation).
preprocessor.train_test_val_split(train_size=0.8, test_size=0.1, val_size=0.1) 
# NOTE(review): called with inplace=False and the return value is discarded —
# confirm this call has an intended effect, otherwise it may be a no-op.
preprocessor.normalize_data(method='formulae', inplace=False)
# 'YJ' presumably selects a Yeo-Johnson transform — TODO confirm.
preprocessor.normalize_data(method='YJ', inplace=True)
preprocessor.drop_unimportant_features(drop=14)
preprocessor.rescale_types()

In [8]:
# Search space sampled by RandomizedSearchCV for the MLPClassifier.
#
# Lists are sampled uniformly. For the scipy distributions:
#   * uniform(loc, scale) samples from [loc, loc + scale]
#   * loguniform(a, b) samples from [a, b], uniformly on a log scale
#   * randint(low, high) samples integers from [low, high - 1]
#
# Parameters whose range spans several orders of magnitude (alpha,
# learning_rate_init, epsilon) use loguniform. A linear uniform would put
# almost all draws in the top decade (e.g. ~90% of learning rates above 0.01),
# leaving the small values practically unexplored.
#
# Per the MLPClassifier documentation, learning_rate_init and batch_size apply
# only to the stochastic solvers and beta_1/beta_2/epsilon only to 'adam'; they
# are ignored for candidates that draw solver='lbfgs'.
param_distributions = {
    'hidden_layer_sizes': [
        (128, 64), (256, 128, 64),
    ],
    'activation': ['tanh', 'relu'],
    'solver': ['adam', 'lbfgs'],
    # L2 regularization strength, wide range sampled on a log scale.
    'alpha': loguniform(1e-4, 0.5),
    # Learning-rate schedule. Per the MLPClassifier documentation it is only
    # used when solver='sgd', which is not in the search space above, so it
    # currently does not affect the fitted models. Kept so the keys reported
    # in best_params_ stay the same.
    'learning_rate': ['constant', 'adaptive', 'invscaling'],
    # Initial learning rate, sampled on a log scale.
    'learning_rate_init': loguniform(1e-5, 0.1),
    # Mini-batch size: integers in [128, 511].
    'batch_size': randint(128, 512),
    # Adam moment decay rates, sampled from [0.5, 1.0].
    'beta_1': uniform(0.5, 0.5),
    'beta_2': uniform(0.5, 0.5),
    # Adam numerical-stability term, sampled on a log scale.
    'epsilon': loguniform(1e-8, 1e-6),
}

In [9]:
# Base estimator for the hyperparameter search. The seed is fixed for
# reproducibility; max_iter caps training at 1500 iterations per fit.
mlp = MLPClassifier(max_iter=1500, random_state=42)

In [10]:
# Randomized hyperparameter search: 100 sampled configurations, each evaluated
# with 4-fold cross-validation (400 fits before the final refit).
# NOTE(review): scoring and n_jobs are left at their defaults, i.e. the
# estimator's own score method and sequential execution. Consider an explicit
# scoring metric if the target classes are imbalanced, and n_jobs if memory allows.
random_search = RandomizedSearchCV(
    estimator=mlp,
    param_distributions=param_distributions,
    n_iter=100,
    cv=4,
    random_state=42,
)

In [None]:
# Run the randomized search on the training split exposed by the preprocessor.
random_search.fit(preprocessor.X_train, preprocessor.y_train)

# Best parameters (the printed label is Russian for "Best parameters:").
print("Лучшие параметры: ", random_search.best_params_)