In [None]:
!pip install optuna

In [None]:
import pandas as pd
import optuna
from sklearn.preprocessing import MinMaxScaler
import random as rd
import numpy as np
from sklearn.metrics import r2_score
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dropout, Dense, BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from google.colab import drive

In [None]:
# Mount Google Drive so the CSV paths below resolve on a fresh Colab runtime.
# If the drive is already mounted, this only prints a notice.
drive.mount('/content/drive')

# Raw well-log tables for the two datasets
df_taranaki = pd.read_csv('/content/drive/MyDrive/Taranaki_PEF_clear.csv')
df_nopims = pd.read_csv('/content/drive/MyDrive/NOPIMS_clear.csv')

In [None]:
# Give the Taranaki columns the same names that are used for the NOPIMS frame
# (well identifier -> 'UWI', density log -> 'RHOB').
df_taranaki = df_taranaki.rename(
    columns={
        'WELLNAME': 'UWI',
        'DENS': 'RHOB',
    }
)

In [None]:
# Index both frames by (well identifier, depth)
df_taranaki = df_taranaki.set_index(['UWI', 'DEPT'])
df_nopims = df_nopims.set_index(['UWI', 'DEPT'])

In [None]:
# Keep only the three input logs and the PEF target, in a fixed column order
df_taranaki = df_taranaki.loc[:, ['RHOB', 'DRHO', 'GR', 'PEF']]
df_nopims = df_nopims.loc[:, ['RHOB', 'DRHO', 'GR', 'PEF']]

In [None]:
# Report how many distinct wells each dataset contains ('UWI' is the first index level)
print(f'Количество скважин Taranaki = {df_taranaki.index.get_level_values("UWI").nunique()}')
print(f'Количество скважин Nopims = {df_nopims.index.get_level_values("UWI").nunique()}')

In [None]:
# Separate the PEF target from the predictor logs for each dataset
y_taranaki = df_taranaki['PEF']
X_taranaki = df_taranaki.drop(columns=['PEF'])

y_nopims = df_nopims['PEF']
X_nopims = df_nopims.drop(columns=['PEF'])

In [None]:
# Stack both datasets row-wise (Taranaki rows first, NOPIMS rows second)
df_X = pd.concat([X_taranaki, X_nopims], axis=0)
df_y = pd.concat([y_taranaki, y_nopims], axis=0)

In [None]:
# NOTE(review): both scalers are fitted on every row of the combined frames,
# i.e. before any train/val/test split is made — confirm this is intended,
# since validation/test statistics then influence the scaling.

# --- Features: min-max scale the three input logs ---
X = df_X.reset_index()
scaler_X = MinMaxScaler()
normalized_values_X = scaler_X.fit_transform(X[['RHOB', 'DRHO', 'GR']])
X[['RHOB', 'DRHO', 'GR']] = normalized_values_X
X_scaled = X.set_index(['UWI', 'DEPT'])

# --- Target: min-max scale PEF with its own scaler ---
y = df_y.reset_index()
scaler_y = MinMaxScaler()
normalized_values_y = scaler_y.fit_transform(y[['PEF']])
y[['PEF']] = normalized_values_y
y_scaled = y.set_index(['UWI', 'DEPT'])

In [None]:
# Pull the NOPIMS wells back out of the scaled frames, selecting by well identifier
X_nopims = X_scaled.loc[X_nopims.index.unique(level='UWI')]
y_nopims = y_scaled.loc[y_nopims.index.unique(level='UWI')]

In [None]:
# Fraction of NOPIMS wells assigned to the training subset
train_part_size = 0.7

In [None]:
# Well-level split: whole wells go to train / validation / test, so that no
# well contributes samples to more than one subset.
all_wells_nopims = X_nopims.index.get_level_values(0).unique().tolist()

# Training wells: a seeded random draw of `train_part_size` of all wells
rd.seed(12)
train_wells_nopims = rd.sample(
    all_wells_nopims,
    round(len(all_wells_nopims) * train_part_size),
)

# Remaining wells: sorted first so the shuffle starts from a deterministic order
val_test_wells_nopims = sorted(set(all_wells_nopims).difference(train_wells_nopims))
rd.shuffle(val_test_wells_nopims)

# Validation wells: half of the remainder, drawn after re-seeding
rd.seed(12)
val_wells_nopims = rd.sample(
    val_test_wells_nopims,
    round(len(val_test_wells_nopims) * 0.5),
)

# Test wells: whatever is left, sorted and then shuffled
test_wells_nopims = sorted(set(val_test_wells_nopims).difference(val_wells_nopims))
rd.shuffle(test_wells_nopims)

In [None]:
# Slice the scaled NOPIMS frames by well name for each subset
train_X_nopims, train_y_nopims = X_nopims.loc[train_wells_nopims], y_nopims.loc[train_wells_nopims]
val_X_nopims, val_y_nopims = X_nopims.loc[val_wells_nopims], y_nopims.loc[val_wells_nopims]
test_X_nopims, test_y_nopims = X_nopims.loc[test_wells_nopims], y_nopims.loc[test_wells_nopims]

In [None]:
def gen_data_per_well(features, target, time_steps=50):
    """Build sliding-window samples separately for every well.

    Windows never cross a well boundary: each well (level 'UWI' of the index)
    is cut into all contiguous windows of ``time_steps`` rows, and every window
    is paired with the target value at the window's last row.

    Parameters
    ----------
    features : pandas.DataFrame
        Input logs, indexed by a MultiIndex that has a level named 'UWI'.
    target : pandas.DataFrame or pandas.Series
        Target values with the same 'UWI' index level and the same row order.
    time_steps : int, default 50
        Number of consecutive rows in one window; must be at least 1.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)`` where ``Xs[k]`` is one window of feature rows and ``ys[k]``
        is the target at the last row of that window. A well with fewer than
        ``time_steps`` rows contributes no samples.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1.
    """
    if time_steps < 1:
        raise ValueError(f"time_steps must be >= 1, got {time_steps}")

    Xs, ys = [], []

    # Iterate over the unique well identifiers
    for well_name in features.index.get_level_values('UWI').unique():
        # Convert to NumPy once per well instead of slicing pandas objects per window
        well_features = features.xs(well_name, level='UWI').values
        well_target = target.xs(well_name, level='UWI').values

        # A sequence of n rows holds n - time_steps + 1 full windows; the "+ 1"
        # keeps the final window, whose last row is the deepest sample of the well.
        n_windows = len(well_features) - time_steps + 1
        for start in range(n_windows):
            stop = start + time_steps
            Xs.append(well_features[start:stop])
            ys.append(well_target[stop - 1])

    return np.array(Xs), np.array(ys)

In [None]:
# Turn each subset into windowed arrays (default window length of gen_data_per_well).
# The DataFrame names are rebound to the resulting NumPy arrays.
train_X_nopims, train_y_nopims = gen_data_per_well(features=train_X_nopims, target=train_y_nopims)
val_X_nopims, val_y_nopims = gen_data_per_well(features=val_X_nopims, target=val_y_nopims)
test_X_nopims, test_y_nopims = gen_data_per_well(features=test_X_nopims, target=test_y_nopims)

In [None]:
def create_model(trial):
    """Build and compile a Conv1D + LSTM regressor from Optuna-suggested hyperparameters.

    Architecture: 1-5 blocks of (Conv1D -> BatchNormalization -> MaxPooling1D),
    followed by 1-5 blocks of (LSTM -> Dropout) and a single linear Dense unit.
    Only the last LSTM collapses the time axis, so the network outputs one
    value per input window, i.e. shape ``(batch, 1)``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters. The parameter names
        ('conv_layers', 'filters_i', 'kernel_size_i', 'lstm_layers',
        'lstm_units_j', 'dropout_j', 'learning_rate') are part of the study's
        search space.

    Returns
    -------
    A compiled Keras ``Sequential`` model (Adam optimizer, MSE loss) that
    accepts input of shape ``(batch, steps, 3)``.
    """
    model = Sequential()
    input_shape = (None, 3)

    # Hyperparameters of the convolutional blocks
    n_conv_layers = trial.suggest_int('conv_layers', 1, 5)  # number of conv blocks
    for i in range(n_conv_layers):
        filters = trial.suggest_categorical('filters_' + str(i), [32, 64, 128, 256])
        kernel_size = trial.suggest_categorical('kernel_size_' + str(i), [2, 3, 5])

        # Only the first layer of a Sequential model needs the input shape
        first_layer_kwargs = {'input_shape': input_shape} if i == 0 else {}
        model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu',
                         padding='same', **first_layer_kwargs))
        model.add(BatchNormalization())
        model.add(MaxPooling1D(pool_size=2))

    # Hyperparameters of the LSTM blocks
    n_lstm_layers = trial.suggest_int('lstm_layers', 1, 5)  # number of LSTM blocks
    for j in range(n_lstm_layers):
        lstm_units = trial.suggest_categorical('lstm_units_' + str(j), [32, 64, 128, 256])
        # Intermediate LSTMs must pass full sequences to the next LSTM; the last
        # one returns only its final state so the output matches the per-window
        # target instead of being a sequence of predictions.
        is_last_lstm = j == n_lstm_layers - 1
        model.add(LSTM(lstm_units, activation='relu', return_sequences=not is_last_lstm))
        model.add(Dropout(trial.suggest_float('dropout_' + str(j), 0.01, 0.3)))

    # Regression head
    model.add(Dense(units=1, activation='linear'))

    # Optimizer hyperparameters; suggest_float(log=True) is the non-deprecated
    # equivalent of suggest_loguniform and samples the same distribution.
    learning_rate = trial.suggest_float('learning_rate', 1e-7, 1e-3, log=True)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

# Objective function evaluated by Optuna for every trial
def objective(trial):
    """Train one candidate model and return its validation R^2.

    Reads the module-level arrays ``train_X_nopims`` / ``train_y_nopims`` for
    fitting and ``val_X_nopims`` / ``val_y_nopims`` for both early stopping and
    scoring. The test arrays are deliberately not touched here: selecting
    hyperparameters on the test set would leak it into model selection and
    make any later test score optimistic.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the architecture (via ``create_model``) and the
        batch size.

    Returns
    -------
    float
        R^2 of the trained model on the validation set (to be maximized).
    """
    # Drop Keras state left over from earlier trials so memory does not
    # accumulate over the course of the study.
    from keras.backend import clear_session
    clear_session()

    model = create_model(trial)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512])

    early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

    model.fit(train_X_nopims, train_y_nopims,
              validation_data=(val_X_nopims, val_y_nopims),
              epochs=20,
              batch_size=batch_size,
              callbacks=[early_stopping],
              verbose=0)

    # Score on the validation set; the test set stays held out for the final model
    y_pred = model.predict(val_X_nopims, verbose=0)
    r2 = r2_score(val_y_nopims, y_pred)

    return r2

# Run the hyperparameter search: 50 trials, maximizing the value returned by `objective`
study = optuna.create_study(direction='maximize')
study.optimize(func=objective, n_trials=50)

In [None]:
# Report the best trial found by the study
print("Лучшие гиперпараметры:", study.best_trial.params)
print("Лучшее значение R^2:", study.best_trial.value)