In [None]:
!pip install optuna

In [None]:
import pandas as pd
import optuna
from sklearn.preprocessing import MinMaxScaler
import random as rd
import numpy as np
from sklearn.metrics import r2_score
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dropout, Dense, BatchNormalization
from keras.models import Sequential
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive; the CSV files read by this notebook
# live under /content/drive/MyDrive.
drive.mount('/content/drive')

In [None]:
# Read the three log tables (Force, Taranaki, Kansas) from the mounted Drive,
# in that order, and bind each to its own DataFrame.
df_force, df_taranaki, df_kansas = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/force_logs.csv',
        '/content/drive/MyDrive/taranaki_logs.csv',
        '/content/drive/MyDrive/kansas_logs.csv',
    )
)

In [None]:
# Bring the column names of the three tables in line with each other:
# UWI -> WELLNAME (Force, Kansas); DENS -> RHOB and NEUT -> NPHI (Taranaki).
df_force = df_force.rename({'UWI': 'WELLNAME'}, axis='columns')
df_taranaki = df_taranaki.rename({'DENS': 'RHOB', 'NEUT': 'NPHI'}, axis='columns')
df_kansas = df_kansas.rename({'UWI': 'WELLNAME'}, axis='columns')

In [None]:
# Index every table by the (WELLNAME, DEPT) pair. The frames are rebound to the
# re-indexed result rather than modified in place.
df_force = df_force.set_index(['WELLNAME', 'DEPT'])
df_taranaki = df_taranaki.set_index(['WELLNAME', 'DEPT'])
df_kansas = df_kansas.set_index(['WELLNAME', 'DEPT'])

In [None]:
# Report the number of distinct values in the first index level (WELLNAME)
# of each dataset, i.e. how many wells each one contains.
print(f'Количество скважин Taranaki = {df_taranaki.index.get_level_values(0).nunique()}')
print(f'Количество скважин Force = {df_force.index.get_level_values(0).nunique()}')
print(f'Количество скважин Kansas = {df_kansas.index.get_level_values(0).nunique()}')

In [None]:
# For each dataset: every column except GR becomes the input table (X_*),
# and the GR column alone becomes the target series (y_*).
X_taranaki, y_taranaki = df_taranaki.drop(columns='GR'), df_taranaki['GR']
X_kansas, y_kansas = df_kansas.drop(columns='GR'), df_kansas['GR']
X_force, y_force = df_force.drop(columns='GR'), df_force['GR']

In [None]:
# Stack the per-dataset inputs and targets row-wise, using the same dataset
# order (Taranaki, Kansas, Force) for both so rows stay aligned.
df_X = pd.concat(objs=[X_taranaki, X_kansas, X_force], axis=0)
df_y = pd.concat(objs=[y_taranaki, y_kansas, y_force], axis=0)

In [None]:
# Min-max scale the five log columns of the inputs and the GR target, then
# restore the (WELLNAME, DEPT) index on both tables.
#
# NOTE(review): both scalers are fit on every row of every dataset, before the
# train/validation/test well split made further down, so held-out wells
# influence the scaling ranges. Consider fitting on training wells only.
scaled_feature_columns = ['CALI', 'RHOB', 'DRHO', 'NPHI', 'SP']

# Inputs: flatten the index into columns, scale, write back, re-index.
X = df_X.reset_index()
scaler_X = MinMaxScaler()
normalized_values_X = scaler_X.fit_transform(X[scaled_feature_columns])
X[scaled_feature_columns] = normalized_values_X
X_scaled = X.set_index(['WELLNAME', 'DEPT'])

# Target: same steps with its own scaler, so predictions can be mapped back
# to the original GR range with scaler_y.
y = df_y.reset_index()
scaler_y = MinMaxScaler()
normalized_values_y = scaler_y.fit_transform(y[['GR']])
y[['GR']] = normalized_values_y
y_scaled = y.set_index(['WELLNAME', 'DEPT'])

In [None]:
# Replace the raw Kansas tables with their scaled counterparts by looking the
# Kansas well names up in the first index level of the scaled tables.
kansas_wells_in_X = X_kansas.index.get_level_values(0).unique()
kansas_wells_in_y = y_kansas.index.get_level_values(0).unique()

X_kansas = X_scaled.loc[kansas_wells_in_X]
y_kansas = y_scaled.loc[kansas_wells_in_y]

In [None]:
# Fraction of the Kansas wells that goes to the training set.
train_part_size = 0.7

# Distinct Kansas well names taken from the first index level.
all_wells_kansas = X_kansas.index.get_level_values(0).unique().tolist()

# Training wells: seeded random sample of the requested fraction.
rd.seed(10)
train_wells_kansas = rd.sample(
    all_wells_kansas,
    round(len(all_wells_kansas) * train_part_size),
)

# Wells not chosen for training. Sorting first removes the arbitrary ordering
# of the set, so the shuffle below starts from a deterministic sequence.
val_test_wells_kansas = sorted(set(all_wells_kansas).difference(train_wells_kansas))
rd.shuffle(val_test_wells_kansas)

# Validation wells: half of the remainder, sampled after re-seeding.
rd.seed(10)
val_wells_kansas = rd.sample(
    val_test_wells_kansas,
    round(len(val_test_wells_kansas) * 0.5),
)

# Test wells: whatever is neither training nor validation, sorted then shuffled.
test_wells_kansas = sorted(set(val_test_wells_kansas).difference(val_wells_kansas))
rd.shuffle(test_wells_kansas)

In [None]:
# Slice the scaled Kansas inputs and targets into the three well groups; each
# pair is taken with the same list of well names so X and y stay aligned.
train_X_kansas, train_y_kansas = (
    table.loc[train_wells_kansas] for table in (X_kansas, y_kansas)
)
val_X_kansas, val_y_kansas = (
    table.loc[val_wells_kansas] for table in (X_kansas, y_kansas)
)
test_X_kansas, test_y_kansas = (
    table.loc[test_wells_kansas] for table in (X_kansas, y_kansas)
)

In [None]:
def gen_data_per_well(features, target, time_steps=50):
    """Build sliding-window samples separately for every well.

    For each well, every run of ``time_steps`` consecutive rows of ``features``
    becomes one input window, paired with the ``target`` value at the LAST row
    of that window. Windows never span two wells.

    Parameters
    ----------
    features : pandas.DataFrame
        Input columns, indexed by a MultiIndex that has a ``WELLNAME`` level.
        Rows are used in their stored order (assumes they are already ordered
        by depth within each well — TODO confirm).
    target : pandas.DataFrame or pandas.Series
        Target values carrying the same index as ``features``.
    time_steps : int, default 50
        Number of consecutive rows per window; must be at least 1.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)``: ``Xs`` stacks the windows as
        ``(n_windows, time_steps, n_columns)``; ``ys`` holds one target entry
        per window. Wells with fewer than ``time_steps`` rows contribute nothing.

    Raises
    ------
    ValueError
        If ``time_steps`` is smaller than 1.
    """
    if time_steps < 1:
        raise ValueError('time_steps must be a positive integer')

    Xs, ys = [], []

    # Iterate over the distinct wells
    for well_name in features.index.get_level_values('WELLNAME').unique():
        # Convert this well's rows to NumPy once instead of calling .iloc for
        # every window.
        well_features = features.xs(well_name, level='WELLNAME').to_numpy()
        well_target = target.xs(well_name, level='WELLNAME').to_numpy()

        # A well with n rows holds n - time_steps + 1 complete windows. The
        # "+ 1" keeps the final window, whose target is the well's last row.
        n_windows = len(well_features) - time_steps + 1
        for start in range(n_windows):
            stop = start + time_steps
            Xs.append(well_features[start:stop])
            ys.append(well_target[stop - 1])

    return np.array(Xs), np.array(ys)

In [None]:
# Turn each split into windowed NumPy arrays. The names are rebound from
# DataFrames to arrays, so this cell can only be run once per kernel session
# without first re-running the cell that builds the DataFrame splits.
train_X_kansas, train_y_kansas = gen_data_per_well(
    features=train_X_kansas, target=train_y_kansas
)
val_X_kansas, val_y_kansas = gen_data_per_well(
    features=val_X_kansas, target=val_y_kansas
)
test_X_kansas, test_y_kansas = gen_data_per_well(
    features=test_X_kansas, target=test_y_kansas
)

In [None]:
def create_model(trial, input_shape=None):
    """Build and compile a Conv1D + LSTM regression network for one Optuna trial.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the number of Conv1D/LSTM layers, their sizes,
        the dropout rates and the learning rate.
    input_shape : tuple, optional
        Shape of one input sample, ``(time_steps, n_features)``. When omitted
        it is read from the module-level ``train_X_kansas`` array, so the
        network always matches the data it is trained on.

    Returns
    -------
    keras.models.Sequential
        Model compiled with Adam and MSE loss. It emits one value per input
        window, i.e. an output of shape ``(batch, 1)``.
    """
    if input_shape is None:
        # Derive (time_steps, n_features) from the training windows instead of
        # hard-coding a feature count that can drift from the data.
        input_shape = tuple(train_X_kansas.shape[1:])

    model = Sequential()

    # Hyperparameters of the convolutional blocks
    n_conv_layers = trial.suggest_int('conv_layers', 1, 5)  # number of conv blocks
    for i in range(n_conv_layers):
        filters = trial.suggest_categorical('filters_' + str(i), [32, 64, 128, 256])
        kernel_size = trial.suggest_categorical('kernel_size_' + str(i), [2, 3, 5])

        # Only the first layer of a Sequential model needs the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if i == 0 else {}
        model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu',
                         padding='same', **first_layer_kwargs))
        model.add(BatchNormalization())
        # Each pooling step halves the sequence length.
        model.add(MaxPooling1D(pool_size=2))

    # Hyperparameters of the LSTM stack
    n_lstm_layers = trial.suggest_int('lstm_layers', 1, 5)  # number of LSTM layers
    for j in range(n_lstm_layers):
        lstm_units = trial.suggest_categorical('lstm_units_' + str(j), [32, 64, 128, 256])
        # Intermediate LSTMs pass the full sequence on; the last one returns
        # only its final state so the Dense head yields one value per window.
        is_last_lstm = j == n_lstm_layers - 1
        model.add(LSTM(lstm_units, activation='relu', return_sequences=not is_last_lstm))
        model.add(Dropout(trial.suggest_float('dropout_' + str(j), 0.01, 0.3)))

    # Fully connected regression head
    model.add(Dense(units=1, activation='linear'))

    # Optimizer hyperparameters; suggest_float(log=True) replaces the
    # deprecated suggest_loguniform with the same log-uniform sampling.
    learning_rate = trial.suggest_float('learning_rate', 1e-7, 1e-3, log=True)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

# Optuna objective: trains one candidate model and returns its score
def objective(trial):
    """Train the model proposed by ``trial`` and return its validation R^2.

    The model is fit on the Kansas training windows, with early stopping on
    the validation loss (best weights restored). It is then scored on the same
    validation windows. The test wells are deliberately not used here, so they
    stay untouched by the hyperparameter search and can still give an unbiased
    final estimate.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial that supplies the architecture hyperparameters (through
        ``create_model``) and the batch size.

    Returns
    -------
    float
        R^2 between the validation targets and the model predictions; the
        study maximises this value.
    """
    model = create_model(trial)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512])

    early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

    model.fit(train_X_kansas, train_y_kansas,
              validation_data=(val_X_kansas, val_y_kansas),
              epochs=20,
              batch_size=batch_size,
              callbacks=[early_stopping],
              verbose=0)

    # Score on the validation wells (the previously referenced test_X_spe /
    # test_y_spe arrays are not defined anywhere in this notebook).
    y_pred = model.predict(val_X_kansas, verbose=0)
    r2 = r2_score(val_y_kansas, y_pred)

    return r2

# Run the hyperparameter search, maximising the R^2 returned by `objective`.
# TPE is Optuna's default sampler; seeding it makes the sequence of suggested
# hyperparameters repeatable between runs. Network weight initialisation is not
# seeded in this cell, so the scores themselves can still vary slightly.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=10),
)
study.optimize(objective, n_trials=50)

In [None]:
# Show the hyperparameters of the best trial and the objective value (R^2)
# it achieved.
print("Лучшие гиперпараметры:", study.best_params)
print("Лучшее значение R^2:", study.best_value)