In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, SimpleRNN, LayerNormalization
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
# Read the raw training CSV into a DataFrame.
# The location is a Colab-style absolute path; adjust it when running elsewhere.
dataset_path = "/content/sample_data/dseats_2024_training_dataset.csv"
data = pd.read_csv(dataset_path)


In [3]:
# Preprocessing the dataset
#
# Produces the scaled, LSTM/RNN-shaped train/test arrays used by the model
# cells: X_train, X_test (samples, 1, features) and Y_train, Y_test
# (samples, targets), plus the fitted scaler_X / scaler_Y.

# Convert 'PRODUCTION DATE' to datetime format and calculate days elapsed.
# errors='coerce' turns unparseable dates into NaT, which would surface as NaN
# in 'days_elapsed' (and therefore in the model input).
data['PRODUCTION DATE'] = pd.to_datetime(data['PRODUCTION DATE'], format='%d/%m/%Y %H:%M', errors='coerce')
data['days_elapsed'] = (data['PRODUCTION DATE'] - data['PRODUCTION DATE'].min()).dt.days

# Fill missing values in 'Choke Size' with the median
data['Choke Size'] = data['Choke Size'].fillna(data['Choke Size'].median())

# Select input and output features
input_columns = [
    'Downhole Pressure (PSI)',
    'Downhole Temperature (Kelvin)',
    'Average Tubing Pressure',
    'Annulus Pressure (PSI)',
    'AVG WHP (PSI)',
    'Choke Size',
    'days_elapsed'
]
output_columns = [
    'Oil Production (stb/day)',
    'Gas Volume (scf/day)',
    'Water Production (stb/day)'
]

X_raw = data[input_columns].values
Y_raw = data[output_columns].values

# Split into train and test sets BEFORE scaling. Fitting the scalers on the
# full dataset would leak the test set's min/max into training. The row
# partition depends only on the number of samples and random_state, so the
# same rows land in train/test as when the split was done after scaling.
X_train_raw, X_test_raw, Y_train_raw, Y_test_raw = train_test_split(
    X_raw, Y_raw, test_size=0.2, random_state=42
)

# Normalize features: fit on the training rows only, then apply the same
# transform to the test rows (test values may fall slightly outside [0, 1]).
scaler_X = MinMaxScaler()
scaler_Y = MinMaxScaler()
X_train = scaler_X.fit_transform(X_train_raw)
X_test = scaler_X.transform(X_test_raw)
Y_train = scaler_Y.fit_transform(Y_train_raw)
Y_test = scaler_Y.transform(Y_test_raw)

# Reshape X for time-series input (samples, time_steps, features); each row is
# treated as a sequence of length 1.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Keep the full scaled arrays available under their original names, now
# produced with the train-fitted scalers.
X = scaler_X.transform(X_raw).reshape((X_raw.shape[0], 1, X_raw.shape[1]))
Y = scaler_Y.transform(Y_raw)

# Confirm shapes of the processed data
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape


((5540, 1, 7), (1385, 1, 7), (5540, 3), (1385, 3))

In [4]:
# Build the RNN model for multi-output regression
rnn_model = Sequential([
    # Declare the input shape with an explicit Input layer; passing
    # `input_shape=` to the first layer is discouraged by Keras 3 and emits a
    # UserWarning.
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    SimpleRNN(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(Y_train.shape[1])  # The output layer has as many neurons as output columns
])

rnn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the RNN model
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch val_* numbers are not an independent hold-out estimate.
rnn_model.fit(X_train, Y_train, epochs=50, batch_size=32, validation_data=(X_test, Y_test))

# Make predictions
Y_pred = rnn_model.predict(X_test)

# Separate predictions into Oil, Gas, and Water (column order of Y_test)
oil_pred = Y_pred[:, 0]
gas_pred = Y_pred[:, 1]
water_pred = Y_pred[:, 2]

oil_true = Y_test[:, 0]
gas_true = Y_test[:, 1]
water_true = Y_test[:, 2]

# All metrics below are computed on the values as stored in Y_test / Y_pred,
# i.e. in whatever scale Y_test was prepared in, not necessarily physical units.

# Calculate metrics for Oil
oil_mae = mean_absolute_error(oil_true, oil_pred)
oil_mse = mean_squared_error(oil_true, oil_pred)
oil_rmse = oil_mse ** 0.5
oil_r2 = r2_score(oil_true, oil_pred)

# Calculate metrics for Gas
gas_mae = mean_absolute_error(gas_true, gas_pred)
gas_mse = mean_squared_error(gas_true, gas_pred)
gas_rmse = gas_mse ** 0.5
gas_r2 = r2_score(gas_true, gas_pred)

# Calculate metrics for Water
water_mae = mean_absolute_error(water_true, water_pred)
water_mse = mean_squared_error(water_true, water_pred)
water_rmse = water_mse ** 0.5
water_r2 = r2_score(water_true, water_pred)

# Print metrics
print(f"Oil - MAE: {oil_mae:.4f}, MSE: {oil_mse:.4f}, RMSE: {oil_rmse:.4f}, R2: {oil_r2:.4f}")
print(f"Gas - MAE: {gas_mae:.4f}, MSE: {gas_mse:.4f}, RMSE: {gas_rmse:.4f}, R2: {gas_r2:.4f}")
print(f"Water - MAE: {water_mae:.4f}, MSE: {water_mse:.4f}, RMSE: {water_rmse:.4f}, R2: {water_r2:.4f}")



  super().__init__(**kwargs)


Epoch 1/50
174/174 ━━━━━━━━━━━━━━━━━━━━ 7s 19ms/step - loss: 0.0574 - mae: 0.1729 - val_loss: 0.0139 - val_mae: 0.0863
Epoch 2/50
174/174 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 0.0168 - mae: 0.0926 - val_loss: 0.0099 - val_mae: 0.0679
Epoch 3/50
174/174 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.0127 - mae: 0.0788 - val_loss: 0.0094 - val_mae: 0.0671
Epoch 4/50
174/174 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.0123 - mae: 0.0760 - val_loss: 0.0079 - val_mae: 0.0581
Epoch 5/50
174/174 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - loss: 0.0103 - mae: 0.0689 - val_loss: 0.0075 - val_mae: 0.0562
Epoch 6/50
174/174 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 0.0108 - mae: 0.0697 - val_loss: 0.0072 - val_mae: 0.0544
Epoch 7/50
174/174 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step -

In [8]:
# Building the LSTM model
lstm_model = Sequential([
    # Declare the input shape with an explicit Input layer; passing
    # `input_shape=` to the first layer is discouraged by Keras 3 and emits a
    # UserWarning.
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(64, activation='tanh', return_sequences=False),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(Y_train.shape[1])  # Output features: Oil, Gas, Water
])

# Compile the model
lstm_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch val_* numbers are not an independent hold-out estimate.
lstm_model.fit(
    X_train, Y_train,
    validation_data=(X_test, Y_test),
    epochs=100,
    batch_size=32,
    verbose=2
)

# Make predictions
Y_pred = lstm_model.predict(X_test)

# Separate predictions for Oil, Gas, and Water (column order of Y_test)
oil_pred = Y_pred[:, 0]
gas_pred = Y_pred[:, 1]
water_pred = Y_pred[:, 2]

oil_true = Y_test[:, 0]
gas_true = Y_test[:, 1]
water_true = Y_test[:, 2]

# Each target gets its own R2 variable. Previously a single `r2` name was
# reassigned three times, so every printed line showed the Water score.

# Calculate metrics for Oil
oil_mae = mean_absolute_error(oil_true, oil_pred)
oil_mse = mean_squared_error(oil_true, oil_pred)
oil_rmse = oil_mse ** 0.5
oil_r2 = r2_score(oil_true, oil_pred)

# Calculate metrics for Gas
gas_mae = mean_absolute_error(gas_true, gas_pred)
gas_mse = mean_squared_error(gas_true, gas_pred)
gas_rmse = gas_mse ** 0.5
gas_r2 = r2_score(gas_true, gas_pred)

# Calculate metrics for Water
water_mae = mean_absolute_error(water_true, water_pred)
water_mse = mean_squared_error(water_true, water_pred)
water_rmse = water_mse ** 0.5
water_r2 = r2_score(water_true, water_pred)

# `r2` used to end this cell holding the Water score; keep that binding so any
# later cell that reads it sees the same value as before.
r2 = water_r2

# Print metrics
print(f"Oil - MAE: {oil_mae:.4f}, MSE: {oil_mse:.4f}, RMSE: {oil_rmse:.4f}, R2: {oil_r2:.4f} ")
print(f"Gas - MAE: {gas_mae:.4f}, MSE: {gas_mse:.4f}, RMSE: {gas_rmse:.4f}, R2:{gas_r2:.4f}")
print(f"Water - MAE: {water_mae:.4f}, MSE: {water_mse:.4f}, RMSE: {water_rmse:.4f}, R2: {water_r2:.4f}")



Epoch 1/100


  super().__init__(**kwargs)


174/174 - 3s - 16ms/step - loss: 0.0360 - mae: 0.1442 - val_loss: 0.0170 - val_mae: 0.0943
Epoch 2/100
174/174 - 1s - 5ms/step - loss: 0.0137 - mae: 0.0832 - val_loss: 0.0106 - val_mae: 0.0716
Epoch 3/100
174/174 - 2s - 9ms/step - loss: 0.0103 - mae: 0.0712 - val_loss: 0.0088 - val_mae: 0.0637
Epoch 4/100
174/174 - 1s - 8ms/step - loss: 0.0082 - mae: 0.0623 - val_loss: 0.0087 - val_mae: 0.0624
Epoch 5/100
174/174 - 1s - 6ms/step - loss: 0.0077 - mae: 0.0587 - val_loss: 0.0073 - val_mae: 0.0554
Epoch 6/100
174/174 - 1s - 5ms/step - loss: 0.0072 - mae: 0.0557 - val_loss: 0.0069 - val_mae: 0.0514
Epoch 7/100
174/174 - 1s - 7ms/step - loss: 0.0067 - mae: 0.0527 - val_loss: 0.0066 - val_mae: 0.0493
Epoch 8/100
174/174 - 1s - 7ms/step - loss: 0.0066 - mae: 0.0520 - val_loss: 0.0066 - val_mae: 0.0472
Epoch 9/100
174/174 - 1s - 5ms/step - loss: 0.0063 - mae: 0.0496 - val_loss: 0.0062 - val_mae: 0.0456
Epoch 10/100
174/174 - 1s - 5ms/step - loss: 0.0062 - mae: 0.0491 - val_loss: 0.0063 - val_ma