In [1]:
import torch
import torch.nn as nn
import well_log_analysis.dataset
from torch.utils.data import DataLoader
import numpy as np
import sklearn
import sklearn.metrics
from sklearn.preprocessing import MinMaxScaler, RobustScaler

In [2]:
BATCH_SIZE = 64
SEQUENCE_LENGTH = 16

In [3]:
def train_test(train_wells, test_well):
    train_dataset = well_log_analysis.dataset.WellDataset(train_wells[0], 
                                    ['GR', 'DEN', 'RT', 'DTCO', 'DTSM'], 
                                    ['GR', 'DEN', 'RT', 'DTCO', 'DTSM'], 
                                    sequence_length=SEQUENCE_LENGTH)
    
    for i in range(1, len(train_wells)):
        train_dataset += well_log_analysis.dataset.WellDataset(train_wells[i], 
                                        ['GR', 'DEN', 'RT', 'DTCO', 'DTSM'], 
                                        ['GR', 'DEN', 'RT', 'DTCO', 'DTSM'], 
                                        sequence_length=SEQUENCE_LENGTH)
    test_dataset = well_log_analysis.dataset.WellDataset(test_well,
                                ['GR', 'DEN', 'RT', 'DTCO', 'DTSM'], 
                                ['GR', 'DEN', 'RT', 'DTCO', 'DTSM'], 
                                sequence_length=SEQUENCE_LENGTH)
    return train_dataset, test_dataset

In [4]:
dataset_paths = ['data/dataset1/J10025.las', 'data/dataset1/J10035.las', 'data/dataset1/J10039.las', 'data/dataset1/J10051.las', 'data/dataset1/J10038.las']
train_dataset, test_dataset = train_test(dataset_paths[:-1], dataset_paths[-1])
train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, drop_last=True, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=False)

In [5]:
scaler = RobustScaler()
train_dataset.copy_df = train_dataset.df.copy()
train_dataset.df[train_dataset.return_sites] = scaler.fit_transform(train_dataset.df[train_dataset.return_sites])

In [33]:
class LSTMModel(nn.Module):
    def __init__(self, in_size, out_size):
        super().__init__()
        self.num_layers = 3
        self.hidden_units = 32
        self.fc1 = nn.Linear(3, 32)
        self.lstm = nn.LSTM(32, self.hidden_units, self.num_layers, dropout=0.3, batch_first=True)
        self.fc2 = nn.Linear(self.hidden_units, 2)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        x, (_, _) = self.lstm(x, (h0, c0))
        x = self.fc2(x)
        return x

In [34]:
model = LSTMModel(3, 2)
optim = torch.optim.Adam(model.parameters(), weight_decay=1e-4)

In [35]:
for epoch in range(50):
    total = []
    all_inp = []
    all_pred = []
    for batch in train_loader:
        optim.zero_grad()
        inp, out = batch[:, :, :3].float(), batch[:, :, 3:].float()
        prediction = model(inp)
        loss = nn.SmoothL1Loss()(out, prediction)
        total.append(loss.detach().clone().cpu())
        loss.backward()
        optim.step()
        all_inp += [prediction[:, i, :].detach().clone().cpu() for i in range(len(out[0]))]
        all_pred += [out[:, i, :] for i in range(len(out[0]))]
    #print("LOSS", np.mean(total))
    all_inp = torch.stack(all_inp)[:, 0]
    all_inp = torch.concat([torch.zeros(all_inp.shape[0], 3), all_inp], axis=1).detach().numpy()
    all_pred = torch.stack(all_pred)[:, 0]
    all_pred = torch.concat([torch.zeros(all_pred.shape[0], 3), all_pred], axis=1).detach().numpy()
    
    true = scaler.inverse_transform(all_inp)[:, 3]
    pred = scaler.inverse_transform(all_pred)[:, 3]
    
    print("MAPE", sklearn.metrics.mean_absolute_percentage_error(true, pred))

MAPE 0.068038605
MAPE 0.049532566
MAPE 0.053010453
MAPE 0.04765175
MAPE 0.04736973
MAPE 0.051493946
MAPE 0.044476338
MAPE 0.044518277
MAPE 0.04814416
MAPE 0.044971272
MAPE 0.048812293
MAPE 0.047479782
MAPE 0.04563415
MAPE 0.047763757
MAPE 0.045397338
MAPE 0.044219
MAPE 0.048108775
MAPE 0.045504898
MAPE 0.044788312
MAPE 0.046990328
MAPE 0.043205343
MAPE 0.04786
MAPE 0.042445175
MAPE 0.040494047
MAPE 0.043983266
MAPE 0.04244338
MAPE 0.0405328
MAPE 0.038328588
MAPE 0.04422425
MAPE 0.045477454
MAPE 0.04174903
MAPE 0.03831494
MAPE 0.043692186
MAPE 0.041283272
MAPE 0.041145086
MAPE 0.038506214
MAPE 0.039490063
MAPE 0.04315701
MAPE 0.04215993
MAPE 0.042076193
MAPE 0.039695658
MAPE 0.03943569
MAPE 0.039968736
MAPE 0.039037365
MAPE 0.040812816
MAPE 0.041002125
MAPE 0.042678267
MAPE 0.038464032
MAPE 0.041455656
MAPE 0.03751525


In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(7, 25))
plt.plot(true, range(len(true)), color='blue', linewidth=0.5, label="Ground Truth DTSM")
plt.plot(pred, range(len(pred)), color='red', linewidth=0.5, label="Predicted DTSM")