In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler

In [2]:
data = pd.read_csv(r'../data/all_wells.csv')
data.head()

Unnamed: 0,easting,northing,porosity,permeability,Poisson's ratio,Young's Modulus,water saturation,oil saturation,proppant weight (lbs),pump rate (cubic feet/min),name,cumulative production
0,66100.0,22300.0,0.09,0.033,0.332,9440769.483,0.12474,0.87526,260036.414279,275.737593,Tarragon 4-119H,81324.0
1,66199.0,22300.0,0.12,0.057,0.332,9429043.88,0.124979,0.875021,,,Tarragon 4-119H,81324.0
2,66297.0,22300.0,0.11,0.05,0.332,9417413.01,0.125221,0.874779,429740.754787,324.145032,Tarragon 4-119H,81324.0
3,66396.0,22300.0,0.08,0.024,0.332,9405879.454,0.125469,0.874531,,,Tarragon 4-119H,81324.0
4,66495.0,22300.0,0.08,0.031,0.332,9394445.773,0.12572,0.87428,485657.822229,320.868488,Tarragon 4-119H,81324.0


We scale the features to the same range and then split our data for training and testing. 

In [61]:
scaler = MinMaxScaler()
features = data[['easting', 'northing', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation', 'porosity']].dropna()

scaled = pd.DataFrame(
    scaler.fit_transform(features.values),
    columns=features.columns,
    index=features.index,
)

print(scaled)
x = features[['easting', 'northing', 'Poisson\'s ratio', 'Young\'s Modulus', 'oil saturation']]
y = features['porosity']

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.8, random_state=0)

       easting  northing  Poisson's ratio  Young's Modulus  oil saturation  \
0     0.689370  0.221884         0.510638         0.402293        0.631130   
1     0.690420  0.221884         0.510638         0.400672        0.630425   
2     0.691459  0.221884         0.510638         0.399065        0.629707   
3     0.692509  0.221884         0.510638         0.397472        0.628977   
4     0.693559  0.221884         0.510638         0.395892        0.628233   
...        ...       ...              ...              ...             ...   
9995  0.173117  0.522796         0.212766         0.643922        0.622722   
9996  0.174167  0.522796         0.212766         0.645133        0.622001   
9997  0.175206  0.522796         0.212766         0.646350        0.621281   
9998  0.176245  0.522796         0.212766         0.647573        0.620562   
9999  0.177285  0.522796         0.212766         0.648802        0.619844   

      porosity  
0     0.750000  
1     1.000000  
2     0.9166

Build testing and training datasets in such a way that NaN values can be removed while keeping the predictor and target columns in sync

In [62]:
train = torch.utils.data.TensorDataset(torch.Tensor(np.array(train_x)), torch.Tensor(np.array(train_y)))
train_loader = torch.utils.data.DataLoader(train, batch_size = 64, shuffle = True)

test = torch.utils.data.TensorDataset(torch.Tensor(np.array(test_x)), torch.Tensor(np.array(test_y)))
test_loader = torch.utils.data.DataLoader(dataset=test, batch_size=1)

features, labels = next(iter(train_loader))

We create a feed-forward neural network with a 10->10->5 architecture and ReLU activations.

In [138]:
input_size = 5
output_size = 1

model = nn.Sequential(nn.Linear(input_size, 10),
                      nn.ReLU(),
                      nn.Linear(10, 10),
                      nn.ReLU(),
                      nn.Linear(10, 5),
                      nn.ReLU(),
                      nn.Linear(5, output_size),
                     )

criterion = nn.SmoothL1Loss()

In [147]:
from torch import optim
optimizer = optim.Adam(model.parameters(), lr=0.0001)

We pass forward and backward through the network several times, updating gradients along the way 

In [148]:
epochs = 500
for e in range(epochs):
    running_loss = 0
    for features, labels in train_loader:
        optimizer.zero_grad()
        forward_pass = model(features)
        loss = criterion(forward_pass, labels.type(torch.FloatTensor))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    if (e + 1) % (epochs // 10) == 0:
        print("===================== Epoch %d =====================" % (e + 1))
        print(f"Training loss: %.10f" % (running_loss / len(train_loader)))

  return F.smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta)
  return F.smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta)


Training loss: 0.0002820064
Training loss: 0.0002877842
Training loss: 0.0002847975
Training loss: 0.0002825245
Training loss: 0.0002813384
Training loss: 0.0002836084
Training loss: 0.0002863610
Training loss: 0.0002854386
Training loss: 0.0002830290
Training loss: 0.0002814207


In [149]:
y_pred_list = []
with torch.no_grad():
    model.eval()
    for X_batch, _ in test_loader:
        y_test_pred = model(X_batch)
        y_pred_list.append(y_test_pred.cpu().numpy())
        
y_pred_list = [a.squeeze().tolist() for a in y_pred_list]

In [150]:
print(r2_score(test_y, y_pred_list))

-0.00035026681531680204
