<a href="https://colab.research.google.com/github/Querent-ai/pytorch_research/blob/main/Facies_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Facies Prediction using PyTorch and Preprocessed McMurray-Wabiskaw Data
This notebook loads cleaned CSV data with well logs and facies labels and trains a simple MLP classifier using PyTorch.


In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import Dataset, DataLoader


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [8]:
df = pd.read_csv("mcmurray_facies_v1.csv")

# Drop unnamed column
df = df.drop(columns=['Unnamed: 0'])

# Show available columns
print("Columns:", df.columns.tolist())

df.head()

  df = pd.read_csv("mcmurray_facies_v1.csv")


Columns: ['CALI', 'COND', 'DELT', 'DEPT', 'DPHI', 'DT', 'GR', 'ILD', 'ILM', 'NPHI', 'PHID', 'RHOB', 'SFL', 'SFLU', 'SN', 'SP', 'UWI', 'SitID', 'lat', 'lng', 'Depth', 'LithID', 'W_Tar', 'SW', 'VSH', 'PHI', 'RW', 'lithName']


Unnamed: 0,CALI,COND,DELT,DEPT,DPHI,DT,GR,ILD,ILM,NPHI,...,lat,lng,Depth,LithID,W_Tar,SW,VSH,PHI,RW,lithName
0,,,,405.09,0.153,,77.86,11.845,,0.458,...,56.015445,-111.333199,405.09,4.0,0.054,0.58,0.131,0.276,0.554,Shale
1,,,,405.34,0.071,,75.765,13.308,,0.407,...,56.015445,-111.333199,405.34,4.0,0.019,0.787,0.096,0.2,0.553,Shale
2,,,,405.59,0.047,,75.085,13.621,,0.308,...,56.015445,-111.333199,405.59,4.0,0.0,1.0,0.085,0.152,0.553,Shale
3,,,,405.84,0.053,,76.87,13.005,,0.264,...,56.015445,-111.333199,405.84,4.0,0.0,1.0,0.114,0.134,0.553,Shale
4,,,,406.09,0.072,,82.159,11.695,,0.289,...,56.015445,-111.333199,406.09,4.0,0.0,1.0,0.203,0.119,0.553,Shale


In [9]:
# Define target and features
target = 'lithName'
features = ['GR', 'RHOB', 'NPHI', 'PHI', 'VSH']

# Drop rows with missing input or labels
df = df.dropna(subset=features + [target])

df.head()

Unnamed: 0,CALI,COND,DELT,DEPT,DPHI,DT,GR,ILD,ILM,NPHI,...,lat,lng,Depth,LithID,W_Tar,SW,VSH,PHI,RW,lithName
10998,,,,375.0,0.297,,109.022,4.435,,0.43,...,55.891636,-111.268187,375.0,2.0,0.0,1.0,0.445,0.2,0.892,ShalySand
10999,,,,375.25,0.281,,107.192,4.522,,0.401,...,55.891636,-111.268187,375.25,2.0,0.0,1.0,0.404,0.214,0.892,ShalySand
11000,,,,375.5,0.27,,105.368,5.065,,0.396,...,55.891636,-111.268187,375.5,2.0,0.0,1.0,0.364,0.229,0.892,ShalySand
11001,,,,375.75,0.269,,106.005,5.889,,0.384,...,55.891636,-111.268187,375.75,2.0,0.0,1.0,0.378,0.224,0.892,ShalySand
11002,,,,376.0,0.279,,107.264,6.472,,0.374,...,55.891636,-111.268187,376.0,2.0,0.0,1.0,0.406,0.214,0.892,ShalySand


In [14]:
X = df[features].values
y = df['lithName'].values

# Scale inputs
from sklearn.preprocessing import StandardScaler, LabelEncoder
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Encode facies labels (e.g., "Shale" -> 0, "Sandstone" -> 1, etc.)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Check classes
print("Facies classes:", le.classes_)


Facies classes: ['CementedSand' 'Coal' 'Sand' 'SandyShale' 'Shale' 'ShalySand' 'Undefined']


In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)


In [15]:
import torch
from torch.utils.data import Dataset, DataLoader

class FaciesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self): return len(self.y)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

train_loader = DataLoader(FaciesDataset(X_train, y_train), batch_size=64, shuffle=True)
test_loader = DataLoader(FaciesDataset(X_test, y_test), batch_size=64)


In [16]:
import torch.nn as nn
import torch.optim as optim

class FaciesMLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, output_dim)
        )
    def forward(self, x):
        return self.net(x)

model = FaciesMLP(input_dim=len(features), hidden_dim=64, output_dim=len(le.classes_))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [17]:
for epoch in range(10):
    model.train()
    running_loss = 0.0
    for xb, yb in train_loader:
        preds = model(xb)
        loss = criterion(preds, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}")


Epoch 1, Loss: 0.8809
Epoch 2, Loss: 0.4495
Epoch 3, Loss: 0.3540
Epoch 4, Loss: 0.3084
Epoch 5, Loss: 0.2797
Epoch 6, Loss: 0.2618
Epoch 7, Loss: 0.2403
Epoch 8, Loss: 0.2274
Epoch 9, Loss: 0.2168
Epoch 10, Loss: 0.2047


In [18]:
from sklearn.metrics import classification_report, confusion_matrix

model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for xb, yb in test_loader:
        preds = model(xb)
        all_preds.extend(torch.argmax(preds, dim=1).numpy())
        all_labels.extend(yb.numpy())

print(classification_report(all_labels, all_preds, target_names=le.classes_))
print("Confusion Matrix:")
print(confusion_matrix(all_labels, all_preds))


              precision    recall  f1-score   support

CementedSand       0.00      0.00      0.00        16
        Coal       0.96      0.96      0.96      1171
        Sand       0.97      0.98      0.98      1604
  SandyShale       0.90      0.72      0.80        92
       Shale       0.95      0.94      0.95      1237
   ShalySand       0.94      0.97      0.96      1178
   Undefined       0.76      0.74      0.75        34

    accuracy                           0.96      5332
   macro avg       0.78      0.76      0.77      5332
weighted avg       0.95      0.96      0.95      5332

Confusion Matrix:
[[   0    7    2    0    4    3    0]
 [   0 1119    0    0   29   23    0]
 [   0    0 1578    1    0   17    8]
 [   0    0   21   66    4    1    0]
 [   0   44    0    4 1166   23    0]
 [   0    0   12    0   24 1142    0]
 [   0    0    6    2    0    1   25]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
