<a href="https://colab.research.google.com/github/Querent-ai/pytorch_research/blob/main/Facies_Across_Wells_1DCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**[Facies Prediction with Class Weighting & Visualization](https://)**

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter


In [9]:
df = pd.read_csv("mcmurray_facies_v1.csv")
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

features = ['GR', 'RHOB', 'NPHI', 'PHI', 'VSH']
df = df.dropna(subset=features + ['lithName', 'UWI'])

# Optional: remove very rare or unclear facies
df = df[df['lithName'] != 'CementedSand']
df = df[df['lithName'] != 'Undefined']  # or whatever the rare class is
df['lithName'].value_counts()


Unnamed: 0_level_0,count
lithName,Unnamed: 1_level_1
Sand,4824
Shale,3593
ShalySand,3454
Coal,3146
SandyShale,240


In [10]:
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])

le = LabelEncoder()
df['FaciesEncoded'] = le.fit_transform(df['lithName'])
classes = le.classes_
from collections import Counter
print(Counter(df['FaciesEncoded']))


Counter({1: 4824, 3: 3593, 4: 3454, 0: 3146, 2: 240})


In [11]:
window_size = 10
X_windows, y_windows, uwi_windows = [], [], []

for i in range(len(df) - window_size):
    window = df.iloc[i:i+window_size]
    if len(window['FaciesEncoded'].unique()) == 1:  # target consistency
        X_windows.append(window[features].values)
        y_windows.append(window['FaciesEncoded'].iloc[-1])
        uwi_windows.append(window['UWI'].iloc[-1])

X_windows = np.stack(X_windows)
y_windows = np.array(y_windows)
uwi_windows = np.array(uwi_windows)


In [13]:
class FaciesCNN1D(nn.Module):
    def __init__(self, input_channels, num_classes):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv1d(input_channels, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Dropout(0.2),
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1)
        )
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # shape: (batch, features, window)
        x = self.cnn(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)


In [14]:
class FaciesWindowDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self): return len(self.y)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]


In [16]:
from sklearn.utils.multiclass import unique_labels

sgkf = StratifiedGroupKFold(n_splits=3, shuffle=True, random_state=42)

for fold, (train_idx, test_idx) in enumerate(sgkf.split(X_windows, y_windows, groups=uwi_windows)):
    print(f"\nFold {fold+1}")

    X_train, X_test = X_windows[train_idx], X_windows[test_idx]
    y_train, y_test = y_windows[train_idx], y_windows[test_idx]

    train_loader = DataLoader(FaciesWindowDataset(X_train, y_train), batch_size=64, shuffle=True)
    test_loader = DataLoader(FaciesWindowDataset(X_test, y_test), batch_size=64)

    model = FaciesCNN1D(input_channels=len(features), num_classes=len(classes))
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(10):  # Increase if needed
        model.train()
        running_loss = 0.0
        for xb, yb in train_loader:
            preds = model(xb)
            loss = criterion(preds, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader):.4f}")

    # Evaluation
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for xb, yb in test_loader:
            out = model(xb)
            preds = torch.argmax(out, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(yb.cpu().numpy())

    # Filter class labels present in this fold
    labels_present = sorted(unique_labels(all_labels, all_preds))
    label_names_present = [classes[i] for i in labels_present]

    print("\nClassification Report:")
    print(classification_report(
        all_labels,
        all_preds,
        labels=labels_present,
        target_names=label_names_present,
        zero_division=0
    ))



Fold 1
Epoch 1, Loss: 0.6339
Epoch 2, Loss: 0.1432
Epoch 3, Loss: 0.0730
Epoch 4, Loss: 0.0451
Epoch 5, Loss: 0.0265
Epoch 6, Loss: 0.0205
Epoch 7, Loss: 0.0158
Epoch 8, Loss: 0.0130
Epoch 9, Loss: 0.0074
Epoch 10, Loss: 0.0083

Classification Report:
              precision    recall  f1-score   support

        Coal       1.00      0.99      1.00       284
        Sand       1.00      1.00      1.00       607
  SandyShale       0.00      0.00      0.00         0
       Shale       1.00      1.00      1.00       103
   ShalySand       1.00      0.99      0.99        91

    accuracy                           1.00      1085
   macro avg       0.80      0.80      0.80      1085
weighted avg       1.00      1.00      1.00      1085


Fold 2
Epoch 1, Loss: 0.7064
Epoch 2, Loss: 0.1622
Epoch 3, Loss: 0.0705
Epoch 4, Loss: 0.0412
Epoch 5, Loss: 0.0270
Epoch 6, Loss: 0.0234
Epoch 7, Loss: 0.0251
Epoch 8, Loss: 0.0195
Epoch 9, Loss: 0.0115
Epoch 10, Loss: 0.0096

Classification Report:
     