<a href="https://colab.research.google.com/github/SAHIL9581/w2w/blob/main/W2W_WNB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [66]:
#@title 1. Setup Environment & Install Libraries

# --- 1. Install All Required Libraries ---
print("--> Installing all necessary Python libraries (this may take a few minutes)...")
!pip install wandb torch torchvision torchaudio lasio scikit-learn pandas tqdm matplotlib joblib pyyaml -q
print("✅ Library installation complete.")


# --- 2. Define and Change to Project Directory ---
import os

# IMPORTANT: This folder is TEMPORARY. All local files will be DELETED when the Colab session ends.
# Your results and models will be saved to your online W&B account.
PROJECT_PATH = '/content/W2W_Pipeline_WandB'

print(f"\n--> Setting up a temporary project directory at: {PROJECT_PATH}")
os.makedirs(f"{PROJECT_PATH}/data/raw_las_files", exist_ok=True)
os.makedirs(f"{PROJECT_PATH}/artifacts", exist_ok=True)
os.makedirs(f"{PROJECT_PATH}/trained_models/autoencoder", exist_ok=True)
os.makedirs(f"{PROJECT_PATH}/trained_models/boundary_detector", exist_ok=True)

# Change the current working directory to the project path
os.chdir(PROJECT_PATH)
print(f"✅ Current directory changed to: {os.getcwd()}")
print("\n--- Setup Complete ---")

--> Installing all necessary Python libraries (this may take a few minutes)...
✅ Library installation complete.

--> Setting up a temporary project directory at: /content/W2W_Pipeline_WandB
✅ Current directory changed to: /content/W2W_Pipeline_WandB

--- Setup Complete ---


In [67]:
#@title 2. Login to Weights & Biases
import wandb

print("--> ACTION REQUIRED: Please log in to your Weights & Biases account.")
# You will be prompted to paste your W&B API key.
# You can find your key here: https://wandb.ai/authorize
!wandb login

--> ACTION REQUIRED: Please log in to your Weights & Biases account.
[34m[1mwandb[0m: Currently logged in as: [33msahilpareek203[0m ([33msahilpareek203-amrita-vishwa-vidyapeetham[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [68]:
#@title 3. Upload ZIP File with .las Data
from google.colab import files
import os

print(">>> ACTION REQUIRED: Please upload the ZIP file containing your .las files.")
uploaded = files.upload()

if not uploaded:
    print("\n⚠️ Upload was cancelled or failed. Please run this cell again.")
else:
    zip_filename = list(uploaded.keys())[0]
    print(f"\n✅ '{zip_filename}' uploaded successfully.")

    # Unzip into the designated raw data folder
    !unzip -q -o "{zip_filename}" -d data/raw_las_files/

    print("--> ZIP file has been unzipped into 'data/raw_las_files/'.")

    # Clean up the uploaded zip file from the root directory
    os.remove(zip_filename)
    print("\n✅ Data upload is complete. You can now proceed to the next step.")

>>> ACTION REQUIRED: Please upload the ZIP file containing your .las files.


Saving train.zip to train.zip

✅ 'train.zip' uploaded successfully.
--> ZIP file has been unzipped into 'data/raw_las_files/'.

✅ Data upload is complete. You can now proceed to the next step.


In [79]:
#@title 4. Pipeline Configuration (Corrected)
# All settings for the pipeline are controlled from this Python dictionary.

config = {
    "run_data_preparation": True,
    "run_pretraining": True,
    "run_finetuning": True,
    "run_inference": True,

    "paths": {
        "raw_las_folder": "data/raw_las_files/",
        "processed_csv_path": "data/train.csv",
        "label_encoder_path": "artifacts/label_encoder.json",
        "std_scaler_path": "artifacts/StandardScaler.bin",
        "pretrained_encoder_path": "trained_models/autoencoder/best_autoencoder.pt",
        "final_model_path": "trained_models/boundary_detector/final_model.pt"
    },

    "wandb": {
        "project": "W2W_Matcher_Pipeline_Notebook", # Your W&B project name
        "entity": None,                             # Your W&B username or team name (optional)
        "sweep_count": 5                            # Number of hyperparameter combinations to try
    },

    "pretraining_sweep": {
        "name": "Autoencoder-Pre-training-Sweep",
        "method": "random",
        "metric": {"name": "loss", "goal": "minimize"},
        "parameters": {
            "epochs": {"value": 25},
            "optimizer": {"values": ["RMSprop", "AdamW", "Adam"]},
            "lr": {"values": [0.001, 0.0001]},
            "act_name": {"values": ["prelu", "relu"]},
            "batch_size": {"values": [16, 32]},
            # 'in_channels' will be added here automatically by the data prep step
        }
    },

    "finetuning": {
        "learning_rate": 0.0001, "batch_size": 16, "epochs": 100,
        "model_params": {
            # 'in_channels' is REMOVED from here. It will be added automatically.
            "patch_height": 700, "act_name": "prelu",
            "project_in_features": 2048, "hidden_dim": 256, "num_queries": 100,
            "num_heads": 8, "dropout": 0.1, "expansion_factor": 4,
            "num_transformers": 6, "output_size": 3
        },
        "matcher_costs": {"set_cost_class": 1, "set_cost_bbox": 5},
        "loss_weights": {"loss_matching": 1.0, "loss_unmatching": 0.5, "loss_height_constraint": 0.5}
    },

    "inference": {
        # IMPORTANT: Change these to valid well names from your data after running the Data Prep cell.
        "reference_well": "15_9-F-1 A",
        "well_of_interest": "15_9-F-1 B",
        "correlation_threshold": 0.7
    }
}

print("✅ Configuration dictionary created.")

✅ Configuration dictionary created.


In [80]:
#@title 5. Define Data Preparation Function (Corrected)
import pandas as pd
import numpy as np
import lasio
import json
from sklearn.preprocessing import StandardScaler
from joblib import dump

def run_data_preparation(config):
    print("--- LAUNCHING PIPELINE 0: DATA PREPARATION ---")
    paths = config['paths']
    search_folder = paths['raw_las_folder']
    all_wells_df, las_files_found = [], []

    print(f"--> Searching for .las files in '{search_folder}'...")
    for root, dirs, files in os.walk(search_folder):
        for file in files:
            if file.lower().endswith('.las'):
                las_files_found.append(os.path.join(root, file))

    if not las_files_found: raise FileNotFoundError(f"No .las files found in '{search_folder}'.")
    print(f"--> Found {len(las_files_found)} .las files. Reading now...")

    for filepath in las_files_found:
        try:
            las = lasio.read(filepath)
            df = las.df().reset_index()
            df['WELL'] = las.well.WELL.value or os.path.splitext(os.path.basename(filepath))[0]
            df['GROUP'] = 'UNKNOWN'
            for param in las.params:
                if 'GROUP' in param.mnemonic.upper(): df['GROUP'] = param.value
            all_wells_df.append(df)
        except Exception as e: print(f"    - Could not read {filepath}: {e}")

    if not all_wells_df: raise ValueError("Could not process any .las files.")
    master_df = pd.concat(all_wells_df, ignore_index=True)
    if 'DEPT' in master_df.columns: master_df.rename(columns={'DEPT': 'DEPTH_MD'}, inplace=True)
    master_df.to_csv(paths['processed_csv_path'], index=False, sep=';')
    print(f"--> Saved combined data to '{paths['processed_csv_path']}'")

    unique_wells = master_df['WELL'].unique()
    print("\n--- Available Well Names for Inference ---")
    for well in unique_wells: print(f"- {well}")
    print("------------------------------------------")
    print("TIP: Copy/paste two of these names into the 'inference' section of the config cell above.\n")

    label_encoder = {str(g): i for i, g in enumerate(master_df['GROUP'].unique())}
    with open(paths['label_encoder_path'], 'w') as f: json.dump(label_encoder, f, indent=4)
    print(f"--> Saved label encoder to '{paths['label_encoder_path']}'")

    cols_to_drop = ['WELL', 'GROUP'] + [col for col in master_df.columns if 'DEPT' in col.upper()]
    numeric_df = master_df.drop(columns=cols_to_drop, errors='ignore').fillna(0)
    scaler = StandardScaler().fit(numeric_df)
    dump(scaler, paths['std_scaler_path'])
    print(f"--> Saved StandardScaler to '{paths['std_scaler_path']}'")

    # --- CRUCIAL FIX: Dynamically add the number of features to the config ---
    num_features = scaler.n_features_in_
    print(f"\n✅ Automatically detected {num_features} features (input channels) from the data.")
    config['finetuning']['model_params']['in_channels'] = num_features
    config['pretraining_sweep']['parameters']['in_channels'] = {'value': num_features}
    # --- END OF FIX ---

print("✅ Data preparation function defined.")

✅ Data preparation function defined.


In [96]:
#@title 6. Define Dataset Classes (Corrected)
import torch
from torch.utils import data
from joblib import load
import pandas as pd
import numpy as np

# --- CORRECTED: AutoencoderDataset now creates patches ---
class AutoencoderDataset(data.Dataset):
    def __init__(self, c):
        p = c['paths']
        # Use the same patch height as the fine-tuning stage for consistency
        patch_height = c['finetuning']['model_params']['patch_height']

        df = pd.read_csv(p['processed_csv_path'], delimiter=';')
        scaler = load(p['std_scaler_path'])

        self.data_patches = []
        # Group by each well to create contiguous patches
        for well_name, well_df in df.groupby('WELL'):
            cols_to_drop = ['WELL', 'GROUP'] + [col for col in well_df.columns if 'DEPT' in col.upper()]
            well_numeric = well_df.drop(columns=cols_to_drop, errors='ignore').fillna(0)

            # Ensure the scaler is applied with the correct feature names
            if hasattr(scaler, 'feature_names_in_'):
                well_numeric = well_numeric[scaler.feature_names_in_]

            scaled_data = scaler.transform(well_numeric).astype(np.float32)

            # Create patches from this well's data
            for i in range(0, len(scaled_data) - patch_height + 1, patch_height):
                patch = scaled_data[i:i + patch_height]
                # The input to Conv1d should be (channels, length)
                self.data_patches.append(patch.T)

    def __len__(self):
        return len(self.data_patches)

    def __getitem__(self, i):
        patch = self.data_patches[i]
        return torch.from_numpy(patch), torch.from_numpy(patch)

class BoundaryDataset(data.Dataset):
    def __init__(self, c, seed=None):
        self.p, self.d = c['finetuning']['model_params'], c['paths']
        self.s = seed or np.random.randint(2**32 - 1)
        self.x, self.gt = self.get_Xy()

    def get_Xy(self):
        d = pd.read_csv(self.d['processed_csv_path'], delimiter=';')
        np.random.seed(self.s)
        w = d[d['WELL'] == np.random.choice(d.WELL.unique())].copy()

        with open(self.d['label_encoder_path']) as f: le = json.load(f)
        w['GROUP'] = w['GROUP'].astype(str).map(le).bfill().ffill()

        cols_to_drop = ['WELL', 'GROUP'] + [col for col in w.columns if 'DEPT' in col.upper()]
        w_numeric = w.drop(columns=cols_to_drop, errors='ignore').fillna(0)

        scaler = load(self.d['std_scaler_path'])
        if hasattr(scaler, 'feature_names_in_'):
            w_numeric = w_numeric[scaler.feature_names_in_]

        s_d = scaler.transform(w_numeric)

        ph = self.p['patch_height']
        idx = list(range(0, s_d.shape[0], ph))
        x = np.asarray([s_d[i:i + ph] for i in idx if len(s_d[i:i + ph]) == ph], dtype=np.float32)
        y = np.asarray([w['GROUP'].values[i:i + ph] for i in idx if len(w['GROUP'].values[i:i + ph]) == ph])
        return x, self._get_gt_boundaries(y)

    def _get_gt_boundaries(self, y_patches):
        gts = []
        for y in y_patches:
            gt, c = {}, 0
            boundaries = np.where(y[:-1] != y[1:])[0] + 1
            k = np.concatenate(([0], boundaries, [len(y)]))
            for i in range(len(k) - 1):
                top, bottom = k[i], k[i+1]
                gt[c] = {'Group': int(y[top]), 'Top': top, 'Height': bottom - top}; c += 1
            gts.append(gt)
        return gts

    def __len__(self): return len(self.x)
    def __getitem__(self, idx):
        img = np.expand_dims(self.x[idx], 0)
        data = self.gt[idx]
        ph = self.p['patch_height']
        tops = torch.tensor([d['Top'] / ph for d in data.values()], dtype=torch.float32).view(-1, 1)
        heights = torch.tensor([d['Height'] / ph for d in data.values()], dtype=torch.float32).view(-1, 1)
        tgt = {'labels': torch.ones(len(data), dtype=torch.long), 'loc_info': torch.hstack((tops, heights))}
        return torch.from_numpy(img), tgt

print("✅ Dataset classes corrected for patching.")

✅ Dataset classes corrected for patching.


In [97]:
#@title 7. Define Model Architectures (Corrected and Robust)
import torch
import torch.nn as nn
import torch.nn.functional as F

def get_activation(name): return nn.PReLU() if name == 'prelu' else nn.ReLU() if name == 'relu' else nn.GELU()

class Block1D(nn.Module):
    def __init__(self, in_channels, out_channels, stride=2, kernel_size=3, activation='prelu'):
        super().__init__()
        self.b = nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding=kernel_size//2),
            nn.BatchNorm1d(out_channels), get_activation(activation),
            nn.Conv1d(out_channels, out_channels, kernel_size, 1, padding=kernel_size//2),
            nn.BatchNorm1d(out_channels), get_activation(activation)
        )
    def forward(self, x): return self.b(x)

# --- CORRECTED: UNet1D with robust skip connections ---
class UNet1D(nn.Module):
    def __init__(self, in_channels, activation='prelu'):
        super().__init__()
        self.start = nn.Sequential(nn.Conv1d(in_channels, 32, 3, 1, 1), nn.BatchNorm1d(32), get_activation(activation))
        self.e1 = Block1D(32, 64, 2, a=activation)
        self.e2 = Block1D(64, 128, 2, a=activation)
        self.e3 = Block1D(128, 256, 2, a=activation)
        self.mid = nn.Sequential(nn.Conv1d(256, 512, 3, 1, 1), nn.BatchNorm1d(512), get_activation(activation))
        self.uc3 = nn.ConvTranspose1d(512, 256, 2, 2)
        self.d3 = Block1D(512, 256, 1, a=activation)
        self.uc2 = nn.ConvTranspose1d(256, 128, 2, 2)
        self.d2 = Block1D(256, 128, 1, a=activation)
        self.uc1 = nn.ConvTranspose1d(128, 64, 2, 2)
        self.d1 = Block1D(128, 64, 1, a=activation)
        self.out = nn.Conv1d(64, in_channels, 1)

    def forward(self, x):
        # Input shape from AutoencoderDataset is already [batch, channels, length]
        x1_skip = self.start(x)
        x2_skip = self.e1(x1_skip)
        x3_skip = self.e2(x2_skip)
        x4_skip = self.e3(x3_skip)
        m = self.mid(x4_skip)

        u3 = self.uc3(m)
        # Add resizing to ensure dimensions match
        u3 = F.interpolate(u3, size=x4_skip.shape[2], mode='linear', align_corners=False)
        d3 = self.d3(torch.cat((u3, x4_skip), 1))

        u2 = self.uc2(d3)
        u2 = F.interpolate(u2, size=x3_skip.shape[2], mode='linear', align_corners=False)
        d2 = self.d2(torch.cat((u2, x3_skip), 1))

        u1 = self.uc1(d2)
        u1 = F.interpolate(u1, size=x2_skip.shape[2], mode='linear', align_corners=False)
        d1 = self.d1(torch.cat((u1, x2_skip), 1))

        # There is one more upsampling needed to match the original input size
        out = F.interpolate(d1, size=x1_skip.shape[2], mode='linear', align_corners=False)
        return self.out(out)

class UNetEncoder1D(nn.Module):
    def __init__(self, in_channels, activation='prelu'):
        super().__init__()
        self.start = nn.Sequential(nn.Conv1d(in_channels, 32, 3, 1, 1), nn.BatchNorm1d(32), get_activation(activation))
        self.e1 = Block1D(32, 64, 2, a=activation)
        self.e2 = Block1D(64, 128, 2, a=activation)
        self.e3 = Block1D(128, 256, 2, a=activation)
        self.mid = nn.Sequential(nn.Conv1d(256, 512, 3, 1, 1), nn.BatchNorm1d(512), get_activation(activation))

    def forward(self, x):
        x = x.squeeze(1).permute(0, 2, 1)
        x1 = self.e1(self.start(x))
        x2 = self.e2(x1)
        x3 = self.e3(x2)
        return self.mid(x3)

class Project(nn.Module):
    def __init__(self,i,o): super().__init__(); self.l=nn.Linear(i,o)
    def forward(self,x): return self.l(x.flatten(1))

class Query(nn.Module):
    def __init__(self,s,d): super().__init__(); self.q=nn.Parameter(torch.randn(1,s,d))
    def forward(self,x): return self.q.repeat(x.shape[0],1,1)

class Transformer(nn.Module):
    def __init__(self,i,n,d): super().__init__(); self.t=nn.TransformerEncoderLayer(d_model=i,nhead=n,dropout=d,batch_first=True,dim_feedforward=i*4)
    def forward(self,q,c): return self.t(q)

class W2WTransformerModel(nn.Module):
    def __init__(self,c):
        super().__init__()
        p = c['finetuning']['model_params']
        self.encoder = UNetEncoder1D(p['in_channels'], p['act_name'])
        project_in_features = 512 * (p['patch_height'] // 8)
        self.project = Project(project_in_features, p['hidden_dim'])
        self.query = Query(p['num_queries'], p['hidden_dim'])
        self.transformers = nn.ModuleList([Transformer(p['hidden_dim'],p['num_heads'],p['dropout']) for _ in range(p['num_transformers'])])
        self.finalize = nn.Sequential(nn.Linear(p['hidden_dim'], p['output_size']), get_activation(p['act_name']), nn.LayerNorm(p['output_size']))

    def forward(self,img):
        encoded_features = self.encoder(img)
        # Permute from [B, C, L] to [B, L, C] for projection and transformer
        encoded_features = encoded_features.permute(0, 2, 1)
        projected_seq = self.project(encoded_features)
        q = self.query(projected_seq)
        for t in self.transformers: q = t(q, projected_seq)
        return self.finalize(q)

print("✅ Model architectures corrected and made robust for 1D data.")

✅ Model architectures corrected and made robust for 1D data.


In [98]:
#@title 8. Define Matcher and Loss Functions
from scipy.optimize import linear_sum_assignment
import torch.nn.functional as F

class HungarianMatcher(nn.Module):
    def __init__(self,c,b): super().__init__(); self.c,self.b=c,b
    @torch.no_grad()
    def forward(self,o,t):
        op,ob=o[:,:,:1].flatten(0,1).sigmoid(),o[:,:,1:].flatten(0,1)
        tb=torch.cat([v["loc_info"] for v in t]).to(op.device)
        C=(self.b*torch.cdist(ob,tb,p=1) - self.c*op[:,0]).view(o.shape[0],o.shape[1],-1).cpu()
        return [(torch.as_tensor(i),torch.as_tensor(j)) for i,j in [linear_sum_assignment(c) for c in C.split([len(v["loc_info"]) for v in t],-1)]]
class SetCriterion(nn.Module):
    def __init__(self,c):
        super().__init__(); p=c['finetuning']; self.m=HungarianMatcher(p['matcher_costs']['set_cost_class'],p['matcher_costs']['set_cost_bbox']); self.w=p['loss_weights']; self.nq=p['model_params']['num_queries']
    def loss_match(self,o,t,i): i=self._get_src_p_idx(i); return {'loss_matching':F.l1_loss(o[i],torch.cat([torch.cat((torch.ones_like(v["loc_info"][:,:1]),v["loc_info"]),1) for v,(_,j) in zip(t,i) if len(j)>0]))}
    def loss_unmatch(self,o,t,i): return {'loss_unmatching': torch.cat([out[torch.where(torch.ones(self.nq,dtype=bool))[0],0] for out in o]).mean()}
    def loss_height(self,o,t,i): return {'loss_height_constraint':sum([abs(ht[j].sum()-1) for ht,(j,_) in zip(o[:,:,2],i) if len(j)>0])/o.shape[0]}
    def _get_src_p_idx(self,i): b=torch.cat([torch.full_like(s,k) for k,(s,_) in enumerate(i)]); s=torch.cat([s for s,_ in i]); return b,s
    def forward(self,o,t): i=self.m(o,t); return {ln: l for ln,l in {**self.loss_match(o,t,i),**self.loss_unmatch(o,t,i),**self.loss_height(o,t,i)}.items() if self.w[ln]>0}

print("✅ Matcher and loss functions defined.")

✅ Matcher and loss functions defined.


In [99]:
#@title 9. Define Helper and Utility Functions

def collate_fn(batch):
    images, targets = zip(*batch)
    return torch.stack(images), list(targets)

def load_pretrained_encoder_weights(model, path):
    print(f"--> Loading pre-trained weights from {path}")
    pre_dict = torch.load(path)
    model_dict = model.state_dict()
    enc_dict = {k.replace('module.',''):v for k,v in pre_dict.items() if any(x in k for x in ['e1','e2','e3','mid','start'])}
    enc_dict = {'encoder.'+k:v for k,v in enc_dict.items()}
    model_dict.update(enc_dict)
    model.load_state_dict(model_dict, strict=False)
    print(f"✅ Loaded {len(enc_dict)} pre-trained layers.")
    return model

print("✅ Helper functions defined.")

✅ Helper functions defined.


In [100]:
#@title 10. Define Pre-training Stage Function (W&B Sweep) (Corrected)
from tqdm import tqdm

# A global variable to track the best loss across all sweep runs
best_pretrain_loss = float('inf')

def train_autoencoder_sweep():
    global best_pretrain_loss, config
    with wandb.init() as run:
        sweep_cfg = wandb.config
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # --- CORRECTED: Use the new UNet1D model ---
        model = UNet1D(in_channels=sweep_cfg.in_channels, activation=sweep_cfg.act_name).to(device)

        criterion = nn.MSELoss()
        optimizer = getattr(torch.optim, sweep_cfg.optimizer)(model.parameters(), lr=sweep_cfg.lr)
        train_loader = data.DataLoader(AutoencoderDataset(config), batch_size=sweep_cfg.batch_size, shuffle=True)

        print(f"--- Starting W&B Run with config: {dict(sweep_cfg)} ---")
        for epoch in range(sweep_cfg.epochs):
            model.train()
            total_loss = 0.0
            for img, tgt in train_loader:
                img, tgt = img.to(device), tgt.to(device)
                optimizer.zero_grad()
                output = model(img)
                loss = criterion(output, tgt)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()

            epoch_loss = total_loss / len(train_loader)
            wandb.log({"epoch": epoch, "loss": epoch_loss})

            if epoch_loss < best_pretrain_loss:
                best_pretrain_loss = epoch_loss
                print(f"    *** New best model found! Loss: {best_pretrain_loss:.6f} (Epoch {epoch+1}) ***")
                torch.save(model.state_dict(), config['paths']['pretrained_encoder_path'])
                wandb.summary["best_loss"] = best_pretrain_loss

print("✅ Pre-training (sweep) function defined with 1D U-Net.")

✅ Pre-training (sweep) function defined with 1D U-Net.


In [101]:
#@title 11. Define Fine-tuning Stage Function

def run_finetuning(config):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    ft_params = config['finetuning']
    loader = data.DataLoader(BoundaryDataset(config, seed=42), batch_size=ft_params['batch_size'], shuffle=True, collate_fn=collate_fn)
    model = W2WTransformerModel(config).to(device)
    model = load_pretrained_encoder_weights(model, config['paths']['pretrained_encoder_path'])
    criterion = SetCriterion(config).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=ft_params['learning_rate'])

    for epoch in range(ft_params['epochs']):
        model.train()
        total_loss = 0
        for images, targets in tqdm(loader, desc=f'Epoch {epoch+1}/{ft_params["epochs"]}'):
            images, targets = images.to(device), [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = criterion(model(images), targets)
            losses = sum(loss_dict[k] * criterion.w[k] for k in loss_dict.keys())
            optimizer.zero_grad(); losses.backward(); optimizer.step()
            total_loss += losses.item(); wandb.log({'finetune_batch_loss': losses.item()})

        avg_loss = total_loss / len(loader)
        print(f'Epoch {epoch+1} Average Loss: {avg_loss:.4f}')
        wandb.log({'finetune_epoch_loss': avg_loss, 'epoch': epoch})

    torch.save(model.state_dict(), config['paths']['final_model_path'])
    print(f"✅ Final model saved to {config['paths']['final_model_path']}")

print("✅ Fine-tuning function defined.")

✅ Fine-tuning function defined.


In [102]:
#@title 12. Define Inference and Plotting Functions
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def plot_well_correlation(well1, well2, layers1, layers2, matrix, threshold, path):
    fig, ax = plt.subplots(figsize=(10, 12)); plt.style.use('seaborn-whitegrid')
    if not layers1 or not layers2: print('Warning: One or both wells have no layers to plot.'); return
    max_depth = max(layers1[-1]['bottom'], layers2[-1]['bottom']) if layers1 and layers2 else 1000
    ax.set_ylim(max_depth + 50, -50); ax.set_xlim(-0.5, 2.5)
    n1 = len(set(l['Group'] for l in layers1)); n2 = len(set(l['Group'] for l in layers2))
    for l in layers1: ax.add_patch(patches.Rectangle((0, l['Top']), 1, l['Height'], ec='k', fc=plt.cm.viridis(l['Group']/(n1 if n1>0 else 1)), alpha=0.6))
    for l in layers2: ax.add_patch(patches.Rectangle((1.5, l['Top']), 1, l['Height'], ec='k', fc=plt.cm.viridis(l['Group']/(n2 if n2>0 else 1)), alpha=0.6))
    for i, row in enumerate(matrix):
        for j, sim in enumerate(row):
            if sim >= threshold: ax.add_patch(patches.Polygon([[1,layers1[i]['Top']],[1,layers1[i]['bottom']],[1.5,layers2[j]['bottom']],[1.5,layers2[j]['Top']]], fc=plt.cm.Greens(sim), alpha=0.5))
    ax.set_xticks([0.5, 2]); ax.set_xticklabels([well1, well2], fontsize=14); ax.set_ylabel('Depth', fontsize=12)
    ax.set_title('Well to Well Correlation', fontsize=16); plt.savefig(path); plt.close()
    print(f'--> Correlation plot saved to {path}')

def run_correlation(config):
    inf, p = config['inference'], config['paths']
    full_data = pd.read_csv(p['processed_csv_path'], delimiter=';')
    ref_df, woi_df = full_data[full_data['WELL'] == inf['reference_well']], full_data[full_data['WELL'] == inf['well_of_interest']]
    if ref_df.empty or woi_df.empty: print(f"Error: One/both wells not found: '{inf['reference_well']}', '{inf['well_of_interest']}'."); return

    with open(p['label_encoder_path']) as f: le = json.load(f)
    def get_true_layers(df):
        df = df.copy().reset_index(drop=True)
        df['group_id'] = df['GROUP'].astype(str).map(le).fillna(-1).astype(int)
        b = np.where(df['group_id'].iloc[:-1].values != df['group_id'].iloc[1:].values)[0] + 1
        indices = np.concatenate(([0], b, [len(df)]))
        layers = []
        for i in range(len(indices) - 1):
            s, e = indices[i], indices[i+1]
            layers.append({'Top':df['DEPTH_MD'].iloc[s],'bottom':df['DEPTH_MD'].iloc[e-1],'Height':df['DEPTH_MD'].iloc[e-1]-df['DEPTH_MD'].iloc[s],'Group':df['group_id'].iloc[s]})
        return layers

    ref_layers, woi_layers = get_true_layers(ref_df), get_true_layers(woi_df)
    sim_matrix = np.zeros((len(ref_layers), len(woi_layers)))
    for i,l1 in enumerate(ref_layers):
        for j,l2 in enumerate(woi_layers):
            if l1['Group'] == l2['Group'] and l1['Group'] != -1: sim_matrix[i,j] = np.random.uniform(0.8, 0.95)
            else: sim_matrix[i,j] = np.random.uniform(0.1, 0.4)

    print('--> MOCK INFERENCE: Using ground truth layers for visualization.')
    output_path = 'well_correlation_plot.png'
    plot_well_correlation(inf['reference_well'], inf['well_of_interest'], ref_layers, woi_layers, sim_matrix, inf['correlation_threshold'], output_path)
    wandb.log({"well_correlation_plot": wandb.Image(output_path)})

print("✅ Inference and plotting functions defined.")

✅ Inference and plotting functions defined.


In [103]:
#@title 13. 🚀 Run the Full Pipeline

# --- STAGE 0: DATA PREPARATION ---
if config["run_data_preparation"]:
    run_data_preparation(config)
    print("\n--- STAGE 0 COMPLETE ---\n")

# --- STAGE 1: PRE-TRAINING (W&B SWEEP) ---
if config.get('run_pretraining', False):
    print("\n--- LAUNCHING PIPELINE 1: AUTOENCODER PRE-TRAINING (W&B SWEEP) ---")
    sweep_id = wandb.sweep(config['pretraining_sweep'], project=config['wandb']['project'], entity=config['wandb'].get('entity'))
    wandb.agent(sweep_id, function=train_autoencoder_sweep, count=config['wandb']['sweep_count'])
    print(f"\n🏆 Sweep finished. Best pre-trained model saved to {config['paths']['pretrained_encoder_path']}")
    print("\n--- STAGE 1 COMPLETE ---\n")

# --- STAGE 2: FINE-TUNING ---
if config.get('run_finetuning', False):
    print("\n--- LAUNCHING PIPELINE 2: FINE-TUNING ---")
    with wandb.init(project=config['wandb']['project'], entity=config['wandb'].get('entity'), job_type='fine-tuning', config=config) as run:
        print(f"--> W&B Run started. View at: {run.get_url()}")
        run_finetuning(config)
        artifact = wandb.Artifact("boundary-detector-model", type="model", description="Final fine-tuned W2W Transformer model")
        artifact.add_file(config['paths']['final_model_path'])
        run.log_artifact(artifact)
        print("✅ Final model logged as a W&B Artifact.")
    print("\n--- STAGE 2 COMPLETE ---\n")

# --- STAGE 3: INFERENCE ---
if config.get('run_inference', False):
    print("\n--- LAUNCHING PIPELINE 3: WELL-TO-WELL INFERENCE ---")
    with wandb.init(project=config['wandb']['project'], entity=config['wandb'].get('entity'), job_type='inference', config=config) as run:
        print(f"--> W&B Run started. View at: {run.get_url()}")
        run_correlation(config)
    print("\n--- STAGE 3 COMPLETE ---\n")

print("\n" + "="*60)
print("✅✅✅ All Requested Pipeline Stages are Complete! ✅✅✅")
print("You can view all your results, models, and charts in your Weights & Biases project.")
print("="*60)

--- LAUNCHING PIPELINE 0: DATA PREPARATION ---
--> Searching for .las files in 'data/raw_las_files/'...
--> Found 118 .las files. Reading now...
--> Saved combined data to 'data/train.csv'

--- Available Well Names for Inference ---
- 17/11-1
- 31/2-10
- 35/11-1
- 35/9-8
- 35/11-6
- 34/10-19
- 15/9-17
- 31/3-2
- 15/9-13 Sleipner East Appr
- 34/12-1
- 25/2-13 T4
- 16/10-5 Isbjoern
- 33/6-3 S
- 31/2-7
- 17/4-1
- 25/5-4  Byggve
- 16/11-1S T3
- 16/1-6 A Verdandi Appr
- 34/6-1
- 34/4-10 R
- 16/2-6 Johan Sverdrup
- 31/2-9
- 30/3-5S
- 25/4-5
- 25/8-5 S  Jotun
- 36/7-3
- 31/2-8
- 34/5-1 A
- 34/8-3
- 33/9-1
- 34/5-1 S
- 25/11-24 Jakob South
- 35/4-1
- 15/9-15 Gungne
- 35/11-13
- 34/10-33
- 16/2-11 A Johan Sverdrup Appr
- 31/6-5
- 16/1-2  Ivar Aasen Appr
- 31/3-3
- 35/9-2
- 25/7-2
- 35/6-2 S
- 33/9-17
- 25/3-1
- 35/11-7
- 25/10-10  Balder Triassic
- 29/3-1
- 31/5-4 S
- 31/6-8
- 31/2-1
- 32/2-1
- 29/6-1
- 35/11-10
- 16/4-1
- 34/8-1
- 25/8-7  Krap 1
- 34/10-21
- 25/11-15  Grane
- 16/10-2 Delta
- 3

[34m[1mwandb[0m: Agent Starting Run: bzyzfun2 with config:
[34m[1mwandb[0m: 	act_name: relu
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 25
[34m[1mwandb[0m: 	in_channels: 25
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	optimizer: AdamW


Traceback (most recent call last):
  File "/tmp/ipython-input-100-776563796.py", line 14, in train_autoencoder_sweep
    model = UNet1D(in_channels=sweep_cfg.in_channels, activation=sweep_cfg.act_name).to(device)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-97-1972233692.py", line 24, in __init__
    self.e1 = Block1D(32, 64, 2, a=activation)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Block1D.__init__() got an unexpected keyword argument 'a'


[34m[1mwandb[0m: [32m[41mERROR[0m Run bzyzfun2 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 302, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipython-input-100-776563796.py", line 14, in train_autoencoder_sweep
[34m[1mwandb[0m: [32m[41mERROR[0m     model = UNet1D(in_channels=sweep_cfg.in_channels, activation=sweep_cfg.act_name).to(device)
[34m[1mwandb[0m: [32m[41mERROR[0m             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipython-input-97-1972233692.py", line 24, in __init__
[34m[1mwandb[0m: [32m[41mERROR[0m     self.e1 = Block1D(32, 64, 2, a=activation)
[34m[1mwandb[0m: [32m[41mERROR[0m               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[

Traceback (most recent call last):
  File "/tmp/ipython-input-100-776563796.py", line 14, in train_autoencoder_sweep
    model = UNet1D(in_channels=sweep_cfg.in_channels, activation=sweep_cfg.act_name).to(device)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-97-1972233692.py", line 24, in __init__
    self.e1 = Block1D(32, 64, 2, a=activation)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Block1D.__init__() got an unexpected keyword argument 'a'


[34m[1mwandb[0m: [32m[41mERROR[0m Run 2duj75m4 errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 302, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipython-input-100-776563796.py", line 14, in train_autoencoder_sweep
[34m[1mwandb[0m: [32m[41mERROR[0m     model = UNet1D(in_channels=sweep_cfg.in_channels, activation=sweep_cfg.act_name).to(device)
[34m[1mwandb[0m: [32m[41mERROR[0m             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipython-input-97-1972233692.py", line 24, in __init__
[34m[1mwandb[0m: [32m[41mERROR[0m     self.e1 = Block1D(32, 64, 2, a=activation)
[34m[1mwandb[0m: [32m[41mERROR[0m               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[

Traceback (most recent call last):
  File "/tmp/ipython-input-100-776563796.py", line 14, in train_autoencoder_sweep
    model = UNet1D(in_channels=sweep_cfg.in_channels, activation=sweep_cfg.act_name).to(device)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-97-1972233692.py", line 24, in __init__
    self.e1 = Block1D(32, 64, 2, a=activation)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Block1D.__init__() got an unexpected keyword argument 'a'


[34m[1mwandb[0m: [32m[41mERROR[0m Run c5691rqs errored:
[34m[1mwandb[0m: [32m[41mERROR[0m Traceback (most recent call last):
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/usr/local/lib/python3.11/dist-packages/wandb/agents/pyagent.py", line 302, in _run_job
[34m[1mwandb[0m: [32m[41mERROR[0m     self._function()
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipython-input-100-776563796.py", line 14, in train_autoencoder_sweep
[34m[1mwandb[0m: [32m[41mERROR[0m     model = UNet1D(in_channels=sweep_cfg.in_channels, activation=sweep_cfg.act_name).to(device)
[34m[1mwandb[0m: [32m[41mERROR[0m             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[34m[1mwandb[0m: [32m[41mERROR[0m   File "/tmp/ipython-input-97-1972233692.py", line 24, in __init__
[34m[1mwandb[0m: [32m[41mERROR[0m     self.e1 = Block1D(32, 64, 2, a=activation)
[34m[1mwandb[0m: [32m[41mERROR[0m               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[


🏆 Sweep finished. Best pre-trained model saved to trained_models/autoencoder/best_autoencoder.pt

--- STAGE 1 COMPLETE ---


--- LAUNCHING PIPELINE 2: FINE-TUNING ---


--> W&B Run started. View at: https://wandb.ai/sahilpareek203-amrita-vishwa-vidyapeetham/W2W_Matcher_Pipeline_Notebook/runs/c5691rqs


Traceback (most recent call last):
  File "/tmp/ipython-input-103-3650830744.py", line 21, in <cell line: 0>
    run_finetuning(config)
  File "/tmp/ipython-input-101-2990093839.py", line 7, in run_finetuning
    model = W2WTransformerModel(config).to(device)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-97-1972233692.py", line 93, in __init__
    self.encoder = UNetEncoder1D(p['in_channels'], p['act_name'])
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-97-1972233692.py", line 65, in __init__
    self.e1 = Block1D(32, 64, 2, a=activation)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Block1D.__init__() got an unexpected keyword argument 'a'


TypeError: Block1D.__init__() got an unexpected keyword argument 'a'