In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Model added : physio-seg-public

In [3]:
# Kaggle paths
DATA_DIR = r"/kaggle/input/physionet-ecg-image-digitization/"
MODEL_DIR = r"/kaggle/input/physio-seg-public/pytorch/net3_009_4200/1"
TEST_IMG_DIR = os.path.join(DATA_DIR, r"test")
TEST_CSV = os.path.join(DATA_DIR, r"test.csv")
SAMPLE_SUB = os.path.join(DATA_DIR, r"sample_submission.parquet")
OUT_SUB = r"/kaggle/working/submission.csv"

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cpu


# Model - Minimal UNet Blocks

In [5]:
class ConvBNReLU(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.conv(x)

In [6]:
class DecoderBlock(nn.Module):
    def __init__(self, in_ch, skip_ch, out_ch):
        super().__init__()
        self.conv1 = ConvBNReLU(in_ch + skip_ch, out_ch)
        self.conv2 = ConvBNReLU(out_ch, out_ch)

    def forward(self, x, skip):
        x = F.interpolate(x, scale_factor=2, mode="nearest")
        x = torch.cat([x, skip], dim=1)
        x = self.conv1(x)
        x = self.conv2(x)
        return x

# ResNet34 Encoder (Torchvision-Free)

In [7]:
class Encoder(nn.Module):
    def __init__(self):
        super().__init__()
        from torchvision.models import resnet34
        m = resnet34(weights=None)

        self.layer0 = nn.Sequential(m.conv1, m.bn1, m.relu)
        self.layer1 = nn.Sequential(m.maxpool, m.layer1)
        self.layer2 = m.layer2
        self.layer3 = m.layer3
        self.layer4 = m.layer4

    def forward(self, x):
        x0 = self.layer0(x)
        x1 = self.layer1(x0)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        return x0, x1, x2, x3, x4

# Full UNet Model (Matches physio-seg)

In [8]:
class ECGUNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.encoder = Encoder()

        self.decoder4 = DecoderBlock(512, 256, 256)
        self.decoder3 = DecoderBlock(256, 128, 128)
        self.decoder2 = DecoderBlock(128, 64, 64)
        self.decoder1 = DecoderBlock(64, 64, 16)
        
        self.head     = nn.Conv2d(16, 4, kernel_size=1)


    def forward(self, x):
        x0, x1, x2, x3, x4 = self.encoder(x)
        d4 = self.decoder4(x4, x3)
        d3 = self.decoder3(d4, x2)
        d2 = self.decoder2(d3, x1)
        d1 = self.decoder1(d2, x0)
        return self.head(d1)

# Load physio-seg Weights

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ECGUNet().to(device)

ckpt = torch.load(
    "/kaggle/input/physio-seg-public/pytorch/net3_009_4200/1/iter_0004200.pt",
    map_location=device
)

state = ckpt["state_dict"] if "state_dict" in ckpt else ckpt

clean = {}
for k, v in state.items():
    if "num_batches_tracked" in k:
        continue
    clean[k.replace("decoder.block.", "decoder.")\
            .replace("pixel.", "head.")] = v

model.load_state_dict(clean, strict=False)
model.eval()

print("Model loaded successfully")

Model loaded successfully


# Image Preprocessing

In [10]:
def load_image(path):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (1024, 512))
    img = img.astype(np.float32) / 255.0

    # replicate grayscale → RGB
    img = np.stack([img, img, img], axis=0)
    return torch.from_numpy(img).unsqueeze(0)

# Inference + Submission

In [11]:
@torch.no_grad()
def predict_mask(img_path):
    x = load_image(img_path).to(device)
    y = model(x)                # [1, 4, H, W]
    y = torch.sigmoid(y[:, 0])  # ECG trace channel
    return y.squeeze().cpu().numpy()

In [12]:
IMAGE_IDS = sorted(os.listdir(TEST_IMG_DIR))

records = []

for name in tqdm(IMAGE_IDS):
    img_path = os.path.join(TEST_IMG_DIR, name)

    mask = predict_mask(img_path)

    # simple Day-1 threshold
    binary = (mask > 0.5).astype(np.uint8)

    # flatten for submission (example format)
    rle = binary.flatten().tolist()

    records.append({
        "image_id": name,
        "prediction": " ".join(map(str, rle))
    })

100%|██████████| 2/2 [00:51<00:00, 25.84s/it]


In [13]:
df = pd.DataFrame(records)
df.to_csv(OUT_SUB, index=False)
print("submission.csv saved")

submission.csv saved


In [14]:
df.head()

Unnamed: 0,image_id,prediction
0,1053922973.png,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
1,2352854581.png,0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
