In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import pickle
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

In [2]:
with open('data/train_expert.pkl', 'rb') as f:
    train_expert = pickle.load(f)
with open('data/train_amateur.pkl', 'rb') as f:
    train_amateur = pickle.load(f)
with open('data/val_expert.pkl', 'rb') as f:
    val_expert = pickle.load(f)
with open('data/val_amateur.pkl', 'rb') as f:
    val_amateur = pickle.load(f)

In [3]:
class UNet(nn.Module):
    def __init__(self, in_channels=1, out_channels=1):
        super(UNet, self).__init__()

        def conv_block(in_ch, out_ch):
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(inplace=True)
            )

        self.encoder1 = conv_block(in_channels, 64)
        self.encoder2 = conv_block(64, 128)
        self.encoder3 = conv_block(128, 256)
        self.encoder4 = conv_block(256, 512)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = conv_block(512, 1024)

        self.upconv4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.decoder4 = conv_block(1024, 512)
        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.decoder3 = conv_block(512, 256)
        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.decoder2 = conv_block(256, 128)
        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.decoder1 = conv_block(128, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        # Encoder
        e1 = self.encoder1(x)
        e2 = self.encoder2(self.pool(e1))
        e3 = self.encoder3(self.pool(e2))
        e4 = self.encoder4(self.pool(e3))

        # Bottleneck
        b = self.bottleneck(self.pool(e4))

        # Decoder
        d4 = self.upconv4(b)
        d4 = torch.cat((e4, d4), dim=1)
        d4 = self.decoder4(d4)

        d3 = self.upconv3(d4)
        d3 = torch.cat((e3, d3), dim=1)
        d3 = self.decoder3(d3)

        d2 = self.upconv2(d3)
        d2 = torch.cat((e2, d2), dim=1)
        d2 = self.decoder2(d2)

        d1 = self.upconv1(d2)
        d1 = torch.cat((e1, d1), dim=1)
        d1 = self.decoder1(d1)

        # Final output
        out = self.final_conv(d1)
        return out

In [4]:
class UNet(nn.Module):
    def __init__(self, in_channels=1, out_channels=1):
        super(UNet, self).__init__()

        def conv_block(in_ch, out_ch):
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                #nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
                #nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True)
            )

        self.encoder1 = conv_block(in_channels, 64)
        self.encoder2 = conv_block(64, 128)
        self.encoder3 = conv_block(128, 256)
        self.encoder4 = conv_block(256, 512)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = conv_block(512, 1024)
        self.bottleneck2 = conv_block(1024, 1024)
        self.bottleneck3 = conv_block(1024, 1024)

        self.upconv4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.decoder4 = conv_block(1024, 512)
        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.decoder3 = conv_block(512, 256)
        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.decoder2 = conv_block(256, 128)
        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.decoder1 = conv_block(128, 64)

        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        # Encoder
        e1 = self.encoder1(x)
        e2 = self.encoder2(self.pool(e1))
        e3 = self.encoder3(self.pool(e2))
        e4 = self.encoder4(self.pool(e3))

        # Bottleneck
        b = self.bottleneck(self.pool(e4))
        b2 = self.bottleneck2(b)
        b3 = self.bottleneck3(b2)

        # Decoder
        d4 = self.upconv4(b2)
        d4 = torch.cat((e4, d4), dim=1)
        d4 = self.decoder4(d4)

        d3 = self.upconv3(d4)
        d3 = torch.cat((e3, d3), dim=1)
        d3 = self.decoder3(d3)

        d2 = self.upconv2(d3)
        d2 = torch.cat((e2, d2), dim=1)
        d2 = self.decoder2(d2)

        d1 = self.upconv1(d2)
        d1 = torch.cat((e1, d1), dim=1)
        d1 = self.decoder1(d1)

        # Final output
        out = self.final_conv(d1)
        return out


In [5]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        probs = torch.sigmoid(logits)
        targets = targets.float()

        ce_loss = nn.BCEWithLogitsLoss(reduction='none')(logits, targets)
        p_t = probs * targets + (1 - probs) * (1 - targets)
        focal_weight = self.alpha * (1 - p_t) ** self.gamma

        loss = focal_weight * ce_loss
        return loss.mean()

In [6]:
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms.functional import resize
class MitralDataset(Dataset):
    def __init__(self, data, target_size=(512, 512)):
        self.data = data
        self.target_size = target_size

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        frame = self.data[idx]['frame']
        label = self.data[idx]['label']
        box = self.data[idx]['box']

        frame_tensor = torch.tensor(frame, dtype=torch.float32).unsqueeze(0)
        label_tensor = torch.tensor(label, dtype=torch.float32).unsqueeze(0)
        box_tensor = torch.tensor(box, dtype=torch.float32).unsqueeze(0)

        frame_tensor = resize(frame_tensor, self.target_size)
        label_tensor = resize(label_tensor, self.target_size)
        box_tensor = resize(box_tensor, self.target_size)


        return frame_tensor, label_tensor, box_tensor


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_expert_dataset = MitralDataset(train_expert)
val_expert_dataset = MitralDataset(val_expert)

train_loader = DataLoader(train_expert_dataset, batch_size=6, shuffle=True)
val_loader = DataLoader(val_expert_dataset, batch_size=6, shuffle=False)

In [8]:
model = UNet()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
criterion = FocalLoss(alpha=0.25, gamma=2.0)
epochs = 30


In [9]:
def calculate_iou(predictions, labels):
    predictions = predictions.bool()
    labels = labels.bool()

    intersection = (predictions & labels).sum(dim=(1, 2, 3))
    union = (predictions | labels).sum(dim=(1, 2, 3))

    iou = intersection / (union + 1e-8)
    return iou.mean().item()

In [None]:
model.to(device)
for epoch in tqdm(range(epochs)):
    model.train()
    epoch_loss = 0.0
    for images, labels, _ in train_loader:
        images, labels= images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / len(train_loader):.4f}")

    # Evaluate IoU on validation set
    model.eval()
    total_iou = 0.0
    with torch.no_grad():
        for val_images, val_labels,_ in val_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            output = model(val_images)
            val_predictions = (torch.sigmoid(output) > 0.5).float()
            total_iou += calculate_iou(val_predictions, val_labels)
    avg_iou = total_iou / len(val_loader)
    print(f"Epoch [{epoch + 1}/{epochs}], Validation IoU: {avg_iou:.4f}")

  0%|                                                                           | 0/30 [00:00<?, ?it/s]

Epoch [1/30], Loss: 0.0228


  3%|██                                                             | 1/30 [17:44<8:34:33, 1064.60s/it]

Epoch [1/30], Validation IoU: 0.0000


In [11]:
import gzip
with gzip.open('data/test.pkl', 'rb') as f:
    test = pickle.load(f)

In [12]:
# def get_sequences(arr):
#     first_indices, last_indices, lengths = [], [], []
#     n, i = len(arr), 0
#     arr = [0] + list(arr) + [0]
#     for index, value in enumerate(arr[:-1]):
#         if arr[index+1]-arr[index] == 1:
#             first_indices.append(index)
#         if arr[index+1]-arr[index] == -1:
#             last_indices.append(index)
#     lengths = list(np.array(last_indices)-np.array(first_indices))
#     return first_indices, lengths

In [13]:
def get_sequences(arr):
    arr = np.concatenate(([0], arr, [0]))
    changes = np.diff(arr)
    starts = np.where(changes == 1)[0]
    ends = np.where(changes == -1)[0]
    lengths = ends - starts
    return starts, lengths

In [14]:
class TestDataset(Dataset):
    def __init__(self, test_data):
        self.test_data = test_data

    def __len__(self):
        return len(self.test_data)

    def __getitem__(self, idx):
        video_name = self.test_data[idx]['name']
        video_frames = self.test_data[idx]['video']

        frames_tensor = torch.tensor(video_frames, dtype=torch.float32).permute(2, 0, 1)

        return video_name, frames_tensor

test_dataset = TestDataset(test)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [15]:
import cv2
from torchvision.transforms.functional import resize

model.eval()
submission_data = []
threshold = 0.5
with torch.no_grad():
    for video_name, frames_tensor in test_loader:
        video_name = video_name[0]
        frames_tensor = frames_tensor.squeeze(0).to(device)  # [num_frames, H, W]
        num_frames, original_height, original_width = frames_tensor.shape
        target_size = (512, 512)

        labels_tensor = torch.zeros((num_frames, original_height, original_width), dtype=torch.uint8)

        for frame_idx in range(num_frames):

            frame = frames_tensor[frame_idx].unsqueeze(0).unsqueeze(0)  # [1, 1, H, W]
            resized_frame = resize(frame, target_size).to(device)      # [1, 1, 512, 512]


            output = model(resized_frame)  # [1, 1, 512, 512]
            predicted_mask = torch.sigmoid(output).squeeze().cpu().numpy()  # [512, 512]

            restored_mask = cv2.resize(predicted_mask, (original_width, original_height), interpolation=cv2.INTER_NEAREST)

            labels_tensor[frame_idx] = torch.from_numpy((restored_mask > threshold).astype(np.uint8))


        labels_tensor = labels_tensor.permute(1, 2, 0)  # [H, W, num_frames]

        flattened_labels = labels_tensor.flatten()
        start_indices, lengths = get_sequences(flattened_labels)

        record_counter = 0
        for start_idx, length in zip(start_indices, lengths):
            submission_data.append({
                "id": f"{video_name}_{record_counter}",
                "value": f"[{start_idx}, {length}]"
            })
            record_counter += 1

In [16]:
print(submission_data[:10])

[{'id': 'E9AHVWGBUF_0', 'value': '[9100158, 1]'}, {'id': 'E9AHVWGBUF_1', 'value': '[9100261, 1]'}, {'id': 'E9AHVWGBUF_2', 'value': '[9100364, 1]'}, {'id': 'E9AHVWGBUF_3', 'value': '[9100467, 1]'}, {'id': 'E9AHVWGBUF_4', 'value': '[9100570, 1]'}, {'id': 'E9AHVWGBUF_5', 'value': '[9184721, 1]'}, {'id': 'E9AHVWGBUF_6', 'value': '[9184824, 1]'}, {'id': 'E9AHVWGBUF_7', 'value': '[9184927, 1]'}, {'id': 'E9AHVWGBUF_8', 'value': '[9185030, 1]'}, {'id': 'E9AHVWGBUF_9', 'value': '[9185133, 1]'}]


In [17]:
print(len(submission_data))

932132


In [18]:
import pandas as pd

df = pd.DataFrame(submission_data)
df.to_csv("predictions2.csv", index=False)