In [None]:
import gzip
import os
import pickle

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from google.colab import drive
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
# Mount Google Drive; the data and checkpoint paths used below live under /content/drive.
drive.mount('/content/drive')

In [None]:
!pip install elasticdeform

# Task: MV Segmentation

### Helper functions

In [None]:
def load_zipped_pickle(filename):
    """
    Read a gzip-compressed pickle file and return the object stored in it.

    Unpickling can run arbitrary code, so this should only be pointed at trusted files.
    """
    with gzip.open(filename, mode='rb') as compressed:
        contents = pickle.load(compressed)
    return contents

In [None]:
def save_zipped_pickle(obj, filename):
    """
    Pickle ``obj`` (protocol 2) into a gzip-compressed file at ``filename``.
    """
    with gzip.open(filename, mode='wb') as compressed:
        pickle.dump(obj, compressed, protocol=2)

In [None]:
def resize_frames(video, masks, target_size=(128, 128)):
    """
    Rescale every frame and every mask to ``target_size`` with bicubic interpolation.

    Masks are cast to uint8 for the resize and converted back to bool afterwards.
    Returns a pair of lists: (resized frames, resized boolean masks).
    """
    resized_video = []
    for frame in video:
        resized_video.append(cv2.resize(frame, target_size, interpolation=cv2.INTER_CUBIC))

    resized_masks = []
    for mask in masks:
        mask_u8 = mask.astype(np.uint8)
        scaled = cv2.resize(mask_u8, target_size, interpolation=cv2.INTER_CUBIC)
        resized_masks.append(scaled.astype(bool))

    return resized_video, resized_masks

In [None]:
def extract_labeled_frames(train_data):
    """
    Pull out, for each training sample, only the frames listed under its 'frames' key.

    Each sample's 'video' and 'label' arrays are reordered so the last axis comes
    first, then indexed with the sample's 'frames' entry.
    Returns two lists (videos, labels) with one array per sample.
    """
    last_axis_first = (2, 0, 1)
    train_videos, train_labels = [], []

    for sample in train_data:
        labeled_idx = sample['frames']
        selected_video = np.transpose(sample['video'], last_axis_first)[labeled_idx]
        selected_label = np.transpose(sample['label'], last_axis_first)[labeled_idx]
        train_videos.append(selected_video)
        train_labels.append(selected_label)

    return train_videos, train_labels

In [None]:
def augment_data(train_videos, train_labels, target_size=(128, 128), num_augmentations=25):
    """
    Resize every clip and append ``num_augmentations`` randomly transformed copies of it.

    For each (video, labels) pair the output lists receive the resized original first,
    followed by the augmented copies produced frame by frame with stack_transforms.
    Returns (augmented_videos, augmented_labels), two lists of per-clip frame lists.
    """
    augmented_videos, augmented_labels = [], []

    for video, labels in zip(train_videos, train_labels):
        base_frames, base_masks = resize_frames(video, labels, target_size)

        # The resized but otherwise untouched clip always comes first.
        augmented_videos.append(base_frames)
        augmented_labels.append(base_masks)

        for _ in range(num_augmentations):
            # Each frame/mask pair is transformed together so they stay aligned.
            warped_pairs = [
                stack_transforms(frame, mask)
                for frame, mask in zip(base_frames, base_masks)
            ]
            augmented_videos.append([pair[0] for pair in warped_pairs])
            augmented_labels.append([pair[1] for pair in warped_pairs])

    return augmented_videos, augmented_labels


In [None]:
def normalize_and_stack(augmented_videos, augmented_labels):
    """
    Min-max normalize each frame to the 0..255 range and stack frames and masks.

    A leading channel axis is added to every frame and mask before stacking, so both
    returned arrays have layout (N, 1, H, W).
    """
    frame_batch, mask_batch = [], []

    for clip, clip_masks in zip(augmented_videos, augmented_labels):
        for frame, mask in zip(clip, clip_masks):
            scaled = cv2.normalize(frame, None, 0, 255, cv2.NORM_MINMAX)
            frame_batch.append(np.expand_dims(scaled, axis=0))  # (1,H,W)
            mask_batch.append(np.expand_dims(mask, axis=0))     # (1,H,W)

    X = np.stack(frame_batch, axis=0)
    y = np.stack(mask_batch, axis=0)
    return X, y


In [None]:
def elastic_deformation_transform(video, mask):
    """Deform a single frame and its mask with one shared displacement grid."""
    # A single scalar drawn from U(-10, 10) fills the entire (2, 128, 128) grid.
    # NOTE(review): because every grid point gets the same offset on both axes, this
    # looks like a uniform shift rather than a locally varying warp - confirm intent.
    offset = np.random.uniform(-10, 10)
    displacement = np.ones((2,128,128)) * offset
    shared_args = dict(displacement=displacement, mode='constant', order=1)
    deformed_video = elasticdeform.deform_grid(video, **shared_args)
    deformed_mask = elasticdeform.deform_grid(mask, **shared_args)
    return deformed_video, deformed_mask

def rotation_transform(video, mask):
    """Rotate a single frame and its mask by the same random angle from U(-30, 30)."""
    # Zero displacement: only the `rotate` argument changes the image.
    no_displacement = np.zeros((2,128,128))
    angle = np.random.uniform(-30, 30)
    shared_args = dict(displacement=no_displacement, mode='constant', order=1, rotate=angle)
    rotated_video = elasticdeform.deform_grid(video, **shared_args)
    rotated_mask = elasticdeform.deform_grid(mask, **shared_args)
    return rotated_video, rotated_mask

def zoom_transform(video, mask):
    """Zoom a single frame and its mask by the same random factor from U(0.8, 1.2)."""
    # Zero displacement: only the `zoom` argument changes the image.
    no_displacement = np.zeros((2,128,128))
    zoom_factor = np.random.uniform(0.8, 1.2)
    shared_args = dict(displacement=no_displacement, mode='constant', order=1, zoom=zoom_factor)
    zoomed_video = elasticdeform.deform_grid(video, **shared_args)
    zoomed_mask = elasticdeform.deform_grid(mask, **shared_args)
    return zoomed_video, zoomed_mask

def stack_transforms(video, mask):
    """
    Run a frame and its mask through the augmentation chain, in order:
    elastic deformation, then rotation, then zoom.
    """
    pipeline = (elastic_deformation_transform, rotation_transform, zoom_transform)
    for transform in pipeline:
        video, mask = transform(video, mask)
    return video, mask

### Load, Preprocess and Split Data

In [None]:
# Load the gzip-pickled training set from Google Drive.
# extract_labeled_frames reads the 'video', 'label' and 'frames' keys of each entry.
train_data = load_zipped_pickle("/content/drive/MyDrive/train.pkl")


In [None]:
# Preprocess: keep labeled frames only, resize to 128x128, add 25 augmented copies
# per clip, then normalize and stack into (N, 1, H, W) arrays.
# NOTE(review): the augmentations draw from np.random and no seed is set in this
# notebook, so X and y will differ from run to run.
train_videos, train_labels = extract_labeled_frames(train_data)
augmented_videos, augmented_labels = augment_data(train_videos, train_labels, target_size=(128, 128), num_augmentations=25)
X, y = normalize_and_stack(augmented_videos, augmented_labels)

In [None]:
# Train/Validation Split
test_size = 0.2
batch_size = 64

# shuffle=False makes the split contiguous: the last 20% of the stacked frames form
# the validation set. Since X is stacked clip by clip (original followed by its
# augmented copies), this keeps most copies of a frame on the same side of the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=8, shuffle=False)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# The loss is computed per batch, so a fixed batch order keeps the validation loss
# comparable between epochs; shuffling brings no benefit for evaluation.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

### Define Model

In [None]:
# Model Definition

class UNet(nn.Module):
    """
    U-Net with four encoder stages, a bottleneck and four decoder stages.

    Every stage is two 3x3 conv + batch-norm + ReLU layers; down-sampling is 2x2
    max-pooling and up-sampling is a stride-2 transposed convolution. Because the
    input is pooled four times and the skip connections are concatenated with the
    up-sampled maps, H and W need to be divisible by 16. The output is passed
    through a sigmoid, so values lie in [0, 1].

    Attribute names and module creation order are part of the checkpoint format
    (state_dict keys), so they must stay as they are.
    """

    def __init__(self, in_channels=1, out_channels=1):
        super().__init__()

        # Contracting path
        self.conv1 = self._double_conv(in_channels, 64)
        self.conv2 = self._double_conv(64, 128)
        self.conv3 = self._double_conv(128, 256)
        self.conv4 = self._double_conv(256, 512)

        # Bottleneck
        self.bottleneck = self._double_conv(512, 1024)

        # Expansive path: each conv stage sees skip + up-sampled channels
        self.upconv4 = self._upsample(1024, 512)
        self.conv5 = self._double_conv(1024, 512)
        self.upconv3 = self._upsample(512, 256)
        self.conv6 = self._double_conv(512, 256)
        self.upconv2 = self._upsample(256, 128)
        self.conv7 = self._double_conv(256, 128)
        self.upconv1 = self._upsample(128, 64)
        self.conv8 = self._double_conv(128, 64)

        # Output: 1x1 convolution down to the requested number of channels
        self.outconv = nn.Conv2d(64, out_channels, kernel_size=1)

    @staticmethod
    def _double_conv(ic, oc):
        """Two (3x3 conv -> batch norm -> ReLU) layers mapping ic to oc channels."""
        layers = [
            nn.Conv2d(ic, oc, kernel_size=3, padding=1),
            nn.BatchNorm2d(oc),
            nn.ReLU(inplace=True),
            nn.Conv2d(oc, oc, kernel_size=3, padding=1),
            nn.BatchNorm2d(oc),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    @staticmethod
    def _upsample(ic, oc):
        """Stride-2 transposed conv -> batch norm -> ReLU, doubling H and W."""
        layers = [
            nn.ConvTranspose2d(ic, oc, kernel_size=2, stride=2),
            nn.BatchNorm2d(oc),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    def forward(self, x):
        # Encoder: keep each stage's output for the skip connections
        enc1 = self.conv1(x)
        enc2 = self.conv2(F.max_pool2d(enc1, 2))
        enc3 = self.conv3(F.max_pool2d(enc2, 2))
        enc4 = self.conv4(F.max_pool2d(enc3, 2))

        bottom = self.bottleneck(F.max_pool2d(enc4, 2))

        # Decoder: up-sample, concatenate the matching encoder output, refine
        dec = self.conv5(torch.cat([enc4, self.upconv4(bottom)], dim=1))
        dec = self.conv6(torch.cat([enc3, self.upconv3(dec)], dim=1))
        dec = self.conv7(torch.cat([enc2, self.upconv2(dec)], dim=1))
        dec = self.conv8(torch.cat([enc1, self.upconv1(dec)], dim=1))

        return torch.sigmoid(self.outconv(dec))


In [None]:
#Define Loss
def jaccard_similarity(y_pred, y_true, eps=1e-7):
    """
    Soft Jaccard index (intersection over union) computed over all elements.

    Args:
        y_pred: tensor of predicted probabilities.
        y_true: tensor of ground-truth values, same shape as ``y_pred``.
        eps: small constant added to numerator and denominator so that two empty
            inputs give 1.0 instead of 0/0 = NaN.

    Returns:
        Scalar tensor holding the similarity.
    """
    intersection = torch.sum(y_true * y_pred)
    union = torch.sum(y_true) + torch.sum(y_pred) - intersection
    return (intersection + eps) / (union + eps)

def jaccard_loss(y_pred, y_true, eps=1e-7):
    """
    Jaccard loss, i.e. ``1 - jaccard_similarity(y_pred, y_true)``.

    Arguments are forwarded in the order declared by jaccard_similarity; the measure
    is symmetric, so this is for readability rather than a change in value.
    """
    return 1 - jaccard_similarity(y_pred, y_true, eps=eps)

### Train

In [None]:
# Training configuration.
# `device` and `model` are used by the training and prediction cells, so they are
# created here: prefer the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet(in_channels=1, out_channels=1).to(device)

criterion = jaccard_loss
optimizer = optim.Adam(model.parameters(), lr=1e-4)
num_epochs = 40

In [None]:
# Model Training
checkpoint_dir = "/content/drive/MyDrive/models"
os.makedirs(checkpoint_dir, exist_ok=True)  # torch.save does not create missing folders

for epoch in range(num_epochs):
    model.train()
    train_losses = []
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()

        optimizer.step()
        train_losses.append(loss.item())

    # Validation
    model.eval()
    val_losses = []
    with torch.no_grad():
        for inputs_val, targets_val in val_loader:
            inputs_val, targets_val = inputs_val.to(device), targets_val.to(device)

            val_outputs = model(inputs_val)

            val_loss = criterion(val_outputs, targets_val)
            val_losses.append(val_loss.item())

    # Report epoch means rather than the loss of whichever batch happened to be last.
    avg_train_loss = np.mean(train_losses)
    avg_val_loss = np.mean(val_losses)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

    # Save model if performance criterion is met (mean validation Jaccard loss <= 0.65)
    if avg_val_loss <= 0.65:
        torch.save(model.state_dict(), f"{checkpoint_dir}/unet_epoch_{epoch + 1}.pt")

### Load best model

In [None]:
# Load the best model checkpoint if required.
# map_location remaps the stored tensors onto the current `device`, so a checkpoint
# written on a GPU runtime can also be restored on a CPU-only runtime.
# NOTE(review): the training loop writes files named unet_epoch_<N>.pt, whereas this
# path points at val_model_checkpoint_epoch_19.pt - confirm that this file exists.
model.load_state_dict(torch.load('/content/drive/MyDrive/models/val_model_checkpoint_epoch_19.pt', map_location=device))
model.eval()

### Load and preprocess Test data

In [None]:
# Number of test frames passed through the model per batch.
test_batch_size=64

In [None]:
# Load the test set
test_data = load_zipped_pickle("/content/drive/MyDrive/test.pkl")

# Same resolution as the one passed to augment_data for the training frames.
target_size = (128, 128)

# Preprocess every frame of every test video (no labels here, so all frames are used)
test_frames = []
for data in test_data:
    video = np.transpose(data['video'], (2,0,1))  # (F, H, W)
    for frame in video:
        # Resize
        resized_frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_CUBIC)
        # Normalize
        resized_frame = cv2.normalize(resized_frame, None, 0, 255, cv2.NORM_MINMAX)
        # Add channel dimension
        test_frames.append(np.expand_dims(resized_frame, 0))  # (1,H,W)

X_test = np.stack(test_frames, 0)  # (N,1,H,W)

# TensorDataset only accepts tensors, and the model expects float32 input
# (matching how X_train / X_val are converted).
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32))
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

### Predict

In [None]:
# Model outputs at or above this value are treated as foreground in the prediction cell.
# (The spelling "threshhold" is kept because the prediction cell refers to this name.)
threshhold= 0.6

In [None]:
# One boolean (H, W) mask per test frame, in loader order.
predictions = []
with torch.no_grad():
    for batch in test_loader:
        probabilities = model(batch[0].to(device))
        # Take channel 0 of every prediction, threshold it, then move it to host memory.
        batch_masks = (probabilities[:, 0] >= threshhold).cpu().numpy()
        predictions.extend(batch_masks)

### Post-Process

In [None]:

def apply_morphological_closing(predictions):
    """
    Run a morphological closing with a 3x3 cross kernel over each boolean mask.

    Returns a new list of boolean masks; the input masks are not modified.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))

    def _close(mask):
        # OpenCV works on uint8 images, so go bool -> {0, 255} -> bool.
        as_u8 = mask.astype(np.uint8) * 255
        return cv2.morphologyEx(as_u8, cv2.MORPH_CLOSE, kernel) > 0

    return [_close(mask) for mask in predictions]





In [None]:
def resize_predictions_to_original(post_processed_predictions, test_data):
    """
    Scale each predicted mask back to the resolution of the video it belongs to.

    Predictions are consumed in order: the first F masks go with the first video
    (F being the size of the last axis of its 'video' array), and so on.
    Returns a flat list of boolean masks.
    """
    final = []
    cursor = 0  # index of the first prediction belonging to the current video

    for data in test_data:
        height, width, num_frames = data['video'].shape

        for offset in range(num_frames):
            small_mask = post_processed_predictions[cursor + offset].astype(np.uint8)
            # cv2.resize takes the target size as (width, height)
            full_mask = cv2.resize(small_mask, (width, height), interpolation=cv2.INTER_AREA)
            final.append(full_mask.astype(bool))

        cursor += num_frames

    return final

In [None]:
# Post-process: apply morphological closing to the thresholded masks, then resize
# every mask back to the resolution of its source video.
post_processed_predictions = apply_morphological_closing(predictions)
resized_post_processed_predictions = resize_predictions_to_original(post_processed_predictions, test_data)

### Save prediction in a CSV file

In [None]:

def get_sequences(arr):
    """
    Extract sequences of contiguous 1s in a binary flattened array.

    Args:
        arr: 1-D numpy array of 0/1 (or boolean) values.

    Returns:
        (first_indices, lengths): two lists of Python ints holding the 0-based start
        index and the length of every run of ones, in order of appearance.
    """
    arr = arr.astype(int)
    # Pad with a zero on both sides so runs touching either end are still delimited.
    padded = np.concatenate(([0], arr, [0]))
    is_zero = padded == 0
    is_one = padded == 1

    # Vectorized transition search (instead of a Python loop over every element):
    # a 0 -> 1 step at padded index i means a run starts at index i of `arr`;
    # a 1 -> 0 step at padded index i means the run ends just before index i of `arr`.
    first_indices = np.flatnonzero(is_zero[:-1] & is_one[1:]).tolist()
    run_ends = np.flatnonzero(is_one[:-1] & is_zero[1:]).tolist()

    lengths = [end - start for start, end in zip(first_indices, run_ends)]
    return first_indices, lengths


def create_submission_from_final(final_masks, test_data, output_file="submission.csv"):
    """
    Run-length encode the final masks per video and write them to a CSV file.

    For every video, its masks are stacked along a new last axis, flattened, and each
    run of ones becomes one row: id "<video name>_<n>" and value "[start, length]".
    The running number n is shared by all videos (it is never reset between them).
    """
    ids, values = [], []
    run_counter = 0
    offset = 0  # position of the current video's first mask in final_masks

    for data in test_data:
        video_name = data['name']
        _, _, num_frames = data['video'].shape  # (H, W, frames)

        # Masks of this video, stacked to (H, W, F) and flattened to one long vector
        video_masks = final_masks[offset:offset + num_frames]
        offset += num_frames
        flat_mask = np.stack(video_masks, axis=-1).flatten().astype(int)

        start_indices, lengths = get_sequences(flat_mask)

        for start_idx, length in zip(start_indices, lengths):
            run_counter += 1
            ids.append(f"{video_name}_{run_counter}")
            values.append(f"[{start_idx}, {length}]")

    # Write the rows out and report where they went
    submission = pd.DataFrame({"id": ids, "value": values})
    submission.to_csv(output_file, index=False)
    print(f"Submission file saved to {output_file}")


In [None]:
# Create and save submission: run-length encode the full-resolution masks and write
# the resulting CSV to Google Drive.
create_submission_from_final(resized_post_processed_predictions, test_data, output_file="/content/drive/MyDrive/submission_files/submission_rightformat.csv")


# *******************************************************************************

### Unused methods