In [12]:
import pickle
import gzip
import numpy as np
import os

### Helper functions

In [13]:
def load_zipped_pickle(filename):
    with gzip.open(filename, 'rb') as f:
        loaded_object = pickle.load(f)
        return loaded_object

In [14]:
def save_zipped_pickle(obj, filename):
    with gzip.open(filename, 'wb') as f:
        pickle.dump(obj, f, 2)

### Load data, make predictions and save prediction in correct format

In [15]:
# load data
train_data = load_zipped_pickle("data/train.pkl")
test_data = load_zipped_pickle("data/test.pkl")

KeyboardInterrupt: 

In [None]:
expert_train_data = [entry for entry in train_data if entry.get("dataset") == "expert"]

In [None]:
# print(train_data[0].keys())
# print(train_data[50]['dataset'])
# print(train_data[50]['video'].shape)
# print(train_data[5]['dataset'])
# print(train_data[5]['video'].shape)
# for i in test_data:
#     print(i['video'].shape)
print(len(test_data[0]['video']))

586


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split
import cv2

# Constants
# right now just resize down to 128,128. 
# For expertly labeled videos, this is lower res., for amateur videos they have 
# around the same resolution
INPUT_SHAPE = (128, 128, 1)
BATCH_SIZE = 8

class DataGenerator(Sequence):
    def __init__(self, data, input_shape=INPUT_SHAPE, batch_size=BATCH_SIZE):
        self.data = data
        self.input_shape = input_shape
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.data) / self.batch_size))

    def __getitem__(self, idx):
        batch_data = self.data[idx * self.batch_size:(idx + 1) * self.batch_size]
        X, y = [], []

        for entry in batch_data:
            video = entry['video']
            label = entry['label']
            frames = entry['frames']

            for frame_idx in frames:
                frame = video[:,:,frame_idx]  # get annotated frame
                label_frame = label[:,:,frame_idx]  # get label for frame

                label_frame = np.asarray(label_frame, dtype=np.float32)
                # resize frame and label to the input shape
                resized_frame = cv2.resize(frame, self.input_shape[:2])
                resized_label = cv2.resize(label_frame, self.input_shape[:2])

                # store frame and corresponding label
                X.append(np.expand_dims(resized_frame, axis=-1))
                y.append(np.expand_dims(resized_label, axis=-1))

        return np.array(X), np.array(y)

def build_unet(input_shape=INPUT_SHAPE):
    inputs = layers.Input(shape=input_shape)

    # Encoder
    c1 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(inputs)
    c1 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(c1)
    p1 = layers.MaxPooling2D((2, 2))(c1)

    c2 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(p1)
    c2 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c2)
    p2 = layers.MaxPooling2D((2, 2))(c2)

    # Bottleneck
    c3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(p2)
    c3 = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(c3)

    # Decoder
    u4 = layers.UpSampling2D((2, 2))(c3)
    u4 = layers.Conv2D(32, (2, 2), activation='relu', padding='same')(u4)
    c4 = layers.Concatenate()([u4, c2])
    c4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c4)
    c4 = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(c4)

    u5 = layers.UpSampling2D((2, 2))(c4)
    u5 = layers.Conv2D(16, (2, 2), activation='relu', padding='same')(u5)
    c5 = layers.Concatenate()([u5, c1])
    c5 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(c5)
    c5 = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(c5)

    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(c5)

    model = models.Model(inputs, outputs)
    return model

train_split, val_split = train_test_split(train_data, test_size=0.2, random_state=42)

# prepare data
train_gen = DataGenerator(train_split)
val_gen = DataGenerator(val_split)

# build model
model = build_unet()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# train 
history = model.fit(train_gen, validation_data=val_gen, epochs=10)

2024-12-13 15:44:50.465559: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-13 15:44:50.549773: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-12-13 15:44:50.627655: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734101090.698920   49734 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734101090.716754   49734 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-13 15:44:50.885184: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 1s/step - accuracy: 0.4674 - loss: 1.2236 - val_accuracy: 0.9889 - val_loss: 0.3605
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.9876 - loss: 0.3702 - val_accuracy: 0.9889 - val_loss: 0.2844
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.9879 - loss: 0.2741 - val_accuracy: 0.9889 - val_loss: 0.2006
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.9876 - loss: 0.1840 - val_accuracy: 0.9889 - val_loss: 0.1143
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.9891 - loss: 0.0946 - val_accuracy: 0.9889 - val_loss: 0.0633
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - accuracy: 0.9875 - loss: 0.0596 - val_accuracy: 0.9889 - val_loss: 0.0462
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [None]:
jaccard_indices = []

for i in range(len(val_gen)):
    X_val, y_val = val_gen[i]
    predictions = model.predict(X_val)

    predictions = (predictions > 0.5).astype(np.float32)
    y_val = (y_val > 0.5).astype(np.float32)

    for pred, true in zip(predictions, y_val):
        intersection = np.sum(pred * true)
        union = np.sum(pred + true) - intersection
        jaccard = intersection / union if union != 0 else 0
        jaccard_indices.append(jaccard)

average_jaccard_index = np.mean(jaccard_indices)
print(f"avg jaccard index on val set: {average_jaccard_index}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 273ms/step
avg jaccard index on val set: 0.06868281215429306


In [24]:
def get_sequences(arr):
    first_indices, last_indices, lengths = [], [], []
    arr = [0] + list(arr) + [0]
    for index, value in enumerate(arr[:-1]):
        if (arr[index+1] == 1) and (arr[index] == 0):
            first_indices.append(index+1)
        if (arr[index+1] == 0) and (arr[index] == 1):
            last_indices.append(index)
    lengths = list(np.array(last_indices)-np.array(first_indices))
    return [f'[{first_indices[i]}, {lengths[i]}]' for i in range(len(lengths))]

def flatten_mask_to_rle(mask):
    flat_mask = mask.flatten()
    return get_sequences(flat_mask)

In [25]:
ids = []
values = []

for entry in test_data:
    video_name = entry['name']
    video = entry['video']
    original_height, original_width = video.shape[:2]  # original dimensions
    full_frames = []
    for i in range(video.shape[2]):
        frame = video[:, :, i]
        frame_resized = cv2.resize(frame, INPUT_SHAPE[:2])
        frame_resized = np.expand_dims(frame_resized, axis=(0, -1))

        pred = model.predict(frame_resized)
        pred_binary = (pred > 0.5).astype(np.uint8)[0, :, :, 0]
        # just resize back up to orignal dim from (128,128). Not ideal but temporary approach
        pred_original_size = cv2.resize(pred_binary, (original_width, original_height), interpolation=cv2.INTER_NEAREST)
        full_frames.append(pred_original_size)

    unique_id_counter = 0
    mask = np.stack(full_frames, axis=-1)
    rle_segments = flatten_mask_to_rle(mask)
    for rle in rle_segments:
        ids.append(f"{video_name}_{unique_id_counter}")
        values.append(rle)
        unique_id_counter += 1

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35

In [26]:
import pandas as pd

# the sample.csv file has like 666000 rows, so if the length of ids is much less, something is probably off
print(len(ids))
submission_df = pd.DataFrame({"id": ids, "value": values})
submission_df.to_csv("predictions/prediction.csv", index=False)

69244


In [27]:
submission_df

Unnamed: 0,id,value
0,E9AHVWGBUF_0,"[43677, 0]"
1,E9AHVWGBUF_1,"[43683, 0]"
2,E9AHVWGBUF_2,"[43685, 1]"
3,E9AHVWGBUF_3,"[43693, 1]"
4,E9AHVWGBUF_4,"[43696, 1]"
...,...,...
69239,UB7LFQKZT5_1982,"[23975165, 0]"
69240,UB7LFQKZT5_1983,"[23975227, 0]"
69241,UB7LFQKZT5_1984,"[23975289, 0]"
69242,UB7LFQKZT5_1985,"[23975351, 0]"
