## Setup

In [1]:
import os
import glob
import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/signal-fast-radio-burst-detection/sample_submission.csv
/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B1933+16_59475_68410_reduced_fc_0001023_labels.csv
/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B0531+21_58713_43190_reduced_fc_0001023_labels.csv
/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/J1022+1001_59503_29150_reduced_fc_0004611_labels.csv
/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/J0621+1002_59691_60070_reduced_fc_0003041_labels.csv
/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B0531+21_2020-05-31-11_36_46_0020479_labels.csv
/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B0531+21_58713_43190_reduced_fc_0004095_labels.csv
/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B0531+21_58713_43190_reduced_fc_0029695_labels.csv
/kaggle/input/signal-fast-radio-b

In [3]:
# Seed used for PyTorch's global random number generator.
RANDOM_SEED = 42
# torch.backends.cudnn.enabled = False
# `torch.manual_seed` returns the global generator, which is why a
# `torch._C.Generator` repr is displayed as this cell's output.
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x7e7683b82f50>

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Global matplotlib style: serif text, trying Times New Roman first and then
# the serif fonts that were already configured, at 16 pt.
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = ["Times New Roman"] + plt.rcParams["font.serif"]
plt.rcParams["font.size"] = 16

In [6]:
# Directory with the labelled feature arrays (*.npy).
FULL_SET_FEATURE_PATH = "/kaggle/input/signal-fast-radio-burst-detection/train/train/"
# Directory with the per-sequence label files (<sequence_name>_labels.csv).
FULL_SET_LABEL_PATH = "/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/"
# Directory with the feature arrays (*.npy) to predict on.
TEST_SET_FEATURE_PATH = "/kaggle/input/signal-fast-radio-burst-detection/test/test/"
# Sample submission CSV, used later as the template for the predictions.
ANSWER_SHEET_PATH = "/kaggle/input/signal-fast-radio-burst-detection/sample_submission.csv"

## Explore the Data

In [7]:
# Gather the feature / label files of each split, sorted by file name.
full_feature_file_paths = sorted(glob.glob(f"{FULL_SET_FEATURE_PATH}/*.npy"))
full_label_file_paths = sorted(glob.glob(f"{FULL_SET_LABEL_PATH}/*.csv"))
test_feature_file_paths = sorted(glob.glob(f"{TEST_SET_FEATURE_PATH}/*.npy"))

# Show the first few paths and the file count of every group.
for description, paths in (
    ("training set feature", full_feature_file_paths),
    ("training set label", full_label_file_paths),
    ("test set feature", test_feature_file_paths),
):
    print(f"{description} files: ", paths[:5])
    print(f"number of {description} files: ", len(paths))

training set feature files:  ['/kaggle/input/signal-fast-radio-burst-detection/train/train/B0531+21_2020-05-31-11_36_46_0001023.npy', '/kaggle/input/signal-fast-radio-burst-detection/train/train/B0531+21_2020-05-31-11_36_46_0002047.npy', '/kaggle/input/signal-fast-radio-burst-detection/train/train/B0531+21_2020-05-31-11_36_46_0003071.npy', '/kaggle/input/signal-fast-radio-burst-detection/train/train/B0531+21_2020-05-31-11_36_46_0004095.npy', '/kaggle/input/signal-fast-radio-burst-detection/train/train/B0531+21_2020-05-31-11_36_46_0005119.npy']
number of training set feature files:  100
training set label files:  ['/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B0531+21_2020-05-31-11_36_46_0001023_labels.csv', '/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B0531+21_2020-05-31-11_36_46_0002047_labels.csv', '/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train/B0531+21_2020-05-31-11_36_46_0003071_labels.c

In [8]:
# Load the first feature file and show its shape followed by its contents.
sample_feature_file_path = full_feature_file_paths[0]
sample_feature = np.load(sample_feature_file_path)
print(sample_feature.shape, sample_feature, sep="\n")

(262144, 256)
[[182.20207 175.16605 191.80408 ... 237.36859 239.19215 308.23114]
 [193.30156 186.56226 159.6628  ... 295.21173 247.04266 319.43466]
 [213.11223 184.44229 189.07645 ... 246.4867  231.5767  288.76303]
 ...
 [159.88312 174.55893 172.22739 ... 303.33044 285.6743  402.75882]
 [198.9685  184.34851 211.50859 ... 246.7513  253.63019 293.59723]
 [187.20697 187.51857 177.88315 ... 281.9099  314.80948 347.81857]]


In [9]:
# Load the label file that sorts first and display it; missing labels are
# read as NaN.
sample_label_file_path = full_label_file_paths[0]
sample_labels = pd.read_csv(sample_label_file_path)
sample_labels

Unnamed: 0.1,Unnamed: 0,index,labels
0,0,0,
1,1,1,
2,2,2,
3,3,3,
4,4,4,
...,...,...,...
1019,1019,1019,
1020,1020,1020,
1021,1021,1021,
1022,1022,1022,


Fill in the missing values with the **string** "None". This will save us from headaches later on, especially when we have to deal with DataFrame indices.

In [10]:
# Display only: `fillna` returns a new DataFrame and the result is not assigned,
# so `sample_labels` itself still contains NaN after this cell.
sample_labels.fillna("None")

Unnamed: 0.1,Unnamed: 0,index,labels
0,0,0,
1,1,1,
2,2,2,
3,3,3,
4,4,4,
...,...,...,...
1019,1019,1019,
1020,1020,1020,
1021,1021,1021,
1022,1022,1022,


In [11]:
# Count the labels of every file separately (missing labels become the string
# "None"), then add the per-file counts together label by label.
per_file_label_counts = [
    pd.read_csv(label_path, usecols=["labels"]).fillna("None").value_counts(dropna=False)
    for label_path in full_label_file_paths
]
full_label_counts = pd.concat(per_file_label_counts).groupby(["labels"], dropna=False).sum()
full_label_counts

labels
Broad             3201
Broad+Pulse          9
Narrow            8582
Narrow+Broad       138
Narrow+Pulse        60
None             62988
Pulse             3847
Uncertain         5721
Unknown+Pulse       37
Unlabeled        11842
Name: count, dtype: int64

In [12]:
full_label_counts.sum()

96425

In [13]:
# Total number of label rows across all label files.
num_labels = sum(
    len(pd.read_csv(label_file_path, usecols=["labels"]).fillna("None"))
    for label_file_path in full_label_file_paths
)
num_labels

96425

## Loading and Preprocessing the Data

### Normalize the data and create a Dataset object for the full set

In [14]:
def normalize_snr(spectogram):
    """Normalise a spectrogram column by column.

    Each column (``axis=0``) has its median subtracted and is then divided by
    its standard deviation.

    Columns whose standard deviation is zero (e.g. constant or zero-padded
    columns) are only median-subtracted, so they come out as all zeros instead
    of NaN/inf from a division by zero.

    Args:
        spectogram: Array-like of numbers; statistics are taken along axis 0.

    Returns:
        A new ``numpy.ndarray`` with the same shape as the input.
    """
    spectogram = np.asarray(spectogram)
    med = np.median(spectogram, axis=0)
    std = np.std(spectogram, axis=0)
    # Divide by 1 wherever the spread is zero to avoid 0/0 and x/0.
    safe_std = np.where(std > 0, std, np.ones_like(std))
    return (spectogram - med) / safe_std

In [15]:
# Side length of the square spectrograms fed to the model. It is also used as
# the number of rows cut from a feature file per block.
NUM_CHANNELS = 256

In [16]:
from torch.utils.data import Dataset

class SpectogramDataset(Dataset):
    """Dataset of fixed-size spectrogram blocks cut from ``.npy`` feature files.

    Every ``*.npy`` file in ``feature_directory_path`` is split along its first
    axis into consecutive blocks of ``NUM_CHANNELS`` rows. A block is
    transposed, normalised with ``normalize_snr`` and, if it is shorter than
    ``NUM_CHANNELS`` rows, zero-padded to ``(NUM_CHANNELS, NUM_CHANNELS)``.

    Labels are read from ``<sequence_name>_labels.csv`` in
    ``label_directory_path`` and encoded as a 4-element vector
    ``[pulse, broad, narrow, ignore]``. Sequences without a label file get
    ``None`` as their label.

    Args:
        feature_directory_path: Directory containing the ``*.npy`` files.
        label_directory_path: Directory containing the label CSV files.
        lazy: If ``True``, spectrograms are read from disk on access instead
            of being built and kept in memory at construction time.
    """

    def __init__(self, feature_directory_path, label_directory_path, lazy=False):
        self.feature_directory_path = feature_directory_path
        self.label_directory_path = label_directory_path
        # Maps a flat sample index to (sequence_name, block_num, spectogram, label_vec).
        self.index_to_block = {}

        feature_file_paths = sorted(glob.glob(self.feature_directory_path + '/' + "*.npy"))

        idx = 0
        for feature_file_path in feature_file_paths:
            sequence_name = os.path.splitext(os.path.basename(feature_file_path))[0]
            # Memory-map the file: lazy mode only needs its shape, and eager
            # mode reads it block by block.
            feature = np.load(feature_file_path, mmap_mode="r")
            num_bins = feature.shape[0]
            num_blocks = -(-num_bins // NUM_CHANNELS)  # ceiling division

            labels = self._read_labels(sequence_name)
            if labels is not None and len(labels) < num_blocks:
                raise IndexError(
                    f"label file for {sequence_name} has {len(labels)} rows "
                    f"but the feature file has {num_blocks} blocks"
                )

            for i in range(num_blocks):
                label_vec = None if labels is None else self._encode_label(labels[i])
                spectogram = None if lazy else self._extract_spectogram(feature, i)
                self.index_to_block[idx] = (sequence_name, i, spectogram, label_vec)
                idx += 1

    def _read_labels(self, sequence_name):
        """Return the label column of a sequence as a list, or None if it has no label file."""
        label_file_path = os.path.join(self.label_directory_path, sequence_name + "_labels.csv")
        try:
            labels = pd.read_csv(label_file_path, usecols=["labels"]).fillna("None")
        except FileNotFoundError:
            return None
        return labels["labels"].tolist()

    @staticmethod
    def _encode_label(label):
        """Encode a label string as the tensor [pulse, broad, narrow, ignore]."""
        # "Uncertain", "Unlabeled" and non-string labels are flagged as "ignore".
        if not isinstance(label, str) or label == "Uncertain" or label == "Unlabeled":
            return torch.tensor([0, 0, 0, 1])
        # Combined labels such as "Narrow+Pulse" set several flags at once.
        return torch.tensor([
            int("Pulse" in label),
            int("Broad" in label),
            int("Narrow" in label),
            0,
        ])

    @staticmethod
    def _extract_spectogram(feature, block_num):
        """Cut block ``block_num`` out of ``feature`` and return it as a normalised tensor."""
        start = block_num * NUM_CHANNELS
        block = np.asarray(feature[start: start + NUM_CHANNELS, :])
        num_valid_bins = block.shape[0]
        # Normalise the real data only, then zero-pad a short final block.
        # An in-place `ndarray.resize` cannot be used for this: the transposed
        # slice does not own its data, so resizing it to a larger size raises.
        normalized = normalize_snr(block.T)
        spectogram = np.zeros((NUM_CHANNELS, NUM_CHANNELS), dtype=normalized.dtype)
        spectogram[:, :num_valid_bins] = normalized
        return torch.from_numpy(spectogram)

    def __len__(self):
        """Return the total number of blocks over all feature files."""
        return len(self.index_to_block)

    def __getitem__(self, idx):
        """Return ``(spectogram, label_vec)`` for the sample at ``idx``.

        ``spectogram`` has a leading singleton axis added. ``label_vec`` has
        shape ``(1, 4)``, or is ``None`` when the sequence has no label file.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        sequence_name, block_num, spectogram, label_vec = self.index_to_block[idx]
        if spectogram is None:
            feature_file_path = os.path.join(self.feature_directory_path, sequence_name + ".npy")
            # Memory-map so that only the requested block is read from disk.
            features = np.load(feature_file_path, mmap_mode="r")
            spectogram = self._extract_spectogram(features, block_num)

        if label_vec is not None:
            label_vec = label_vec.unsqueeze(0)
        return spectogram.unsqueeze(0), label_vec

In [None]:
# Build the labelled dataset eagerly (lazy=False): every block of every feature
# file is normalised and kept in memory.
full_set = SpectogramDataset(FULL_SET_FEATURE_PATH, FULL_SET_LABEL_PATH)
full_set

Let's verify that the length of the full set matches the number of labels.

In [None]:
print(len(full_set))

Also, check that the full set just created has the correct random-access behaviour (e.g. that its outputs have the expected shapes).

In [None]:
# Fetch the first sample once and show the pair, then the shape of each element.
first_spectogram, first_label_vec = full_set[0]
print((first_spectogram, first_label_vec))
print(first_spectogram.shape)
print(first_label_vec.shape)

Let's also redefine the spectrogram plotting function so that it can accept a `torch.Tensor` object as its input.

In [None]:
def plot_spectogram(spectogram, title_string=None, vmin=None, vmax=None):
    """Plot a spectrogram with its summed profiles along both axes.

    The figure has three panels: the spectrogram as a grey-scale image, the
    sum over rows for each column above it, and the sum over columns for each
    row to its right.

    Args:
        spectogram: 2-D array, or a ``torch.Tensor`` that is 2-D once its
            singleton dimensions are squeezed out.
        title_string: Optional title, shown above the top panel.
        vmin: Lower colour limit passed to ``imshow``.
        vmax: Upper colour limit passed to ``imshow``.
    """
    if isinstance(spectogram, torch.Tensor):
        spectogram = spectogram.squeeze().detach().cpu().numpy()

    fig = plt.figure(figsize=(10, 10))
    gs = fig.add_gridspec(
        2, 2, width_ratios=(4, 1), height_ratios=(1, 4),
        left=0.1, right=0.9, bottom=0.1, top=0.93,
        wspace=0, hspace=0
    )

    num_channels, num_bins = spectogram.shape[0], spectogram.shape[1]

    # Main panel: the spectrogram itself.
    ax1 = fig.add_subplot(gs[1, 0])
    heatmap = ax1.imshow(
        spectogram,
        aspect="auto",
        origin="upper",
        interpolation="none",
        cmap="gray",
        vmin=vmin,
        vmax=vmax
    )
    ax1.set_xlabel("Time (ms)")
    ax1.set_ylabel("Channel")
    ax1.set_ylim(num_channels - 1, 0)
    ax1.set_xlim(0, num_bins - 1)

    lightcurve = np.sum(spectogram, axis=0)
    powerspectrum = np.sum(spectogram, axis=1)

    # Right panel: per-row sum. Integer row indices keep the curve aligned
    # with the image rows, and drawing on `ax2` explicitly avoids relying on
    # pyplot's notion of the "current" axes.
    ax2 = fig.add_subplot(gs[1, 1])
    ax2.plot(powerspectrum, np.arange(num_channels))
    ax2.set_yticklabels([])
    ax2.set_xticklabels([])
    # Include 0 in the range but also cover negative sums, which occur once the
    # spectrogram has been median-subtracted.
    x_low = min(0.0, 1.2 * float(np.min(powerspectrum)))
    x_high = max(0.0, 1.2 * float(np.max(powerspectrum)))
    if x_low < x_high:
        ax2.set_xlim(x_low, x_high)
    ax2.set_ylim(ax1.get_ylim())
    ax2.set_xlabel("SED")

    # Top panel: per-column sum.
    ax3 = fig.add_subplot(gs[0, 0])
    ax3.plot(lightcurve)
    ax3.set_xlim(ax1.get_xlim())
    ax3.set_xticklabels([])
    ax3.set_yticklabels([])
    ax3.set_ylabel(r"Intensity")
    if title_string is not None:
        ax3.set_title(title_string)

    fig.colorbar(heatmap, ax=ax2, orientation="vertical", fraction = .5)
    plt.show()

By manually inspecting the labels, I know that there is a pulse in block 535 of the sequence 
B0531+21_2020-05-31-11_36_46_0001023, so let's plot that block and print its label vector.

In [None]:
# Plot block 535 and print its label vector.
pulse_block_index = 535
plot_spectogram(full_set[pulse_block_index][0], title_string="Spectogram")
print(full_set[pulse_block_index][1])

### Create a sampler

#### Split the full set into training and validation set

In [None]:
# Fractions of the full set assigned to the training and validation subsets.
TRAIN_PROPORTION = 0.8
VAL_PROPORTION = 0.2

In [None]:
from torch.utils.data import random_split

# Use a dedicated generator seeded with the notebook-wide RANDOM_SEED (rather
# than a second hard-coded 42) so the split is reproducible and follows any
# change of the seed constant.
rng = torch.Generator().manual_seed(RANDOM_SEED)
train_set, val_set = random_split(
    full_set,
    [TRAIN_PROPORTION, VAL_PROPORTION],
    generator=rng
)
print(train_set, len(train_set))
print(val_set, len(val_set))

#### Get the sampling weight for our training and validation set

First let's inspect the count of each class in the full set again.

In [None]:
full_label_counts

We are going to obtain similar label counts for both the training and validation sets we just created, except this time each class is represented by its label vector (a `torch.Tensor` converted to a tuple so that it can be used as a key).

In [None]:
# Count how often each label vector occurs in the training set. The vectors are
# turned into tuples so they can be used as dictionary keys.
train_label_counts = {}
for sample_index in range(len(train_set)):
    _, label_vec = train_set[sample_index]
    key = tuple(label_vec.squeeze().tolist())
    train_label_counts[key] = train_label_counts.get(key, 0) + 1

train_label_counts = pd.Series(train_label_counts)
train_label_counts.index.name = "labels"
train_label_counts.name = "count"
train_label_counts

In [None]:
# Count how often each label vector occurs in the validation set. The vectors
# are turned into tuples so they can be used as dictionary keys.
val_label_counts = {}
for sample_index in range(len(val_set)):
    _, label_vec = val_set[sample_index]
    key = tuple(label_vec.squeeze().tolist())
    val_label_counts[key] = val_label_counts.get(key, 0) + 1

val_label_counts = pd.Series(val_label_counts)
val_label_counts.index.name = "labels"
val_label_counts.name = "count"
val_label_counts

Now, let's calculate the sampling weight for each class. The goal is to oversample the underrepresented classes and undersample the overrepresented classes. 

For "Uncertain" and "Unlabeled" we can set the sampling weight to 0 because we are not going to use them in training.

In [None]:
# One sampling weight per training sample: the inverse of its class count, so
# rare classes are drawn more often. Samples flagged "ignore" (0, 0, 0, 1)
# get weight 0 and are never drawn.
train_sampling_weights = []
for i in range(len(train_set)):
    # Fetch each sample only once; just its label vector is needed here.
    key = tuple(train_set[i][1].squeeze().tolist())
    if key == (0, 0, 0, 1):
        weight = 0
    else:
        weight = 1/train_label_counts[key]
    train_sampling_weights.append(weight)
print(train_sampling_weights[:5], len(train_sampling_weights))

In [None]:
# One sampling weight per validation sample: the inverse of its class count.
# Samples flagged "ignore" (0, 0, 0, 1) get weight 0 and are never drawn.
val_sampling_weights = []
for i in range(len(val_set)):
    # Fetch each sample only once; just its label vector is needed here.
    key = tuple(val_set[i][1].squeeze().tolist())
    if key == (0, 0, 0, 1):
        weight = 0
    else:
        weight = 1/val_label_counts[key]
    val_sampling_weights.append(weight)
print(val_sampling_weights[:5], len(val_sampling_weights))

Lastly, create a WeightedRandomSampler object for the training and validation set.

In [None]:
from torch.utils.data.sampler import WeightedRandomSampler

# Each sampler draws as many samples per epoch as its subset contains, with
# replacement (the sampler's default), according to the per-sample weights.
train_sampler = WeightedRandomSampler(
    weights=train_sampling_weights,
    num_samples=len(train_sampling_weights),
    replacement=True,
)
val_sampler = WeightedRandomSampler(
    weights=val_sampling_weights,
    num_samples=len(val_sampling_weights),
    replacement=True,
)

## Preparing a Model for Training

In [None]:
class SpectrogramClassifierVGG16(nn.Module):
    """VGG16 backbone followed by an MLP head that outputs one logit per class.

    The last layer of VGG16's own classifier is replaced by a 4096 -> 2048
    linear layer, and a ReLU / linear stack (2048 -> 1024 -> 512 ->
    ``num_classes``) is appended. The first conv layer is left unchanged, so
    the input must have 3 channels.

    Args:
        num_classes: Number of output logits.
        pretrained: Load the ImageNet weights into the backbone (default).
            Pass ``False`` to skip the download, e.g. when a saved state dict
            is loaded right after construction.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        # Newer torchvision releases replace `pretrained=` with `weights=`;
        # fall back to the old keyword when the weights enum does not exist.
        weights_enum = getattr(models, "VGG16_Weights", None)
        if weights_enum is None:
            self.vgg = models.vgg16(pretrained=pretrained)
        else:
            self.vgg = models.vgg16(
                weights=weights_enum.IMAGENET1K_V1 if pretrained else None
            )

        # Replace VGG16's final classification layer with a 2048-d feature layer.
        self.vgg.classifier[-1] = nn.Linear(4096, 2048)

        # Extra fully connected head producing the raw class logits.
        self.classifier = nn.Sequential(
            nn.ReLU(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes)
        )

        # Freeze the first 7 modules of the convolutional feature extractor.
        for param in self.vgg.features[:7].parameters():
            param.requires_grad = False

    def forward(self, x):
        """Return raw logits; no sigmoid is applied here."""
        return self.classifier(self.vgg(x))

In [None]:
# Number of model outputs: pulse, broad, narrow.
NUM_CLASSES = 3
# Learning rate passed to the Adam optimizer.
LEARNING_RATE = 1e-4

In [None]:
# Build the classifier and move it to the selected device.
model = SpectrogramClassifierVGG16(NUM_CLASSES).to(device)
# Binary cross-entropy on raw logits; the sigmoid is applied inside the loss.
criterion = nn.BCEWithLogitsLoss()  # Replaces BCELoss
# Adam over all model parameters, with weight decay.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)

In [None]:
from torchmetrics.classification import MultilabelF1Score

# Macro-averaged multilabel F1 over the NUM_CLASSES outputs. The same metric
# object is reset and reused for the training and the validation pass.
f1_metric = MultilabelF1Score(num_labels=NUM_CLASSES, average="macro").to(device)

### Create a DataLoader object

In [None]:
# Number of samples per mini-batch for the training and validation loaders.
MINI_BATCH_SIZE = 64

In [None]:
# Batches of the training subset, drawn in the order given by the weighted sampler.
train_data_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    sampler=train_sampler,
    batch_size=MINI_BATCH_SIZE,
)
train_data_loader

Let's also verify that the DataLoader just created has correct data collation (batching) behaviour.

In [None]:
# Look at the first batch only and print the feature and label batch shapes.
for sample in train_data_loader:
    feature, label = sample[0], sample[1]
    print(feature.shape, label.shape, sep="\n")
    break

In [None]:
label.squeeze(1)[:, :3].shape

Do the same thing with validation set.

In [None]:
# Batches of the validation subset, drawn in the order given by the weighted sampler.
val_data_loader = torch.utils.data.DataLoader(
    dataset=val_set,
    sampler=val_sampler,
    batch_size=MINI_BATCH_SIZE,
)
val_data_loader

## Training

In [None]:
# Number of passes over the training loader.
NUM_EPOCHS = 10

In [None]:
# Create the checkpoint directory from Python (same effect as `mkdir -p`), so
# the cell does not depend on a shell being available.
os.makedirs("/kaggle/working/results", exist_ok=True)

### Train the Model

In [None]:
# Start below any attainable F1 so the first epoch always writes a checkpoint;
# otherwise no model file exists if validation F1 never rises above 0.
best_f1 = float("-inf")
best_epoch = -1

for epoch in range(NUM_EPOCHS):
    # ---------------- Training pass ----------------
    model.train()
    train_loss = 0.0
    train_num_samples = 0
    f1_metric.reset()

    for feature_tensor, label_tensor in train_data_loader:
        mini_batch_size = feature_tensor.size(0)
        feature_tensor, label_tensor = feature_tensor.to(device), label_tensor.to(device)
        # (batch, 1, 4) -> (batch, NUM_CLASSES): drop the singleton axis and
        # the trailing "ignore" flag.
        targets = label_tensor.squeeze(1)[:, :NUM_CLASSES]

        optimizer.zero_grad()
        # The pretrained VGG backbone expects 3 input channels, so the single
        # channel is repeated three times along the channel dimension (dim=1).
        feature_tensor_3_channels = torch.cat(
            [feature_tensor, feature_tensor, feature_tensor],
            dim=1
        )
        outputs = model(feature_tensor_3_channels)  # Raw logits (no sigmoid)
        loss = criterion(outputs, targets.float())  # BCEWithLogitsLoss applies sigmoid internally

        loss.backward()
        optimizer.step()

        # `loss` is a batch mean; weight it by the batch size so the epoch
        # average below is a true per-sample mean.
        train_loss += loss.item() * mini_batch_size
        train_num_samples += mini_batch_size

        # Threshold the sigmoid probabilities to get binary predictions.
        preds = torch.sigmoid(outputs) > 0.5
        f1_metric.update(preds, targets.int())  # labels should be int

    # Epoch-level loss (per sample) and F1 score.
    train_loss_epoch_end = train_loss / max(train_num_samples, 1)
    train_f1_epoch_end = f1_metric.compute().item()

    # ---------------- Validation pass ----------------
    model.eval()
    val_loss = 0.0
    val_num_samples = 0
    f1_metric.reset()

    with torch.no_grad():
        for feature_tensor, label_tensor in val_data_loader:
            mini_batch_size = feature_tensor.size(0)
            feature_tensor, label_tensor = feature_tensor.to(device), label_tensor.to(device)
            targets = label_tensor.squeeze(1)[:, :NUM_CLASSES]

            # Same 1 -> 3 channel repetition as in the training pass.
            feature_tensor_3_channels = torch.cat(
                [feature_tensor, feature_tensor, feature_tensor],
                dim=1
            )
            outputs = model(feature_tensor_3_channels)  # Raw logits (no sigmoid)
            loss = criterion(outputs, targets.float())  # BCEWithLogitsLoss applies sigmoid internally

            val_loss += loss.item() * mini_batch_size
            val_num_samples += mini_batch_size

            preds = torch.sigmoid(outputs) > 0.5
            f1_metric.update(preds, targets.int())  # labels should be int

        val_loss_epoch_end = val_loss / max(val_num_samples, 1)
        val_f1_epoch_end = f1_metric.compute().item()

    # Report the per-sample averages, not the running sums.
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}:")
    print(f"       Train Loss: {train_loss_epoch_end:.4f} | Train F1 (Macro): {train_f1_epoch_end:.4f}")
    print(f"Validation Loss: {val_loss_epoch_end:.4f} | Validation F1 (Macro): {val_f1_epoch_end:.4f}\n")

    # Keep only the checkpoint with the best validation F1 seen so far.
    if val_f1_epoch_end > best_f1:
        best_f1 = val_f1_epoch_end
        best_epoch = epoch + 1
        torch.save(model.state_dict(), "/kaggle/working/results/model.pth")
        torch.save(optimizer.state_dict(), "/kaggle/working/results/optimizer.pth")

print(f"Best validation F1 (Macro): {best_f1:.4f} at epoch {best_epoch}")

## Inference

### Load the trained model

In [None]:
# Rebuild the architecture and restore the best checkpoint. `map_location`
# remaps the saved tensors onto the current device, so a checkpoint written on
# a GPU also loads in a CPU-only session.
model = SpectrogramClassifierVGG16(NUM_CLASSES).to(device)
model.load_state_dict(torch.load("/kaggle/working/results/model.pth", map_location=device))
model.eval()

### Loading the answer sheet

In [None]:
# Load the sample submission, indexed by its `id` column; it is used as the
# template that the predictions are written into.
answer_sheet = pd.read_csv(ANSWER_SHEET_PATH, index_col="id")
answer_sheet

In [None]:
# Make sure the model is in inference mode even if this cell is run right
# after training.
model.eval()

with torch.no_grad():
    for test_feature_file_path in test_feature_file_paths:
        sequence_name = os.path.splitext(os.path.basename(test_feature_file_path))[0]
        feature = np.load(test_feature_file_path)
        num_bins = feature.shape[0]

        # Walk over the file in blocks of NUM_CHANNELS rows; the last block may be shorter.
        for block_start_index in range(0, num_bins, NUM_CHANNELS):
            block_num = block_start_index // NUM_CHANNELS
            block = feature[block_start_index:block_start_index + NUM_CHANNELS, :]
            num_valid_bins = block.shape[0]

            # Normalise the real data, then zero-pad a short final block to the
            # model's input size. An in-place `ndarray.resize` cannot be used
            # here: the transposed slice does not own its data, so resizing it
            # to a larger size raises ValueError.
            normalized = normalize_snr(block.T)
            spectogram = np.zeros((NUM_CHANNELS, NUM_CHANNELS), dtype=normalized.dtype)
            spectogram[:, :num_valid_bins] = normalized

            # (H, W) -> (1, 1, H, W), then repeat the channel 3 times for VGG.
            feature_tensor = torch.from_numpy(spectogram).unsqueeze(0).unsqueeze(0)
            feature_tensor = feature_tensor.to(device)
            feature_tensor_3_channels = torch.cat(
                [feature_tensor, feature_tensor, feature_tensor],
                dim=1
            )
            outputs = model(feature_tensor_3_channels)  # Raw logits (no sigmoid)
            preds = torch.sigmoid(outputs) > 0.5
            preds = preds.int()

            # Write the three binary predictions into the row "<sequence_name>_<block_num>".
            answer = preds.cpu().detach().numpy().flatten()
            answer_sheet.loc[f"{sequence_name}_{block_num}", ["pulse", "broad", "narrow"]] = answer

answer_sheet

In [None]:
# Rows that received no prediction become 0, the three prediction columns are
# cast to int, and the "Usage" column is dropped when it is present.
answer_sheet = (
    answer_sheet
    .fillna(0)
    .astype({"pulse": int, "broad": int, "narrow": int})
    .drop(columns=["Usage"], errors="ignore")
)
answer_sheet

In [None]:
# Create the output directory from Python (same effect as `mkdir -p`), then
# write the submission file.
os.makedirs("/kaggle/working/answers", exist_ok=True)
answer_sheet.to_csv("/kaggle/working/answers/answer_sheet.csv")