In [None]:
#original train code
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

# Directory containing the training signal arrays; the loading loop reads one
# `<stem>.npy` file from here for every label file it processes.
data_train_dir = '/kaggle/input/signal-fast-radio-burst-detection/train/train'
# Directory containing the label CSVs that the loading loop lists and reads.
data_labels_dir = '/kaggle/input/signal-fast-radio-burst-detection/train-labels-corrected/train'

def load_large_npy(file_path):
    """Open a ``.npy`` file as a read-only memory map instead of loading it into RAM.

    ``np.load(..., mmap_mode='r')`` parses the ``.npy`` header, so the returned
    array has the shape and dtype that were saved. A raw ``np.memmap`` over the
    same file would treat the header bytes as float32 samples and return a flat,
    misaligned array.

    Args:
        file_path: Path to a file written by ``np.save``.

    Returns:
        A read-only ``numpy.memmap`` with the stored shape and dtype.
    """
    return np.load(file_path, mmap_mode='r')

# Maps each label string read from the label CSVs to a 3-element multi-hot
# vector. Position order is [pulse, broad, narrow]: "None" is the all-zero
# vector and combined labels (e.g. "Broad+Pulse") set several positions at once.
# Label strings that are not keys of this table are skipped by the loading loop.
label_mapping = {
    "None": [0, 0, 0],
    "Pulse": [1, 0, 0],
    "Broad": [0, 1, 0],
    "Narrow": [0, 0, 1],
    "Broad+Pulse": [1, 1, 0],
    "Narrow+Pulse": [1, 0, 1],
    "Narrow+Broad": [0, 1, 1],
    "Pulse+Broad+Narrow": [1, 1, 1]
}

# Accumulators filled by the loop below: `signals` receives one float32 chunk of
# `chunk_size` rows per entry, `labels` receives the matching multi-hot vector.
signals = []
labels = []

# Number of consecutive rows that form one training example. Hoisted out of the
# loop because it never changes between files.
chunk_size = 256

# List the label directory once and sort it. os.listdir() returns entries in
# arbitrary order, so slicing two separate unsorted listings does not pick a
# reproducible set of files from run to run.
label_files = sorted(os.listdir(data_labels_dir))
selected_label_files = label_files[0:10] + label_files[35:70]

for filename in tqdm(selected_label_files):
    # Drop the last 11 characters of the label file name to get the stem of the
    # matching signal file.
    # NOTE(review): presumably the suffix is something like "_labels.csv" - confirm.
    filename_noext = filename[:-11]

    labels_csv = pd.read_csv(os.path.join(data_labels_dir, filename))

    # Build a chunk-index -> label lookup once per file instead of filtering the
    # whole DataFrame for every chunk. setdefault keeps the first row when an
    # index is duplicated, matching the previous `.values[0]` behaviour.
    label_by_index = {}
    for row_index, row_label in zip(labels_csv['index'], labels_csv['labels']):
        label_by_index.setdefault(row_index, row_label)

    data_signals = np.load(os.path.join(data_train_dir, f'{filename_noext}.npy'))

    # Trailing rows that do not fill a whole chunk are dropped.
    num_chunks = len(data_signals) // chunk_size

    for i in range(num_chunks):
        # A chunk with no label row used to raise IndexError; skip it instead.
        if i not in label_by_index:
            continue

        label_ = label_by_index[i]

        # An empty label cell is read as NaN and means "no signal present".
        if pd.isna(label_):
            label_ = "None"

        # Ignore label strings that have no multi-hot encoding.
        if label_ not in label_mapping:
            continue

        start_ = i * chunk_size
        end_ = start_ + chunk_size
        signals.append(data_signals[start_:end_].astype(np.float32))
        labels.append(label_mapping[label_])

class SignalDataset(Dataset):
    """In-memory dataset pairing signal chunks with their label vectors.

    Both inputs are converted to float32 tensors once, at construction time.
    Indexing returns ``(signal, label)`` where the signal has gained a leading
    channel dimension.
    """

    def __init__(self, signals, labels):
        # Go through a single ndarray first so a list of arrays becomes one block.
        stacked_signals = np.array(signals)
        stacked_labels = np.array(labels)
        self.signals = torch.tensor(stacked_signals, dtype=torch.float32)
        self.labels = torch.tensor(stacked_labels, dtype=torch.float32)

    def __len__(self):
        return len(self.signals)

    def __getitem__(self, idx):
        # Add a channel dimension, e.g. (256, 256) -> (1, 256, 256), for Conv2D.
        with_channel = self.signals[idx][None]
        return with_channel, self.labels[idx]


dataset = SignalDataset(signals, labels)

# Compute per-sample weights from the labels so that rare label combinations
# are drawn about as often as common ones.
labels_tensor = torch.tensor(labels, dtype=torch.long)

# Labels are multi-hot ([pulse, broad, narrow]), not one-hot. argmax would map
# "None" ([0, 0, 0]) and every "...+Pulse" combination to index 0, so those
# groups would share one weight. Encode each combination as its own id in 0..7
# (pulse*4 + broad*2 + narrow) instead.
class_indices = (labels_tensor * torch.tensor([4, 2, 1])).sum(dim=1)
class_counts = torch.bincount(class_indices, minlength=8)  # samples per combination
# clamp(min=1) avoids dividing by zero for combinations absent from the data;
# those entries are never selected by the indexing below.
class_weights = 1.0 / class_counts.clamp(min=1).float()
sample_weights = class_weights[class_indices]  # one weight per sample

# Sample with replacement according to the inverse-frequency weights.
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# Pinned host memory only helps when batches are copied to a CUDA device.
dataloader = DataLoader(dataset, batch_size=8, sampler=sampler, pin_memory=torch.cuda.is_available())

# Show what the sampler produces for the first three batches.
for i, (signal, label) in enumerate(dataloader):
    print(f"Batch {i}: Signal Shape: {signal.shape}, Label: {label.numpy()}")
    if i == 2:  # only show 3 batches
        break


  0%|          | 0/45 [00:00<?, ?it/s]

Batch 0: Signal Shape: torch.Size([8, 1, 256, 256]), Label: [[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 0.]
 [0. 0. 1.]]
Batch 1: Signal Shape: torch.Size([8, 1, 256, 256]), Label: [[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]]
Batch 2: Signal Shape: torch.Size([8, 1, 256, 256]), Label: [[0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]]


In [None]:
# Bare expression: displays the DataLoader's repr as a quick check that it exists.
dataloader

<torch.utils.data.dataloader.DataLoader at 0x7ac0eda47dc0>

In [None]:
# normal CNN
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

# CNN model definition
class SignalCNN(nn.Module):
    """Small three-stage CNN that maps a single-channel 256x256 input to logits.

    Each stage is a 3x3 convolution (padding 1) followed by ReLU and 2x2 max
    pooling, so a 256x256 input is reduced to 64 feature maps of 32x32 before
    the two fully connected layers. The first linear layer is sized for exactly
    that 256x256 input.

    Args:
        num_classes: Number of output logits. Defaults to 3, so ``SignalCNN()``
            behaves as before; the parameter mirrors ``ResNet50Modified``.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 32 * 32, 128)  # input is the flattened (64, 32, 32) feature map
        self.fc2 = nn.Linear(128, num_classes)  # raw logits, one per class

    def forward(self, x):
        """Return raw logits of shape (B, num_classes) for input of shape (B, 1, 256, 256)."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)  # (B, 16, 128, 128)

        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)  # (B, 32, 64, 64)

        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)  # (B, 64, 32, 32)

        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        # No activation here: the loss function is expected to consume raw logits.
        x = self.fc2(x)
        return x

# Set up the device, model, loss function and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SignalCNN().to(device)
# The targets are multi-hot vectors ([pulse, broad, narrow]): "None" is all
# zeros and combined labels have several ones. argmax + CrossEntropyLoss would
# train "None" as "Pulse" and drop every secondary label, so each output is
# trained as an independent binary decision instead. This also matches the
# sigmoid + 0.5 threshold applied at prediction time.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10

for epoch in tqdm(range(num_epochs)):
    model.train()
    running_loss = 0.0

    # Batch variables get their own names so they do not overwrite the
    # module-level `signals` / `labels` lists built by the loading cell.
    for batch_signals, batch_labels in tqdm(dataloader):
        batch_signals = batch_signals.to(device)
        # The dataset already stores labels as float32, as BCEWithLogitsLoss requires.
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_signals)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean loss per batch for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader):.4f}")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

# Load ResNet-50 model and modify it for your task
class ResNet50Modified(nn.Module):
    """Pre-trained ResNet-50 adapted to single-channel input and a custom head.

    Args:
        num_classes: Number of output logits produced by the replaced final
            linear layer (default 3).
    """

    def __init__(self, num_classes=3):
        super(ResNet50Modified, self).__init__()

        # Load pre-trained ResNet-50.
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; switch once the minimum version is confirmed.
        self.resnet50 = models.resnet50(pretrained=True)

        # Replace the first convolution so it accepts 1 channel instead of 3.
        # A freshly constructed Conv2d is randomly initialised, which throws
        # away the pre-trained stem. Summing the RGB filters gives the same
        # response the original layer would produce for a grayscale image
        # copied into all three channels, so the pre-trained features are kept.
        rgb_stem = self.resnet50.conv1
        mono_stem = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        with torch.no_grad():
            mono_stem.weight.copy_(rgb_stem.weight.sum(dim=1, keepdim=True))
        self.resnet50.conv1 = mono_stem

        # Replace the fully connected layer to match the number of classes.
        self.resnet50.fc = nn.Linear(self.resnet50.fc.in_features, num_classes)

    def forward(self, x):
        """Run the wrapped ResNet-50 and return its raw logits."""
        return self.resnet50(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create the model
model = ResNet50Modified(num_classes=3).to(device)

# Define loss function and optimizer.
# The targets are multi-hot vectors ([pulse, broad, narrow]): "None" is all
# zeros and combined labels have several ones. argmax + CrossEntropyLoss would
# train "None" as "Pulse" and drop every secondary label, so each output is
# trained as an independent binary decision instead. This also matches the
# sigmoid + 0.5 threshold applied at prediction time.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 5

for epoch in tqdm(range(num_epochs)):
    model.train()
    running_loss = 0.0

    # Batch variables get their own names so they do not overwrite the
    # module-level `signals` / `labels` lists built by the loading cell.
    for batch_signals, batch_labels in tqdm(dataloader):
        batch_signals = batch_signals.to(device)
        # The dataset already stores labels as float32, as BCEWithLogitsLoss requires.
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_signals)
        loss = criterion(outputs, batch_labels)
        loss.backward()  # a stray "fl" after this call used to make the cell a SyntaxError
        optimizer.step()

        running_loss += loss.item()

    # Mean loss per batch for this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader):.4f}")




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/4161 [00:00<?, ?it/s]

Epoch [1/5], Loss: 0.4227


  0%|          | 0/4161 [00:00<?, ?it/s]

Epoch [2/5], Loss: 0.1746


  0%|          | 0/4161 [00:00<?, ?it/s]

Epoch [3/5], Loss: 0.1341


  0%|          | 0/4161 [00:00<?, ?it/s]

Epoch [4/5], Loss: 0.1111


  0%|          | 0/4161 [00:00<?, ?it/s]

Epoch [5/5], Loss: 0.0856


In [None]:
# Save the trained model's weights (state_dict only, not the full module).
# Whichever model was assigned to `model` most recently is the one written.
torch.save(model.state_dict(), "signal_cnn.pth")
print("Model saved as signal_cnn.pth ✅")

Model saved as signal_cnn.pth ✅


In [None]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the target device and switch it to inference mode.
# Without eval(), BatchNorm layers normalise with the statistics of this single
# sample and also update their running averages, so the forward pass both
# gives unreliable logits and silently alters the trained model.
model = model.to(device)
model.eval()

with torch.no_grad():
    # First 256 rows of one training file, used as a smoke-test input.
    sample_signal = np.load('/kaggle/input/signal-fast-radio-burst-detection/train/train/B0531+21_2020-05-31-11_36_46_0001023.npy')[:256]

    # Convert to a tensor, add batch and channel dimensions, move to the device.
    sample_signal = torch.tensor(sample_signal, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)

    # Forward pass: raw logits, one per output class.
    output = model(sample_signal)
    print(output)

    # Index of the largest logit.
    predicted_class = torch.argmax(output, dim=1).item()
    print(f"Predicted Class: {predicted_class}")


In [None]:
import pandas as pd
import torch
import numpy as np
import os
from tqdm.notebook import tqdm  # Ensure you have tqdm imported for the progress bars

# Assumes `model` and `device` were created by an earlier cell.
class_labels = ["Pulse", "Broad", "Narrow"]
test_dir = '/kaggle/input/signal-fast-radio-burst-detection/test/test'

chunk_size = 256            # rows per chunk, the same value used to build the training set
inference_batch_size = 32   # chunks per forward pass

# Maps "<file stem>_<chunk number>" to that chunk's three binary predictions.
predictions_dict = {}

# Switch to inference mode. Without this, BatchNorm layers normalise with the
# statistics of the current input and keep updating their running averages, so
# predictions are unreliable and the trained model is altered while predicting.
model.eval()

# Loop through files in the directory
for filename in tqdm(os.listdir(test_dir)):
    if not filename.endswith('.npy'):  # Only process .npy files
        continue

    file_path = os.path.join(test_dir, filename)

    # Load the signal data
    data_signals = np.load(file_path)

    # Zero-pad the tail so the last partial chunk is predicted rather than dropped.
    remainder = len(data_signals) % chunk_size
    if remainder:
        pad_length = chunk_size - remainder
        data_signals = np.pad(data_signals, ((0, pad_length), (0, 0)), mode='constant', constant_values=0)
    num_chunks = len(data_signals) // chunk_size

    file_id = filename[:-4]  # file name without the ".npy" extension

    # Predict several chunks per forward pass instead of one at a time. In eval
    # mode each sample's output does not depend on the rest of the batch.
    for batch_start in range(0, num_chunks, inference_batch_size):
        batch_stop = min(batch_start + inference_batch_size, num_chunks)
        batch_rows = data_signals[batch_start * chunk_size:batch_stop * chunk_size]

        # (n * chunk_size, width) -> (n, 1, chunk_size, width): one channel per chunk.
        batch_chunks = batch_rows.reshape(batch_stop - batch_start, 1, chunk_size, batch_rows.shape[1])
        signals_tensor = torch.tensor(batch_chunks, dtype=torch.float32).to(device)

        with torch.no_grad():
            outputs = model(signals_tensor)

        # Independent sigmoid per output: a chunk may carry several labels at once.
        probabilities = torch.sigmoid(outputs)

        # Apply threshold of 0.5 to each label and bring the result back as Python ints.
        predicted_labels = (probabilities > 0.5).int().cpu().tolist()

        for offset, (pulse, broad, narrow) in enumerate(predicted_labels):
            chunk_key = f"{file_id}_{batch_start + offset}"
            predictions_dict[chunk_key] = {
                "pulse": pulse,
                "broad": broad,
                "narrow": narrow
            }

# Convert the dictionary to a DataFrame (one row per chunk key)
df = pd.DataFrame.from_dict(predictions_dict, orient='index')

# Expose the chunk key as an 'id' column
df['id'] = df.index

# Reorder columns so that 'id' comes first
df = df[['id', 'pulse', 'broad', 'narrow']]

# Reset the index for a clean structure
df = df.reset_index(drop=True)

# Display the DataFrame
print(df)


  0%|          | 0/33 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/132 [00:00<?, ?it/s]

  0%|          | 0/68 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/452 [00:00<?, ?it/s]

  0%|          | 0/503 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/403 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/999 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/501 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

  0%|          | 0/1024 [00:00<?, ?it/s]

            id  pulse  broad  narrow
0         23_0      1      0       0
1         23_1      1      0       0
2         23_2      1      0       0
3         23_3      0      0       0
4         23_4      1      0       0
...        ...    ...    ...     ...
28103  16_1019      1      0       0
28104  16_1020      1      0       0
28105  16_1021      1      0       0
28106  16_1022      1      0       0
28107  16_1023      1      0       0

[28108 rows x 4 columns]


In [None]:
# Split every id of the form "<file>_<chunk>" once and keep both parts as
# integer helper columns on `df`, so rows can be ordered numerically rather
# than lexicographically.
id_parts = df['id'].str.split('_')
df['numeric_id'] = id_parts.str[0].astype(int)
df['chunk_id'] = id_parts.str[1].astype(int)

# Order by file number, then chunk number; keep only the output columns with
# 'id' first, and renumber the rows from zero.
df_sorted = (
    df.sort_values(by=['numeric_id', 'chunk_id'])
      .loc[:, ['id', 'pulse', 'broad', 'narrow']]
      .reset_index(drop=True)
)

In [None]:
# Bare expression so the notebook renders the sorted predictions table.
df_sorted

Unnamed: 0,id,pulse,broad,narrow
0,0_0,1,0,0
1,0_1,1,0,0
2,0_2,1,0,0
3,0_3,1,0,0
4,0_4,1,0,0
...,...,...,...,...
28103,32_398,1,0,0
28104,32_399,1,0,0
28105,32_400,1,0,0
28106,32_401,1,0,0


In [None]:
# Save the sorted DataFrame to a CSV file in the Kaggle working directory.
# index=False keeps the DataFrame's row index out of the file, so the columns
# written are exactly id, pulse, broad, narrow.
df_sorted.to_csv('/kaggle/working/sorted_predictions.csv', index=False)
