In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.utils import plot_model
from scipy.signal import welch, lfilter, butter

In [2]:
# Load the real-LIGO test inputs and their labels in one pass.
# NOTE: allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code -- only use it on files from a trusted source.
X_test_ligo_events, y_test_ligo_events = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '/kaggle/input/ligotestset/X_test_ligo.npy',
        '/kaggle/input/ligotestset/y_test_ligo.npy',
    )
)

In [3]:
# Element-wise mask of the samples whose label equals the string 'Noise'.
is_noise_label = (y_test_ligo_events == 'Noise')

# Single-argument np.where returns a tuple of index arrays; that tuple is
# reused later to write the filtered segments back into the same positions.
noise_indices = np.where(is_noise_label)
noise_strain_values = X_test_ligo_events[noise_indices]

In [4]:
def whiten_data_segment(segment, fs, lowcut=1.0, highcut=1000.0, order=4):
    """Band-pass filter one strain segment with a Butterworth filter.

    NOTE: despite its name this function does not whiten the data (it never
    divides by an estimated amplitude spectral density). The previous version
    computed a Welch PSD and discarded it; that dead computation has been
    removed, which does not change the returned values.

    Parameters
    ----------
    segment : array_like
        1-D sequence of samples to filter.
    fs : float
        Sampling frequency of ``segment`` (same units as the band edges).
    lowcut, highcut : float, optional
        Lower and upper edges of the pass band. Defaults (1.0 and 1000.0)
        reproduce the previously hard-coded band.
    order : int, optional
        Order passed to ``scipy.signal.butter`` (default 4, as before).

    Returns
    -------
    numpy.ndarray
        The causally filtered segment (``scipy.signal.lfilter`` output), same
        length as the input.

    Raises
    ------
    ValueError
        If the band does not satisfy ``0 < lowcut < highcut < fs / 2``.
    """
    nyquist = 0.5 * fs
    # butter() needs normalised edges strictly inside (0, 1); check here so
    # the error message names the offending values.
    if not 0.0 < lowcut < highcut < nyquist:
        raise ValueError(
            f"Band edges must satisfy 0 < lowcut < highcut < fs/2; "
            f"got lowcut={lowcut}, highcut={highcut}, fs={fs}"
        )

    lowcut_normalized = lowcut / nyquist
    highcut_normalized = highcut / nyquist

    b, a = butter(order, [lowcut_normalized, highcut_normalized], btype='band')

    return lfilter(b, a, segment)

def lowpass_filter(data, fs, cutoff):
    """Apply a 4th-order digital Butterworth low-pass filter to ``data``.

    ``cutoff`` is expressed in the same units as the sampling frequency
    ``fs`` and is normalised by the Nyquist frequency (``fs / 2``) before
    being handed to ``scipy.signal.butter``. The filter is applied causally
    with ``scipy.signal.lfilter`` and the filtered array is returned.
    """
    numerator, denominator = butter(
        4,
        cutoff / (0.5 * fs),
        btype='low',
        analog=False,
    )
    return lfilter(numerator, denominator, data)

# Sampling frequency handed to both filters, and the low-pass cut-off
# (same units as fs; presumably Hz -- confirm against the data source).
fs = 2048
lowpass_cutoff = 100.0

# For every noise segment: band-pass via whiten_data_segment, then low-pass.
filtered_data_segments = [
    lowpass_filter(whiten_data_segment(noise_segment, fs), fs, lowpass_cutoff)
    for noise_segment in noise_strain_values
]

filtered_data = np.array(filtered_data_segments)


In [5]:
# Write the filtered noise segments back into the positions they were read
# from. This overwrites X_test_ligo_events in place, so re-running the noise
# selection and filtering cells afterwards would filter already-filtered data.
X_test_ligo_events[noise_indices] = filtered_data

In [6]:
# Each record is indexed positionally: element 0 is kept as the file path and
# element 1 as the label.
# NOTE: allow_pickle=True can execute code while unpickling -- trusted files only.
data_info = np.load('/kaggle/input/segmentlabels/segmentlabels.npy', allow_pickle=True)

file_paths, labels = [], []
for record in data_info:
    file_paths.append(record[0])
    labels.append(record[1])

In [7]:
# np.unique returns the distinct labels in sorted order; a label's position in
# that order is its integer class index.
unique_labels = np.unique(labels)
label_to_index = dict(zip(unique_labels, range(len(unique_labels))))
num_classes = len(label_to_index)

# Map each LIGO test label to its class index, then one-hot encode.
ligo_label_indices = np.array([label_to_index[name] for name in y_test_ligo_events])
y_test_ligo_events_encoded = to_categorical(ligo_label_indices, num_classes=num_classes)

In [8]:
# Pad every sequence at its end with 0.0 (no maxlen given, so the longest
# sequence sets the length) and return the result as float32.
X_test_ligo_events = pad_sequences(
    X_test_ligo_events,
    padding='post',
    value=0.0,
    dtype='float32',
)

In [9]:
# Stack the rows of the padded array vertically into one array.
# NOTE(review): if pad_sequences already returned a 2-D (samples, length)
# array, this leaves the shape unchanged and only makes a copy -- confirm
# whether the step is still needed.
X_test_ligo_events = np.vstack(X_test_ligo_events)

In [10]:
# float32 tensor copy of the LIGO inputs (the NumPy array itself is kept).
X_test_ligo_events_tensor = tf.convert_to_tensor(
    X_test_ligo_events,
    dtype=tf.float32,
)

# The one-hot label matrix is rebound to its float32 tensor form, so from
# here on y_test_ligo_events_encoded is a tf tensor.
y_test_ligo_events_encoded = tf.convert_to_tensor(
    y_test_ligo_events_encoded,
    dtype=tf.float32,
)

In [11]:
# Number of samples per segment; used as the middle axis when the LIGO test
# array is reshaped to (-1, signal_length, 1).
# NOTE(review): equals the sampling rate fs = 2048 used for filtering, i.e.
# presumably one-second segments -- confirm.
signal_length = 2048

In [12]:
# Target layout: (samples, signal_length, 1); -1 lets NumPy infer the number
# of samples, and the trailing axis of size 1 is a single channel.
ligo_input_shape = (-1, signal_length, 1)
X_test_ligo_events_reshaped = X_test_ligo_events.reshape(ligo_input_shape)

In [13]:
# Load the pre-split train / validation / test arrays, in the same order as
# the names on the left-hand side.
# NOTE: allow_pickle=True can execute code while unpickling -- trusted files only.
X_train, y_train, X_val, y_val, X_test, y_test = (
    np.load(npy_path, allow_pickle=True)
    for npy_path in (
        '/kaggle/input/trainvalidationtest/Train Val Test/X_train.npy',
        '/kaggle/input/trainvalidationtest/Train Val Test/y_train.npy',
        '/kaggle/input/trainvalidationtest/Train Val Test/X_val.npy',
        '/kaggle/input/trainvalidationtest/Train Val Test/y_val.npy',
        '/kaggle/input/trainvalidationtest/Train Val Test/X_test.npy',
        '/kaggle/input/trainvalidationtest/Train Val Test/y_test.npy',
    )
)

In [14]:
# Rebinds data_info, file_paths and labels from the full label file; element 0
# of each record is kept as the file path and element 1 as the label.
# NOTE: allow_pickle=True can execute code while unpickling -- trusted files only.
data_info = np.load('/kaggle/input/alllabels/labels.npy', allow_pickle=True)

file_paths, labels = [], []
for record in data_info:
    file_paths.append(record[0])
    labels.append(record[1])

In [15]:
# Sorted distinct labels define the class order; the position is the class index.
# NOTE(review): this rebuilds label_to_index from a different label file than
# the one used for the LIGO test labels -- confirm both give the same mapping.
unique_labels = np.unique(labels)
label_to_index = dict(zip(unique_labels, range(len(unique_labels))))
num_classes = len(label_to_index)


def _one_hot_labels(split_labels):
    """Map label values to class indices and return their one-hot matrix."""
    class_indices = np.array([label_to_index[name] for name in split_labels])
    return to_categorical(class_indices, num_classes=num_classes)


y_train_encoded = _one_hot_labels(y_train)
y_val_encoded = _one_hot_labels(y_val)
y_test_encoded = _one_hot_labels(y_test)

In [16]:
# Shared padding options: append 0.0 at the end of each sequence and return
# float32. With no maxlen, each split is padded to its own longest sequence.
pad_options = dict(dtype='float32', padding='post', value=0.0)

X_train = pad_sequences(X_train, **pad_options)
X_val = pad_sequences(X_val, **pad_options)
X_test = pad_sequences(X_test, **pad_options)

In [17]:
# Stack the rows of each padded split vertically into one array per split.
# The inner tuple is built from the current arrays before any name is rebound.
X_train, X_val, X_test = (
    np.vstack(split_rows) for split_rows in (X_train, X_val, X_test)
)

In [18]:
def _as_float32_tensor(array):
    """Return ``array`` converted to a float32 TensorFlow tensor."""
    return tf.convert_to_tensor(array, dtype=tf.float32)


# Inputs of each split as float32 tensors.
X_train_tensor = _as_float32_tensor(X_train)
X_val_tensor = _as_float32_tensor(X_val)
X_test_tensor = _as_float32_tensor(X_test)

# One-hot labels of each split as float32 tensors.
y_train_encoded_tensor = _as_float32_tensor(y_train_encoded)
y_val_encoded_tensor = _as_float32_tensor(y_val_encoded)
y_test_encoded_tensor = _as_float32_tensor(y_test_encoded)

In [19]:
# Samples per segment for the train/val/test reshape below. The same value is
# already assigned to signal_length earlier in the notebook, so this line
# only restates it.
signal_length = 2048

In [20]:
# Target layout for every split: (samples, signal_length, 1); -1 lets NumPy
# infer the sample count and the last axis is a single channel.
split_input_shape = (-1, signal_length, 1)

X_train_reshaped = X_train.reshape(split_input_shape)
X_val_reshaped = X_val.reshape(split_input_shape)
X_test_reshaped = X_test.reshape(split_input_shape)

In [21]:
# Standardisation statistics come from the training set only.
mean = np.mean(X_train_reshaped)
std = np.std(X_train_reshaped)
# Statistics of the real-LIGO test set; reported for comparison, not used
# for scaling.
real_ligo_mean = np.mean(X_test_ligo_events_reshaped)
real_ligo_std = np.std(X_test_ligo_events_reshaped)

# y_test_ligo_events_encoded is a one-hot matrix (samples x classes), possibly
# held as a tf tensor. Comparing it directly with 0/1/2/3 does not select
# classes: "== 0" and "== 1" each match every row, "== 2"/"== 3" match none,
# so every sample used to end up standardised. Recover the integer class id
# of each sample first.
_ligo_labels = np.asarray(y_test_ligo_events_encoded)
if _ligo_labels.ndim > 1:
    ligo_class_ids = np.argmax(_ligo_labels, axis=1)
else:
    # Already integer class ids (labels were not one-hot encoded).
    ligo_class_ids = _ligo_labels.astype(int)

X_test_ligo_events_scaled = np.zeros_like(X_test_ligo_events_reshaped)

# Class 0 is copied through unscaled. Which label has index 0 is determined
# by the sorted order of the label values used to build label_to_index.
# NOTE(review): before this fix class 0 was standardised like every other
# class -- confirm that leaving it raw is what the downstream model expects.
positions_label_0 = np.where(ligo_class_ids == 0)[0]
X_test_ligo_events_scaled[positions_label_0] = X_test_ligo_events_reshaped[positions_label_0]

# Classes 1, 2 and 3 are standardised with the training-set mean and std.
positions_label_1 = np.where(ligo_class_ids == 1)[0]
positions_label_2 = np.where(ligo_class_ids == 2)[0]
positions_label_3 = np.where(ligo_class_ids == 3)[0]

for positions in (positions_label_1, positions_label_2, positions_label_3):
    X_test_ligo_events_scaled[positions] = (
        X_test_ligo_events_reshaped[positions] - mean
    ) / std

In [22]:
# Alias only: test_data refers to the same array object as
# X_test_ligo_events_scaled (no copy is made).
test_data = X_test_ligo_events_scaled

In [23]:
# Training-set mean/std followed by real-LIGO mean/std, one value per line.
print(mean, std, real_ligo_mean, real_ligo_std, sep='\n')

-5.121349e-24
1.7853791e-19
-1.2127532e-23
1.6037192e-20


In [24]:
# Number of samples (size of the first axis) in the scaled LIGO test array.
print(test_data.shape[0])

1632


In [25]:
# Persist the scaled LIGO test array to 'test_data1.npy' in the current
# working directory (relative path).
np.save('test_data1.npy', test_data)