In [5]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, Input
from tensorflow.keras.utils import to_categorical

# Function to load and preprocess a single CSV file
def load_and_preprocess_csv(file_path):
    """Read one histogram CSV and return it as a normalised slice-by-bin table.

    The CSV must provide ``slice_index``, ``bin_start`` and ``count`` columns.
    Every count is divided by the total count of its slice, then the long
    table is pivoted so that rows are slice indices and columns are bin
    starts. Cells without a value after pivoting are set to 0.

    Returns
    -------
    pandas.DataFrame
        One row per ``slice_index`` and one column per ``bin_start``.
    """
    frame = pd.read_csv(file_path)
    # Per-row total of the slice that row belongs to.
    slice_totals = frame.groupby('slice_index')['count'].transform('sum')
    frame['count'] = frame['count'] / slice_totals
    table = frame.pivot(index='slice_index', columns='bin_start', values='count')
    return table.fillna(0)

# Function to pad arrays to the same shape
def pad_array(array, max_shape):
    """Zero-pad ``array`` at the end of every axis until it has shape ``max_shape``.

    Parameters
    ----------
    array : array-like
        Data to pad; it is converted with ``np.asarray`` first.
    max_shape : sequence of int
        Target length for each axis of ``array``. Only the first
        ``array.ndim`` entries are read.

    Returns
    -------
    numpy.ndarray
        A copy of ``array`` with trailing zeros added along each axis.

    Raises
    ------
    ValueError
        If ``array`` is longer than ``max_shape`` along any axis. Padding
        cannot shrink data, so this is reported explicitly instead of
        surfacing as a negative-pad error from ``np.pad``.
    """
    array = np.asarray(array)
    target = [max_shape[axis] for axis in range(array.ndim)]
    for axis, (have, want) in enumerate(zip(array.shape, target)):
        if have > want:
            raise ValueError(
                f"array shape {array.shape} exceeds target shape {tuple(target)} "
                f"along axis {axis}; cannot pad to a smaller size"
            )
    pad_width = [(0, want - have) for have, want in zip(array.shape, target)]
    return np.pad(array, pad_width, mode='constant', constant_values=0)

# Function to load and preprocess a pair of positive and negative CSV files
def load_signal_pair(pos_file, neg_file):
    """Load a positive/negative CSV pair and join the two tables side by side.

    Both files go through ``load_and_preprocess_csv``; the results are then
    concatenated along axis 1, positive columns first.

    Returns
    -------
    numpy.ndarray
        2-D array holding the positive table followed by the negative table.
    """
    halves = [load_and_preprocess_csv(path) for path in (pos_file, neg_file)]
    return np.concatenate(halves, axis=1)

# Function to load data from a directory
def load_data_from_directory(directory):
    """Load every positive/negative CSV pair in ``directory`` as a padded array.

    A pair is a ``.csv`` file whose name contains ``positive`` together with
    the file obtained by replacing ``positive`` with ``negative`` in that
    name. Positive files without a matching negative file are skipped.

    Each pair is loaded once with ``load_signal_pair`` and then zero-padded
    to the largest (rows, columns) shape found in this directory.

    Side effect: the padding shape is written to ``max_shape.npy`` in the
    current working directory. The file is overwritten on every call, so it
    always describes the most recently loaded directory only.

    Parameters
    ----------
    directory : str
        Folder to scan. Samples are labelled 1 when the path contains
        ``Glitch_volumes`` and 0 otherwise.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        The stacked padded signals and the matching labels.
    """
    csv_names = sorted(name for name in os.listdir(directory) if name.endswith('.csv'))

    # Read each complete pair exactly once and keep it, rather than parsing
    # every CSV a second time after the maximum shape is known.
    pairs = []
    for name in csv_names:
        if 'positive' not in name:
            continue
        pos_file = os.path.join(directory, name)
        neg_file = os.path.join(directory, name.replace('positive', 'negative'))
        if os.path.exists(neg_file):
            pairs.append(load_signal_pair(pos_file, neg_file))

    # Largest extent along each axis; (0, 0) when no pair was found.
    max_shape = (
        max((pair.shape[0] for pair in pairs), default=0),
        max((pair.shape[1] for pair in pairs), default=0),
    )

    # Save max_shape
    np.save('max_shape.npy', max_shape)

    label = 1 if 'Glitch_volumes' in directory else 0
    signals = [pad_array(pair, max_shape) for pair in pairs]
    labels = [label] * len(signals)

    return np.array(signals), np.array(labels)

# Load your data
merger_directory = '/home/arutkeerthi/Downloads/Glitchveto/Merger_volumes/1200MPC-2-Noisy'
glitch_directory = '/home/arutkeerthi/Downloads/Glitchveto/Glitch_volumes'

X_merger, y_merger = load_data_from_directory(merger_directory)
X_glitch, y_glitch = load_data_from_directory(glitch_directory)

# load_data_from_directory pads each directory to that directory's own maximum
# shape, so the two sets can differ in their last two dimensions. Bring both to
# one common (rows, columns) shape before stacking them.
if X_merger.ndim != 3 or X_glitch.ndim != 3:
    raise ValueError('Both directories must contain at least one positive/negative CSV pair')
common_shape = tuple(max(X_merger.shape[axis], X_glitch.shape[axis]) for axis in (1, 2))
X_merger = pad_array(X_merger, (X_merger.shape[0],) + common_shape)
X_glitch = pad_array(X_glitch, (X_glitch.shape[0],) + common_shape)

# max_shape.npy written by load_data_from_directory only describes the last
# directory loaded; overwrite it with the shape the network is trained on so
# that later cells pad their inputs to the model's real input size.
np.save('max_shape.npy', common_shape)

# Combine merger and glitch data
X = np.concatenate((X_merger, X_glitch), axis=0)
y = np.concatenate((y_merger, y_glitch), axis=0)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert labels to categorical. num_classes is fixed so both splits always
# get two columns, even if one split happens to contain a single class.
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

# Define the CNN model
input_shape = (X_train.shape[1], X_train.shape[2])
inputs = Input(shape=input_shape)

x = Conv1D(filters=32, kernel_size=3, activation='relu')(inputs)
x = MaxPooling1D(pool_size=2)(x)
x = Dropout(0.25)(x)

x = Conv1D(filters=64, kernel_size=3, activation='relu')(x)
x = MaxPooling1D(pool_size=2)(x)
x = Dropout(0.25)(x)

x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)

# Two softmax outputs: index 0 = non-glitch (label 0), index 1 = glitch (label 1)
outputs = Dense(2, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE: the held-out split doubles as validation data here, so the accuracy
# reported by evaluate() below is measured on the same samples.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Save the model
model.save('signal_classification_model.h5')

# Print training accuracy per epoch
print("Training accuracy per epoch:")
for epoch, acc in enumerate(history.history['accuracy']):
    print(f"Epoch {epoch + 1}: Training Accuracy = {acc:.4f}")

# Evaluate the model
print("\nValidation accuracy (test set):")
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy:.4f}')





Epoch 1/20


2024-07-30 15:41:51.682336: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 396519900 exceeds 10% of free system memory.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Training accuracy per epoch:
Epoch 1: Training Accuracy = 0.8420
Epoch 2: Training Accuracy = 0.9361
Epoch 3: Training Accuracy = 1.0000
Epoch 4: Training Accuracy = 1.0000
Epoch 5: Training Accuracy = 1.0000
Epoch 6: Training Accuracy = 1.0000
Epoch 7: Training Accuracy = 1.0000
Epoch 8: Training Accuracy = 1.0000
Epoch 9: Training Accuracy = 1.0000
Epoch 10: Training Accuracy = 1.0000
Epoch 11: Training Accuracy = 1.0000
Epoch 12: Training Accuracy = 1.0000
Epoch 13: Training Accuracy = 1.0000
Epoch 14: Training Accuracy = 1.0000
Epoch 15: Training Accuracy = 1.0000
Epoch 16: Training Accuracy = 1.0000
Epoch 17: Training Accuracy = 1.0000
Epoch 18: Training Accuracy = 1.0000
Epoch 19: Training Accuracy = 1.0000
Epoch 20: Training Accuracy = 1.0000

Validation accuracy

  saving_api.save_model(


Test accuracy: 1.0000


In [1]:
# TODO: Analyze the rogue glitches.
# TODO: Perform principal component analysis (PCA).
# TODO: Keep testing and tuning the model.

In [9]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

# Load and preprocess a single CSV file
def load_and_preprocess_csv(file_path):
    """Read one histogram CSV and return it as a normalised slice-by-bin table.

    The CSV must provide ``slice_index``, ``bin_start`` and ``count`` columns.
    Every count is divided by the total count of its slice, then the long
    table is pivoted so that rows are slice indices and columns are bin
    starts. Cells without a value after pivoting are set to 0.

    Returns
    -------
    pandas.DataFrame
        One row per ``slice_index`` and one column per ``bin_start``.
    """
    frame = pd.read_csv(file_path)
    # Per-row total of the slice that row belongs to.
    slice_totals = frame.groupby('slice_index')['count'].transform('sum')
    frame['count'] = frame['count'] / slice_totals
    table = frame.pivot(index='slice_index', columns='bin_start', values='count')
    return table.fillna(0)

# Function to pad arrays to the same shape
def pad_array(array, max_shape):
    """Zero-pad ``array`` at the end of every axis until it has shape ``max_shape``.

    Parameters
    ----------
    array : array-like
        Data to pad; it is converted with ``np.asarray`` first.
    max_shape : sequence of int
        Target length for each axis of ``array``. Only the first
        ``array.ndim`` entries are read.

    Returns
    -------
    numpy.ndarray
        A copy of ``array`` with trailing zeros added along each axis.

    Raises
    ------
    ValueError
        If ``array`` is longer than ``max_shape`` along any axis. Padding
        cannot shrink data, so this is reported explicitly instead of
        surfacing as a negative-pad error from ``np.pad``.
    """
    array = np.asarray(array)
    target = [max_shape[axis] for axis in range(array.ndim)]
    for axis, (have, want) in enumerate(zip(array.shape, target)):
        if have > want:
            raise ValueError(
                f"array shape {array.shape} exceeds target shape {tuple(target)} "
                f"along axis {axis}; cannot pad to a smaller size"
            )
    pad_width = [(0, want - have) for have, want in zip(array.shape, target)]
    return np.pad(array, pad_width, mode='constant', constant_values=0)

# Load and preprocess a pair of positive and negative CSV files
def preprocess_signal_pair(pos_file, neg_file, max_shape):
    """Turn one positive/negative CSV pair into a single-sample batch.

    Both files go through ``load_and_preprocess_csv``, the two tables are
    concatenated along axis 1 (positive first), the result is zero-padded to
    ``max_shape`` with ``pad_array``, and a leading axis of length 1 is added.
    """
    halves = [load_and_preprocess_csv(path) for path in (pos_file, neg_file)]
    stacked = np.concatenate(halves, axis=1)
    padded = pad_array(stacked, max_shape)
    # Leading axis of length 1 acts as the batch dimension.
    return padded[np.newaxis, ...]

# Locations of the trained model and of the CSV pair to classify
model_path = '/home/arutkeerthi/Downloads/Glitchveto/signal_classification_model.h5'
pos_file = '/home/arutkeerthi/Downloads/Glitchveto/Glitch_volumes/positive_12.csv'
neg_file = '/home/arutkeerthi/Downloads/Glitchveto/Glitch_volumes/negative_12.csv'

# Restore the trained network and the padding shape stored in max_shape.npy
model = load_model(model_path)
max_shape = np.load('max_shape.npy')

# Build a single-sample batch from the CSV pair and run it through the model
test_data = preprocess_signal_pair(pos_file, neg_file, max_shape)
prediction = model.predict(test_data)
predicted_class = prediction.argmax()

# Report the class probabilities and the winning label (index 1 means 'Glitch')
print(f"Prediction probabilities: {prediction}")
print(f"Predicted class: {'Glitch' if predicted_class == 1 else 'Non-Glitch'}")


Prediction probabilities: [[3.4571328e-06 9.9999654e-01]]
Predicted class: Glitch


In [10]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

# Load and preprocess a single CSV file
def load_and_preprocess_csv(file_path):
    """Read one histogram CSV and return it as a normalised slice-by-bin table.

    The CSV must provide ``slice_index``, ``bin_start`` and ``count`` columns.
    Every count is divided by the total count of its slice, then the long
    table is pivoted so that rows are slice indices and columns are bin
    starts. Cells without a value after pivoting are set to 0.

    Returns
    -------
    pandas.DataFrame
        One row per ``slice_index`` and one column per ``bin_start``.
    """
    frame = pd.read_csv(file_path)
    # Per-row total of the slice that row belongs to.
    slice_totals = frame.groupby('slice_index')['count'].transform('sum')
    frame['count'] = frame['count'] / slice_totals
    table = frame.pivot(index='slice_index', columns='bin_start', values='count')
    return table.fillna(0)

# Function to pad arrays to the same shape
def pad_array(array, max_shape):
    """Zero-pad ``array`` at the end of every axis until it has shape ``max_shape``.

    Parameters
    ----------
    array : array-like
        Data to pad; it is converted with ``np.asarray`` first.
    max_shape : sequence of int
        Target length for each axis of ``array``. Only the first
        ``array.ndim`` entries are read.

    Returns
    -------
    numpy.ndarray
        A copy of ``array`` with trailing zeros added along each axis.

    Raises
    ------
    ValueError
        If ``array`` is longer than ``max_shape`` along any axis. Padding
        cannot shrink data, so this is reported explicitly instead of
        surfacing as a negative-pad error from ``np.pad``.
    """
    array = np.asarray(array)
    target = [max_shape[axis] for axis in range(array.ndim)]
    for axis, (have, want) in enumerate(zip(array.shape, target)):
        if have > want:
            raise ValueError(
                f"array shape {array.shape} exceeds target shape {tuple(target)} "
                f"along axis {axis}; cannot pad to a smaller size"
            )
    pad_width = [(0, want - have) for have, want in zip(array.shape, target)]
    return np.pad(array, pad_width, mode='constant', constant_values=0)

# Load and preprocess a pair of positive and negative CSV files
def preprocess_signal_pair(pos_file, neg_file, max_shape):
    """Turn one positive/negative CSV pair into a single-sample batch.

    Both files go through ``load_and_preprocess_csv``, the two tables are
    concatenated along axis 1 (positive first), the result is zero-padded to
    ``max_shape`` with ``pad_array``, and a leading axis of length 1 is added.
    """
    halves = [load_and_preprocess_csv(path) for path in (pos_file, neg_file)]
    stacked = np.concatenate(halves, axis=1)
    padded = pad_array(stacked, max_shape)
    # Leading axis of length 1 acts as the batch dimension.
    return padded[np.newaxis, ...]

# Locations of the trained model and of the CSV pair to classify
model_path = '/home/arutkeerthi/Downloads/Glitchveto/signal_classification_model.h5'
pos_file = '/home/arutkeerthi/Downloads/Glitchveto/Merger_volumes/1200MPC-2-Noisy/positive_10.csv'
neg_file = '/home/arutkeerthi/Downloads/Glitchveto/Merger_volumes/1200MPC-2-Noisy/negative_10.csv'

# Restore the trained network and the padding shape stored in max_shape.npy
model = load_model(model_path)
max_shape = np.load('max_shape.npy')

# Build a single-sample batch from the CSV pair and run it through the model
test_data = preprocess_signal_pair(pos_file, neg_file, max_shape)
prediction = model.predict(test_data)
predicted_class = prediction.argmax()

# Report the class probabilities and the winning label (index 1 means 'Glitch')
print(f"Prediction probabilities: {prediction}")
print(f"Predicted class: {'Glitch' if predicted_class == 1 else 'Non-Glitch'}")

Prediction probabilities: [[9.9999475e-01 5.3000554e-06]]
Predicted class: Non-Glitch


+-------------------------------------+
|         Data Preprocessing          |
+-------------------------------------+
| 1. Load CSV Files                   |
|    - Positive CSV                   |
|    - Negative CSV                   |
| 2. Normalize Counts                 |
| 3. Pivot Data                       |
| 4. Combine Data                     |
| 5. Determine Maximum Shape          |
| 6. Pad Data                         |
| 7. Save Max Shape                   |
| 8. Combine and Label Data           |
+-------------------------------------+

          |
          V

+-------------------------------------+
|         Neural Network Architecture |
+-------------------------------------+
| 1. Input Layer                      |
| 2. Convolutional Layer 1            |
| 3. Pooling Layer 1                  |
| 4. Dropout Layer 1                  |
| 5. Convolutional Layer 2            |
| 6. Pooling Layer 2                  |
| 7. Dropout Layer 2                  |
| 8. Flatten Layer                    |
| 9. Dense Layer                      |
| 10. Dropout Layer 3                 |
| 11. Output Layer                    |
| 12. Compile Model                   |
+-------------------------------------+

          |
          V

+-------------------------------------+
|           Model Training            |
+-------------------------------------+
| 1. Fit Model                        |
| 2. Evaluate Model                   |
+-------------------------------------+

          |
          V

+-------------------------------------+
|               Testing               |
+-------------------------------------+
| 1. Load Test Files                  |
| 2. Preprocess Test Data             |
| 3. Predict                          |
| 4. Output Results                   |
+-------------------------------------+


Input Layer
   ↓
Conv1D (32 filters, kernel_size=3, activation='relu')
   ↓
MaxPooling1D (pool_size=2)
   ↓
Dropout (rate=0.25)
   ↓
Conv1D (64 filters, kernel_size=3, activation='relu')
   ↓
MaxPooling1D (pool_size=2)
   ↓
Dropout (rate=0.25)
   ↓
Flatten
   ↓
Dense (128 units, activation='relu')
   ↓
Dropout (rate=0.5)
   ↓
Dense (2 units, activation='softmax')
   ↓
Output Layer

In [14]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

# Load and preprocess a single CSV file
def load_and_preprocess_csv(file_path):
    """Read one histogram CSV and return it as a normalised slice-by-bin table.

    The CSV must provide ``slice_index``, ``bin_start`` and ``count`` columns.
    Every count is divided by the total count of its slice, then the long
    table is pivoted so that rows are slice indices and columns are bin
    starts. Cells without a value after pivoting are set to 0.

    Returns
    -------
    pandas.DataFrame
        One row per ``slice_index`` and one column per ``bin_start``.
    """
    frame = pd.read_csv(file_path)
    # Per-row total of the slice that row belongs to.
    slice_totals = frame.groupby('slice_index')['count'].transform('sum')
    frame['count'] = frame['count'] / slice_totals
    table = frame.pivot(index='slice_index', columns='bin_start', values='count')
    return table.fillna(0)

# Function to pad arrays to the same shape
def pad_array(array, max_shape):
    """Zero-pad ``array`` at the end of every axis until it has shape ``max_shape``.

    Parameters
    ----------
    array : array-like
        Data to pad; it is converted with ``np.asarray`` first.
    max_shape : sequence of int
        Target length for each axis of ``array``. Only the first
        ``array.ndim`` entries are read.

    Returns
    -------
    numpy.ndarray
        A copy of ``array`` with trailing zeros added along each axis.

    Raises
    ------
    ValueError
        If ``array`` is longer than ``max_shape`` along any axis. Padding
        cannot shrink data, so this is reported explicitly instead of
        surfacing as a negative-pad error from ``np.pad``.
    """
    array = np.asarray(array)
    target = [max_shape[axis] for axis in range(array.ndim)]
    for axis, (have, want) in enumerate(zip(array.shape, target)):
        if have > want:
            raise ValueError(
                f"array shape {array.shape} exceeds target shape {tuple(target)} "
                f"along axis {axis}; cannot pad to a smaller size"
            )
    pad_width = [(0, want - have) for have, want in zip(array.shape, target)]
    return np.pad(array, pad_width, mode='constant', constant_values=0)

# Load and preprocess a pair of positive and negative CSV files
def preprocess_signal_pair(pos_file, neg_file, max_shape):
    """Turn one positive/negative CSV pair into a single-sample batch.

    Both files go through ``load_and_preprocess_csv``, the two tables are
    concatenated along axis 1 (positive first), the result is zero-padded to
    ``max_shape`` with ``pad_array``, and a leading axis of length 1 is added.
    """
    halves = [load_and_preprocess_csv(path) for path in (pos_file, neg_file)]
    stacked = np.concatenate(halves, axis=1)
    padded = pad_array(stacked, max_shape)
    # Leading axis of length 1 acts as the batch dimension.
    return padded[np.newaxis, ...]

# Locations of the trained model and of the CSV pair to classify
model_path = '/home/arutkeerthi/Downloads/Glitchveto/signal_classification_model.h5'
pos_file = '/home/arutkeerthi/Downloads/Glitchveto/Merger_volumes/1000MPC-3-Noisy/positive_10.csv'
neg_file = '/home/arutkeerthi/Downloads/Glitchveto/Merger_volumes/1000MPC-3-Noisy/negative_10.csv'

# Restore the trained network and the padding shape stored in max_shape.npy
model = load_model(model_path)
max_shape = np.load('max_shape.npy')

# Build a single-sample batch from the CSV pair and run it through the model
test_data = preprocess_signal_pair(pos_file, neg_file, max_shape)
prediction = model.predict(test_data)
predicted_class = prediction.argmax()

# Report the class probabilities and the winning label (index 1 means 'Glitch')
print(f"Prediction probabilities: {prediction}")
print(f"Predicted class: {'Glitch' if predicted_class == 1 else 'Non-Glitch'}")

Prediction probabilities: [[9.9993193e-01 6.8076231e-05]]
Predicted class: Non-Glitch
