In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.mixture import GaussianMixture
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Conv1D, Flatten
import matplotlib.pyplot as plt


In [None]:
# Read the raw measurements from disk.
data = pd.read_csv("seismic_oceanographic_data.csv")  # Replace with actual file path

# Fit a min-max scaler on the raw frame, then apply it. The fitted scaler is
# kept around so that later data can be transformed with the same ranges.
scaler = MinMaxScaler().fit(data)
data_scaled = scaler.transform(data)

# Window length (rows per sample) and number of columns per row used by the
# sequence models below.
timesteps = 30
features = data_scaled.shape[-1]


In [None]:
def create_sequences(data, timesteps):
    """Slice ``data`` into overlapping windows of ``timesteps`` consecutive rows.

    Window ``i`` is ``data[i:i + timesteps]``; consecutive windows are shifted
    by one row. Every full-length window is returned, including the one that
    ends on the final row of ``data``.

    Args:
        data: Array-like whose first axis is the time axis.
        timesteps: Number of rows in each window; must be at least 1.

    Returns:
        np.ndarray of shape ``(n_windows, timesteps, *data.shape[1:])`` with
        ``n_windows = max(len(data) - timesteps + 1, 0)``. When ``data`` is
        shorter than one window the result is empty but keeps the trailing
        dimensions, so downstream shape checks still work.

    Raises:
        ValueError: If ``timesteps`` is smaller than 1.
    """
    if timesteps < 1:
        raise ValueError("timesteps must be a positive integer")

    data = np.asarray(data)
    # +1 so the window that ends exactly on the last row is not dropped.
    n_windows = len(data) - timesteps + 1
    if n_windows <= 0:
        return np.empty((0, timesteps) + data.shape[1:], dtype=data.dtype)
    return np.stack([data[start:start + timesteps] for start in range(n_windows)])

# Build the windowed training set from the scaled data.
data_sequences = create_sequences(data=data_scaled, timesteps=timesteps)


In [None]:
# LSTM autoencoder, assembled layer by layer.
lstm_autoencoder = Sequential()

# Encoder: two stacked LSTMs; the second returns only its final state,
# i.e. one 32-unit vector per input window.
lstm_autoencoder.add(
    LSTM(64, activation='relu', input_shape=(timesteps, features), return_sequences=True)
)
lstm_autoencoder.add(LSTM(32, activation='relu', return_sequences=False))

# Bridge: repeat the encoded vector once per time step for the decoder.
lstm_autoencoder.add(RepeatVector(timesteps))

# Decoder: mirror of the encoder, followed by a per-step Dense layer that
# emits `features` values for every time step.
lstm_autoencoder.add(LSTM(32, activation='relu', return_sequences=True))
lstm_autoencoder.add(LSTM(64, activation='relu', return_sequences=True))
lstm_autoencoder.add(TimeDistributed(Dense(features)))

lstm_autoencoder.compile(optimizer='adam', loss='mse')


In [None]:
# Fit the autoencoder to reproduce its own input windows.
# NOTE(review): this assumes `data_sequences` contains only normal
# (non-anomalous) behaviour - confirm against the data source.
history = lstm_autoencoder.fit(
    x=data_sequences,
    y=data_sequences,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# Reconstruct the training windows and derive the anomaly cutoff from the
# distribution of per-window reconstruction errors.
reconstructions = lstm_autoencoder.predict(data_sequences)

# Despite the variable name, this is the mean *absolute* error of each window,
# averaged over both the time-step and feature axes.
mse_loss = np.abs(data_sequences - reconstructions).mean(axis=(1, 2))

# Cutoff: mean error plus two standard deviations.
lstm_threshold = mse_loss.mean() + 2 * mse_loss.std()


In [None]:
# 1-D convolutional classifier over a window of shape (timesteps, features).
cnn = Sequential()
cnn.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=(timesteps, features)))
cnn.add(Conv1D(32, kernel_size=3, activation='relu'))
cnn.add(Flatten())
# Single sigmoid unit: the output is read as the probability of an anomaly.
cnn.add(Dense(1, activation='sigmoid'))

cnn.compile(optimizer='adam', loss='binary_crossentropy')


In [None]:
# Label convention: 0 = normal, 1 = anomaly. Every training window is given
# label 0 here; no label-1 examples are created in this cell.
labels = np.zeros(len(data_sequences))

# NOTE(review): the targets contain a single class, so the classifier is never
# shown a positive example - confirm that this is the intended training setup.
cnn.fit(
    x=data_sequences,
    y=labels,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
)


In [None]:
# Fit a two-component, full-covariance Gaussian mixture on the individual
# scaled rows (one sample per time step, `features` columns each).
# random_state is fixed so the fitted mixture - and therefore the threshold
# derived from it below - is the same on every Restart & Run All.
gmm = GaussianMixture(n_components=2, covariance_type='full', random_state=42)
gmm.fit(data_scaled)

# Per-row log-likelihood of the training data under the fitted mixture.
gmm_scores = gmm.score_samples(data_scaled)

# Cutoff: mean log-likelihood minus two standard deviations. Rows scoring
# below this value are treated as anomalous.
gmm_threshold = np.mean(gmm_scores) - 2 * np.std(gmm_scores)


In [None]:
def detect_anomaly(sequence):
    """Classify one window with the LSTM / CNN / GMM ensemble by majority vote.

    Args:
        sequence: Array-like that can be reshaped to ``(timesteps, features)``,
            i.e. one scaled window.

    Returns:
        Tuple ``(is_anomaly, votes)``. ``is_anomaly`` is a plain ``bool`` that
        is True when at least two of the three detectors flag the window.
        ``votes`` maps ``"LSTM"``, ``"CNN"`` and ``"GMM"`` to each detector's
        individual ``bool`` decision.
    """
    window = np.asarray(sequence).reshape(timesteps, features)
    batch = window.reshape(1, timesteps, features)

    # LSTM autoencoder: mean absolute reconstruction error of the window,
    # compared against the cutoff derived from the training data.
    # verbose=0 keeps per-window calls from flooding the output with progress bars.
    lstm_reconstructed = lstm_autoencoder.predict(batch, verbose=0)
    lstm_loss = np.mean(np.abs(batch - lstm_reconstructed))
    lstm_anomaly = bool(lstm_loss > lstm_threshold)

    # CNN: flag the window when the predicted anomaly probability exceeds 0.5.
    cnn_pred = cnn.predict(batch, verbose=0)
    cnn_anomaly = bool(np.asarray(cnn_pred).reshape(-1)[0] > 0.5)

    # GMM: the mixture is fitted on single rows with `features` columns, so a
    # flattened (1, timesteps * features) vector cannot be scored. Score every
    # time step separately and compare the window's mean log-likelihood with
    # the cutoff instead.
    step_scores = gmm.score_samples(window)
    gmm_anomaly = bool(np.mean(step_scores) < gmm_threshold)

    # Ensemble decision: anomaly if two or more detectors agree.
    votes = {"LSTM": lstm_anomaly, "CNN": cnn_anomaly, "GMM": gmm_anomaly}
    is_anomaly = sum(votes.values()) >= 2

    return is_anomaly, votes


In [None]:
# Load unseen data and push it through the already-fitted scaler and the same
# windowing used for training.
new_data = pd.read_csv("new_seismic_oceanographic_data.csv")  # Replace with actual path
new_data_scaled = scaler.transform(new_data)
new_data_sequences = create_sequences(new_data_scaled, timesteps)

# Run the ensemble on every window, collecting only the overall verdict.
anomalies = []
for sequence in new_data_sequences:
    is_anomaly, model_results = detect_anomaly(sequence)
    anomalies += [is_anomaly]

# Positions (along the first axis) of the windows that were flagged.
anomaly_indices = np.nonzero(anomalies)[0]
print("Anomalies detected at indices:", anomaly_indices)
