In [6]:
import os
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
from google.colab import drive

from google.colab import drive
import os

if not os.path.exists('/content/drive/MyDrive'):
    drive.mount('/content/drive')
else:
    print("✅ Drive already mounted.")

# ===== Paths =====
BASE_DIR = "/content/drive/MyDrive/spacecraft_anomaly_project"  # change if needed
RAW_DIR = os.path.join(BASE_DIR, "data", "raw")
MODELS_DIR = os.path.join(BASE_DIR, "models")
OUTPUTS_DIR = os.path.join(BASE_DIR, "outputs")

os.makedirs(OUTPUTS_DIR, exist_ok=True)

# ===== File names =====
DATA_FILE = os.path.join(RAW_DIR, "dataset.csv")
SEGMENTS_FILE = os.path.join(RAW_DIR, "segments.csv")
SCALER_FILE = os.path.join(MODELS_DIR, "scaler.pkl")
AUTOENCODER_FILE = os.path.join(MODELS_DIR, "autoencoder/lstm_autoencoder.h5")
TTI_SCALER_FILE = os.path.join(MODELS_DIR, "tti_scaler.pkl")
TTI_MODEL_FILE = os.path.join(MODELS_DIR, "tti_lstm_regressor.keras")

# Window length from training
TIME_STEPS = 30

✅ Drive already mounted.


In [4]:
print("📂 Loading telemetry dataset...")
df = pd.read_csv(DATA_FILE)
print(f"✅ Raw shape: {df.shape}")

df_features = df.drop(columns=['anomaly', 'channel'], errors='ignore')

# ===== Load scaler from training =====
scaler = joblib.load(SCALER_FILE)
df_scaled = pd.DataFrame(scaler.transform(df_features), columns=df_features.columns)
print("✅ Data scaled.")

# ===== Create sliding windows =====
def create_sequences(data, time_steps=TIME_STEPS):
    sequences = []
    for i in range(len(data) - time_steps + 1):
        sequences.append(data[i:i+time_steps])
    return np.array(sequences)

X_seq = create_sequences(df_scaled.values)
print(f"✅ Created sequences: {X_seq.shape}")

📂 Loading telemetry dataset...
✅ Raw shape: (2123, 23)
✅ Data scaled.
✅ Created sequences: (2094, 30, 21)


In [7]:
print("📂 Loading models...")
autoencoder = load_model(AUTOENCODER_FILE)
tti_model = load_model(TTI_MODEL_FILE)
tti_scaler = joblib.load(TTI_SCALER_FILE)
print("✅ All models loaded successfully.")

📂 Loading models...




✅ All models loaded successfully.


In [8]:
# ===== Reconstruction errors =====
X_pred = autoencoder.predict(X_seq)
mse = np.mean(np.square(X_seq - X_pred), axis=(1, 2))

# Threshold — same as training
THRESHOLD = np.percentile(mse, 95)  # example: 95th percentile
anomalies_idx = np.where(mse > THRESHOLD)[0]

print(f"🚨 Detected {len(anomalies_idx)} anomalies.")

# Extract anomaly sequences
X_anomalies = X_seq[anomalies_idx]
print(f"✅ X_anomalies shape: {X_anomalies.shape}")

[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step
🚨 Detected 105 anomalies.
✅ X_anomalies shape: (105, 30, 21)


In [10]:
# ===== Cell 5: Scaling for TTI model =====

print("📏 Scaling anomaly sequences for TTI model...")

# X_anomalies shape should be (num_samples, TIME_STEPS, num_features)
num_samples, time_steps, num_features = X_anomalies.shape

# ✅ Reshape to 2D (combine samples & timesteps) for scaling
X_anomalies_2d = X_anomalies.reshape(-1, num_features)  # shape: (num_samples * time_steps, num_features)

# ===== Load scaler from training =====
tti_scaler = joblib.load(TTI_SCALER_FILE)

# ✅ Scale using the training scaler
X_anomalies_scaled_2d = tti_scaler.transform(X_anomalies_2d)

# ✅ Reshape back to original 3D shape
X_anomalies_scaled = X_anomalies_scaled_2d.reshape(num_samples, time_steps, num_features)

print(f"✅ Data scaled for TTI model: {X_anomalies_scaled.shape}")


📏 Scaling anomaly sequences for TTI model...
✅ Data scaled for TTI model: (105, 30, 21)


In [16]:
# ===== Cell 6: Predict TTI for detected anomalies (shape-safe) =====

if len(X_anomalies) == 0:
    print("⚠ No anomalies detected — skipping TTI prediction.")
    y_pred_tti = np.array([])
else:
    # Flatten only the feature dimension for scaling
    n_samples, n_steps, n_features = X_anomalies.shape  # e.g., (samples, TIME_STEPS, 21)
    X_anomalies_reshaped = X_anomalies.reshape(-1, n_features)  # (samples * TIME_STEPS, 21)

    # Scale each timestep’s features
    X_anomalies_scaled = tti_scaler.transform(X_anomalies_reshaped)

    # Reshape back to LSTM format
    X_anomalies_scaled = X_anomalies_scaled.reshape(n_samples, n_steps, n_features)

    # Predict TTI
    y_pred_tti = tti_model.predict(X_anomalies_scaled)

    print(f"✅ TTI predictions completed. Shape: {y_pred_tti.shape}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
✅ TTI predictions completed. Shape: (105, 1)


In [24]:
import numpy as np
import os
from tensorflow import keras

# Paths
BASE_DIR = "/content/drive/MyDrive/spacecraft_anomaly_project"
PROCESSED_DIR = os.path.join(BASE_DIR, "data", "processed")
MODELS_DIR = os.path.join(BASE_DIR, "models")

# Load scaled data - assuming shape (num_samples, num_features)
X_full_flat = np.load(os.path.join(PROCESSED_DIR, "X_full.npy"))
print(f"✅ Loaded X_full (flat): {X_full_flat.shape}")

# Parameters
TIME_STEPS = 30  # must match your model's expected input time steps
FEATURES = X_full_flat.shape[1]

# Convert flat data into overlapping sequences for LSTM input
def create_sequences(data, window_size=TIME_STEPS):
    sequences = []
    for i in range(len(data) - window_size + 1):
        sequences.append(data[i:i+window_size])
    return np.array(sequences)

X_full_seq = create_sequences(X_full_flat)
print(f"✅ Created sequences: {X_full_seq.shape} (samples, timesteps, features)")

# Load autoencoder
autoencoder = keras.models.load_model(os.path.join(MODELS_DIR, "autoencoder/lstm_autoencoder.h5"))
print("✅ Autoencoder model loaded.")

# Predict reconstruction
X_pred = autoencoder.predict(X_full_seq)
print(f"✅ Reconstruction predictions done: {X_pred.shape}")

# Compute reconstruction error per sequence (mean squared error)
reconstruction_errors = np.mean(np.square(X_full_seq - X_pred), axis=(1,2))
print(f"✅ Computed reconstruction errors: {reconstruction_errors.shape}")

# Save reconstruction errors
errors_path = os.path.join(PROCESSED_DIR, "reconstruction_errors.npy")
np.save(errors_path, reconstruction_errors)
print(f"✅ Saved reconstruction errors at {errors_path}")




✅ Loaded X_full (flat): (2123, 21)
✅ Created sequences: (2094, 30, 21) (samples, timesteps, features)
✅ Autoencoder model loaded.
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step
✅ Reconstruction predictions done: (2094, 30, 21)
✅ Computed reconstruction errors: (2094,)
✅ Saved reconstruction errors at /content/drive/MyDrive/spacecraft_anomaly_project/data/processed/reconstruction_errors.npy


In [25]:
import numpy as np
import os

# ===== Paths =====
PROCESSED_DIR = "/content/drive/MyDrive/spacecraft_anomaly_project/data/processed"

# ===== Load reconstruction errors (make sure you have this file saved earlier) =====
reconstruction_errors_path = os.path.join(PROCESSED_DIR, "reconstruction_errors.npy")
reconstruction_errors = np.load(reconstruction_errors_path)
print(f"✅ Loaded reconstruction errors: {reconstruction_errors.shape}")

# ===== Define threshold to detect anomalies =====
threshold = 0.1  # Adjust this based on your dataset and error distribution

# ===== Detect anomalies where error exceeds threshold =====
anomaly_indices = np.where(reconstruction_errors > threshold)[0]
print(f"✅ Detected {len(anomaly_indices)} anomaly indices")

# ===== Save anomaly indices for future use =====
anomaly_indices_path = os.path.join(PROCESSED_DIR, "anomaly_indices.npy")
np.save(anomaly_indices_path, anomaly_indices)
print(f"✅ Saved anomaly indices to: {anomaly_indices_path}")


✅ Loaded reconstruction errors: (2094,)
✅ Detected 2094 anomaly indices
✅ Saved anomaly indices to: /content/drive/MyDrive/spacecraft_anomaly_project/data/processed/anomaly_indices.npy


In [29]:
import numpy as np
import pandas as pd
import os

# === Paths ===
RAW_DIR = '/content/drive/MyDrive/spacecraft_anomaly_project/data/raw'
PROCESSED_DIR = '/content/drive/MyDrive/spacecraft_anomaly_project/data/processed'

# === Load anomaly indices ===
anomaly_indices_path = os.path.join(PROCESSED_DIR, 'anomaly_indices.npy')
anomaly_indices = np.load(anomaly_indices_path)
print(f"✅ Loaded anomaly indices: {anomaly_indices.shape[0]} anomalies")

# === Load raw dataset ===
df_raw = pd.read_csv(os.path.join(RAW_DIR, 'dataset.csv'))
print(f"✅ Loaded raw dataset: {df_raw.shape[0]} rows, {df_raw.shape[1]} columns")

# === Create synthetic timestamps with lowercase 's' ===
sampling_interval_seconds = 1  # Adjust if needed
df_raw['timestamp'] = pd.date_range(start='2020-01-01 00:00:00', periods=len(df_raw), freq=f'{sampling_interval_seconds}s')
timestamps = df_raw['timestamp'].values
print(f"✅ Created synthetic timestamps: {len(timestamps)} entries")

# === Define window size used in anomaly detection ===
window_size = 30  # adjust if you used a different window size

# === Build anomalies DataFrame ===
anomaly_windows = []
for idx in anomaly_indices:
    end_idx = min(idx + window_size - 1, len(timestamps) - 1)
    start_time = timestamps[idx]
    end_time = timestamps[end_idx]

    # Convert numpy.timedelta64 to seconds correctly
    duration = (end_time - start_time) / np.timedelta64(1, 's')  # duration in seconds

    anomaly_windows.append({'start_time': start_time, 'end_time': end_time, 'duration_sec': duration})

anomalies_df = pd.DataFrame(anomaly_windows)
print("✅ Created anomalies DataFrame:")
print(anomalies_df.head())

✅ Loaded anomaly indices: 2094 anomalies
✅ Loaded raw dataset: 2123 rows, 23 columns
✅ Created synthetic timestamps: 2123 entries
✅ Created anomalies DataFrame:
           start_time            end_time  duration_sec
0 2020-01-01 00:00:00 2020-01-01 00:00:29          29.0
1 2020-01-01 00:00:01 2020-01-01 00:00:30          29.0
2 2020-01-01 00:00:02 2020-01-01 00:00:31          29.0
3 2020-01-01 00:00:03 2020-01-01 00:00:32          29.0
4 2020-01-01 00:00:04 2020-01-01 00:00:33          29.0


In [37]:
# ===== Cell 7: Predict TTI and prioritize anomalies for ground station =====

import numpy as np
import pandas as pd
import os

# Paths
PROCESSED_DIR = "/content/drive/MyDrive/spacecraft_anomaly_project/data/processed"
RAW_DIR = "/content/drive/MyDrive/spacecraft_anomaly_project/data/raw"
OUTPUTS_DIR = os.path.join(PROCESSED_DIR, "outputs")
os.makedirs(OUTPUTS_DIR, exist_ok=True)

# Load the subset of anomaly indices that correspond to X_anomalies_scaled
tti_anomaly_indices_path = os.path.join(PROCESSED_DIR, "anomaly_indices.npy")
tti_anomaly_indices = np.load(tti_anomaly_indices_path)
print(f"✅ Loaded {len(tti_anomaly_indices)} anomaly indices for TTI prediction")

# Load raw dataset to get timestamps
df_raw = pd.read_csv(os.path.join(RAW_DIR, "dataset.csv"))

# Create synthetic timestamps if needed (adjust if real timestamps exist)
if "timestamp" not in df_raw.columns:
    sampling_interval_seconds = 1  # or the actual sampling interval you used
    df_raw["timestamp"] = pd.date_range(start="2020-01-01 00:00:00", periods=len(df_raw), freq=f"{sampling_interval_seconds}s")

timestamps = pd.to_datetime(df_raw["timestamp"]).values

# Define window size (used during anomaly window creation)
window_size = 30

# Build anomalies DataFrame only for tti_anomaly_indices
anomaly_windows = []
for idx in tti_anomaly_indices:
    end_idx = min(idx + window_size - 1, len(timestamps) - 1)
    start_time = timestamps[idx]
    end_time = timestamps[end_idx]
    duration_sec = (end_time - start_time) / np.timedelta64(1, 's')
    anomaly_windows.append({"start_time": start_time, "end_time": end_time, "duration_sec": duration_sec})

anomalies_df = pd.DataFrame(anomaly_windows)
print(f"✅ Created anomalies DataFrame with {len(anomalies_df)} entries")

# Predict TTI using the loaded LSTM regressor
print(f"✅ Input shape to TTI model: {X_anomalies_scaled.shape}")
y_pred_tti = tti_model.predict(X_anomalies_scaled)
print("✅ TTI predictions complete.")

# Add predicted TTI to anomalies DataFrame (lengths match!)
anomalies_df["predicted_TTI"] = y_pred_tti.flatten()

# Sort anomalies by shortest predicted TTI
anomalies_sorted = anomalies_df.sort_values(by="predicted_TTI", ascending=True)

# Optional: filter anomalies below TTI threshold
TTI_THRESHOLD = 100  # seconds (adjust as needed)
filtered_anomalies = anomalies_sorted[anomalies_sorted["predicted_TTI"] < TTI_THRESHOLD]

# Limit to top N anomalies
TOP_N = 20  # adjust as needed
high_priority_anomalies = filtered_anomalies.head(TOP_N)

# Show selected anomalies
print(f"✅ High-priority anomalies for transmission (Top {TOP_N} with TTI < {TTI_THRESHOLD}s):")
display(high_priority_anomalies)

# Save prioritized anomalies to outputs folder
output_file = os.path.join(OUTPUTS_DIR, "prioritized_anomalies.csv")
high_priority_anomalies.to_csv(output_file, index=False)
print(f"📁 Saved prioritized anomalies to {output_file}")

✅ Loaded 2094 anomaly indices for TTI prediction
✅ Created anomalies DataFrame with 2094 entries
✅ Input shape to TTI model: (105, 30, 21)
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
✅ TTI predictions complete.


ValueError: Length of values (105) does not match length of index (2094)

In [27]:
print(df.columns)

Index(['segment', 'anomaly', 'train', 'channel', 'sampling', 'duration', 'len',
       'mean', 'var', 'std', 'kurtosis', 'skew', 'n_peaks', 'smooth10_n_peaks',
       'smooth20_n_peaks', 'diff_peaks', 'diff2_peaks', 'diff_var',
       'diff2_var', 'gaps_squared', 'len_weighted', 'var_div_duration',
       'var_div_len'],
      dtype='object')
