In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from tqdm import tqdm
import random

# --- Configuration ---
# Number of simulated watches, and how many of them receive an injected failure.
N_DEVICES = 30
N_FAILING_DEVICES = 9  # a multiple of 3, so the three failure types split evenly
# Simulated observation window; one sample is generated every SAMPLING_INTERVAL_HOURS.
TIME_START = datetime(year=2023, month=1, day=1)
TIME_END = datetime(year=2023, month=12, day=31)
SAMPLING_INTERVAL_HOURS = 2

# --- MODIFIED Function to generate data for a single device ---
def generate_watch_data(device_id, failure_type="healthy"):
    """
    Generates a time-series dataset for a single smartwatch with specific failure types.

    Parameters
    ----------
    device_id : str
        Identifier written to the ``watch_id`` column of every row.
    failure_type : str, default "healthy"
        Either ``"healthy"`` (no anomaly) or one of ``"battery_failure"``,
        ``"heart_rate_sensor_failure"``, ``"water_seal_failure"``.

    Returns
    -------
    pandas.DataFrame
        One row per sample from TIME_START up to (not including) TIME_END,
        spaced SAMPLING_INTERVAL_HOURS apart, holding the telemetry columns and
        the binary target ``failed_in_next_7d``.

    Raises
    ------
    ValueError
        If ``failure_type`` is not one of the values listed above. Previously an
        unknown type produced positive labels without any injected anomaly.
    """
    # Validate before doing any work so a typo cannot yield a mislabelled device.
    known_failure_types = ("battery_failure", "heart_rate_sensor_failure", "water_seal_failure")
    if failure_type != "healthy" and failure_type not in known_failure_types:
        raise ValueError(
            f"Unknown failure_type {failure_type!r}; expected 'healthy' or one of {known_failure_types}"
        )

    timestamps = pd.to_datetime(np.arange(TIME_START, TIME_END, timedelta(hours=SAMPLING_INTERVAL_HOURS)))
    n_samples = len(timestamps)
    df = pd.DataFrame({'timestamp': timestamps, 'watch_id': device_id})

    # Shared sample index and the phase of the sinusoid reused by battery and steps.
    sample_idx = np.arange(n_samples)
    daily_phase = sample_idx / (24 / SAMPLING_INTERVAL_HOURS) * np.pi

    # --- Generate features for a healthy smartwatch ---
    # The order of the random draws is kept as-is so a seeded run yields the same stream.
    df['battery_level'] = 100 - (np.sin(daily_phase) + 1) * 45 + np.random.normal(0, 2, n_samples)
    df['heart_rate_bpm'] = np.random.normal(70, 15, n_samples) + np.sin(sample_idx / 50) * 10
    # Steps are only non-zero while the sinusoid is above 0.5.
    df['steps_per_hour'] = np.random.poisson(300, n_samples) * (np.sin(daily_phase) > 0.5)
    df['gps_active'] = np.random.choice([0, 1], size=n_samples, p=[0.95, 0.05])
    df['screen_on_time_minutes'] = np.random.exponential(5, n_samples)
    df['ambient_temp_c'] = np.random.uniform(15, 35, n_samples) + np.sin(sample_idx / (365 * 24 / SAMPLING_INTERVAL_HOURS) * 2 * np.pi) * 5
    df['water_pressure_atm'] = np.random.normal(1.0, 0.01, n_samples)
    df['fall_detection_events'] = np.random.poisson(0.001, n_samples)

    # --- Introduce anomalies for failing devices ---
    if failure_type != "healthy":
        # The failure lands somewhere in the last 5-20% of the series; degradation
        # starts 20% of the series length before it and runs to the END of the
        # series (not just up to the failure point).
        failure_point = int(n_samples * np.random.uniform(0.8, 0.95))
        degradation_period = int(n_samples * 0.20)
        degradation_start_index = failure_point - degradation_period
        num_degradation_samples = n_samples - degradation_start_index

        print(f"Injecting '{failure_type}' for {device_id}")

        if failure_type == "battery_failure":
            df.loc[degradation_start_index:, 'battery_level'] *= np.linspace(1, 0.4, num_degradation_samples) # More aggressive drain
            df.loc[degradation_start_index:, 'ambient_temp_c'] += np.linspace(0, 8, num_degradation_samples) # Battery gets warm

        elif failure_type == "heart_rate_sensor_failure":
            df.loc[degradation_start_index:, 'heart_rate_bpm'] += np.random.normal(0, 20, num_degradation_samples) + np.linspace(0, 35, num_degradation_samples) # More erratic

        elif failure_type == "water_seal_failure":
            df.loc[degradation_start_index:, 'water_pressure_atm'] += np.random.normal(0, 0.2, num_degradation_samples) + np.linspace(0, 0.8, num_degradation_samples) # Stronger, more erratic signal
            df.loc[degradation_start_index:, 'battery_level'] *= 0.95 # Slight battery drain from shorting

        # Set target variable: 1 for the 7 days leading up to the failure point.
        # `.loc` slicing is label-based and inclusive on both ends, so the failure
        # sample itself is labelled 1 as well. Rows after the failure point stay 0.
        df['failed_in_next_7d'] = 0
        failure_window_start = failure_point - int(7 * 24 / SAMPLING_INTERVAL_HOURS)
        df.loc[failure_window_start:failure_point, 'failed_in_next_7d'] = 1
    else:
        df['failed_in_next_7d'] = 0

    # Clip values to their valid ranges after noise/anomaly injection.
    df['battery_level'] = df['battery_level'].clip(0, 100)
    df['heart_rate_bpm'] = df['heart_rate_bpm'].clip(30, 220)
    df['steps_per_hour'] = df['steps_per_hour'].clip(0)
    return df

# --- Generate data for all watches and combine ---
# Seed both RNGs used below (`random.shuffle` here, `np.random` inside
# generate_watch_data) so that Restart & Run All reproduces the same dataset.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

all_watches_df = []
# The failing devices are always the first N_FAILING_DEVICES ids; the shuffle
# only randomizes which failure type each of them is assigned below.
failing_watch_ids = [f"watch_{i:03d}" for i in range(N_FAILING_DEVICES)]
random.shuffle(failing_watch_ids)

# --- MODIFIED Main Loop to assign different failures ---
# Round-robin over the shuffled ids so each failure type gets an equal share.
failure_types = ["battery_failure", "heart_rate_sensor_failure", "water_seal_failure"]
watch_id_to_failure = {}
for i, watch_id in enumerate(failing_watch_ids):
    watch_id_to_failure[watch_id] = failure_types[i % len(failure_types)]


for i in tqdm(range(N_DEVICES), desc="Generating Device Data"):
    device_id = f"watch_{i:03d}"
    # Any id without an assigned failure is generated as a healthy device.
    failure_type = watch_id_to_failure.get(device_id, "healthy")
    watch_df = generate_watch_data(device_id, failure_type)
    all_watches_df.append(watch_df)

# Stack the per-device frames into one long table with a fresh 0..N-1 index.
final_df = pd.concat(all_watches_df, ignore_index=True)

# --- Save to new CSV ---
output_filename = 'smartwatch_telemetry_v2.csv'
final_df.to_csv(output_filename, index=False)

print("\n✅ Enhanced smartwatch telemetry dataset generated successfully!")
print(f"📄 Saved to '{output_filename}'")

Generating Device Data:  17%|█▋        | 5/30 [00:00<00:00, 49.90it/s]

Injecting 'battery_failure' for watch_000
Injecting 'heart_rate_sensor_failure' for watch_001
Injecting 'water_seal_failure' for watch_002
Injecting 'heart_rate_sensor_failure' for watch_003
Injecting 'heart_rate_sensor_failure' for watch_004
Injecting 'battery_failure' for watch_005
Injecting 'water_seal_failure' for watch_006
Injecting 'water_seal_failure' for watch_007
Injecting 'battery_failure' for watch_008


Generating Device Data: 100%|██████████| 30/30 [00:00<00:00, 54.75it/s]



✅ Enhanced smartwatch telemetry dataset generated successfully!
📄 Saved to 'smartwatch_telemetry_v2.csv'


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import joblib
from tqdm import tqdm
import os

# --- Configuration ---
# Relative path so this matches where the generation cell writes the CSV (the
# current working directory) instead of assuming a fixed absolute directory.
INPUT_CSV = "smartwatch_telemetry_v2.csv"
OUTPUT_PATH = "."

# Define the sequence length based on the data's 2-hour interval
SAMPLING_INTERVAL_HOURS = 2
TIMESTEPS_PER_DAY = 24 // SAMPLING_INTERVAL_HOURS
SEQUENCE_DAYS = 14  # Look at 14 days of history for each prediction
SEQUENCE_TIMESTEPS = SEQUENCE_DAYS * TIMESTEPS_PER_DAY  # 14 days * 12 samples/day = 168

def _build_sequences(feature_data, label_data, timesteps):
    """Slide a window of `timesteps` rows over one device's data, one row at a time.

    Returns a pair of lists ``(windows, window_labels)`` where ``windows[k]`` is
    ``feature_data[k:k + timesteps]`` and ``window_labels[k]`` is the label of
    that window's last row. Both lists are empty when the device has fewer than
    `timesteps` rows.
    """
    # +1 so the window that ends on the device's final row is included.
    n_windows = len(feature_data) - timesteps + 1
    windows = [feature_data[start:start + timesteps] for start in range(n_windows)]
    window_labels = [label_data[start + timesteps - 1] for start in range(n_windows)]
    return windows, window_labels


def prepare_smartwatch_data():
    """
    Loads raw smartwatch data, scales it, creates sequences, and saves the final arrays.

    Reads INPUT_CSV, min-max scales the feature columns, builds per-watch
    sliding windows of SEQUENCE_TIMESTEPS rows, and writes three files into
    OUTPUT_PATH: the fitted scaler (joblib) and the sequence/label arrays (.npy).

    Returns None. If INPUT_CSV does not exist, an error is printed and nothing
    is written.
    """
    print("--- Step 1: Loading and Preprocessing Data ---")

    try:
        # Define efficient data types for smartwatch features
        dtype_map = {
            'battery_level': 'float32', 'heart_rate_bpm': 'float32',
            'steps_per_hour': 'int16', 'gps_active': 'int8',
            'screen_on_time_minutes': 'float32', 'ambient_temp_c': 'float32',
            'water_pressure_atm': 'float32', 'fall_detection_events': 'int8',
            'failed_in_next_7d': 'int8'
        }
        df = pd.read_csv(INPUT_CSV, dtype=dtype_map, parse_dates=['timestamp'])
    except FileNotFoundError:
        print(f"Error: The file '{INPUT_CSV}' was not found.")
        return

    # Chronological order within each watch is required for the windows below.
    df = df.sort_values(by=['watch_id', 'timestamp'])
    print(f"Loaded {len(df)} data points from {df['watch_id'].nunique()} devices.")

    # --- Step 2: Scaling Features ---

    # Define the features the LSTM will use
    feature_columns = [
        'battery_level', 'heart_rate_bpm', 'steps_per_hour', 'gps_active',
        'screen_on_time_minutes', 'ambient_temp_c', 'water_pressure_atm',
        'fall_detection_events'
    ]

    print(f"\n--- Step 2: Scaling {len(feature_columns)} Features ---")

    # NOTE(review): the scaler is fitted on every row, before any train/test
    # split is made downstream, so test-set statistics influence the scaling.
    scaler = MinMaxScaler()
    df[feature_columns] = scaler.fit_transform(df[feature_columns])

    # Make sure the output directory exists before writing any artifact.
    os.makedirs(OUTPUT_PATH, exist_ok=True)
    scaler_filename = os.path.join(OUTPUT_PATH, "smartwatch_scaler.joblib")
    joblib.dump(scaler, scaler_filename)
    print(f"Scaler saved to '{scaler_filename}'.")

    # --- Step 3: Creating Sequences ---

    print(f"\n--- Step 3: Creating Sequences with {SEQUENCE_TIMESTEPS} Timesteps ({SEQUENCE_DAYS} days) ---")

    sequences = []
    labels = []

    # Group data by each watch so no window spans two different devices.
    grouped_data = df.groupby('watch_id')

    for watch_id, watch_data in tqdm(grouped_data, desc="Processing Watches"):
        watch_sequences, watch_labels = _build_sequences(
            watch_data[feature_columns].values,
            watch_data['failed_in_next_7d'].values,
            SEQUENCE_TIMESTEPS,
        )
        sequences.extend(watch_sequences)
        labels.extend(watch_labels)

    X = np.array(sequences)
    y = np.array(labels)

    print("\n--- Step 4: Finalizing and Saving Data ---")
    print(f"Final shape of sequences (X): {X.shape}")
    print(f"Final shape of labels (y): {y.shape}")

    np.save(os.path.join(OUTPUT_PATH, "smartwatch_sequences.npy"), X)
    np.save(os.path.join(OUTPUT_PATH, "smartwatch_labels.npy"), y)

    print("\nData preparation complete. Model-ready files saved.")
    print("You are now ready for the final step: training the LSTM model.")


# Run the preparation step when this cell (or the exported script) is executed directly.
if __name__ == "__main__":
    prepare_smartwatch_data()

--- Step 1: Loading and Preprocessing Data ---
Loaded 131040 data points from 30 devices.

--- Step 2: Scaling 8 Features ---
Scaler saved to './smartwatch_scaler.joblib'.

--- Step 3: Creating Sequences with 168 Timesteps (14 days) ---


Processing Watches: 100%|██████████| 30/30 [00:00<00:00, 215.75it/s]



--- Step 4: Finalizing and Saving Data ---
Final shape of sequences (X): (126000, 168, 8)
Final shape of labels (y): (126000,)

Data preparation complete. Model-ready files saved.
You are now ready for the final step: training the LSTM model.


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.utils import class_weight
import matplotlib.pyplot as plt
import os

# --- Step 0: Setup and GPU Check ---
# Report whether TensorFlow sees a GPU; training proceeds either way.
print("--- Step 0: Setup and GPU Check ---")
device_name = tf.test.gpu_device_name()
if '/device:GPU:0' in device_name:
  print(f'\nSUCCESS: Found GPU at: {device_name}')
else:
  print('\nWARNING: GPU device not found.')


# --- Step 1: Load and Split Data ---
print("\n--- Step 1: Loading and Splitting Data ---")

# Seed NumPy and TensorFlow so weight initialisation and shuffling are repeatable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

X = np.load('smartwatch_sequences.npy')
y = np.load('smartwatch_labels.npy')

# Split row indices rather than the arrays themselves, so X is materialised
# only once per subset. Three disjoint subsets are produced:
#   train -> fits the weights
#   val   -> drives EarlyStopping / ReduceLROnPlateau
#   test  -> touched only in the final evaluation
# Previously the test set doubled as validation data, so the reported test
# metrics came from data that had already been used to pick the best epoch.
# NOTE(review): the windows were built with stride 1 per device, so neighbouring
# windows overlap almost entirely; a random split therefore still places
# near-duplicates on both sides. Splitting by device or by time would be stricter.
all_idx = np.arange(len(y))
train_val_idx, test_idx = train_test_split(
    all_idx, test_size=0.2, random_state=SEED, stratify=y
)
train_idx, val_idx = train_test_split(
    train_val_idx, test_size=0.2, random_state=SEED, stratify=y[train_val_idx]
)
X_train, y_train = X[train_idx], y[train_idx]
X_val, y_val = X[val_idx], y[val_idx]
X_test, y_test = X[test_idx], y[test_idx]
print(f"Train: {X_train.shape}, Validation: {X_val.shape}, Test: {X_test.shape}")

# --- Step 2: Handle Class Imbalance ---
# 'balanced' weights are inversely proportional to class frequency, so the rare
# positive class contributes as much to the loss as the majority class.
print("\n--- Step 2: Calculating Class Weights ---")
classes = np.unique(y_train)
class_weights = class_weight.compute_class_weight('balanced', classes=classes, y=y_train)
# Build the mapping from the classes actually present instead of indexing [0]/[1].
class_weight_dict = {int(label): float(weight) for label, weight in zip(classes, class_weights)}
print(f"Calculated class weights: {class_weight_dict}")


# --- Step 3: Build the LSTM Model Architecture ---
print("\n--- Step 3: Building the LSTM Model ---")
input_shape = (X_train.shape[1], X_train.shape[2])  # (timesteps, features)
model = Sequential([
    # An explicit Input layer replaces `input_shape=` on the LSTM, which Keras
    # warns about in Sequential models.
    keras.Input(shape=input_shape),
    LSTM(64, return_sequences=False),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])
model.summary()


# --- Step 4: Compile and Train the Model (MODIFIED) ---
print("\n--- Step 4: Compiling and Training the Model ---")

# FIX 1: Use a lower learning rate
optimizer = keras.optimizers.Adam(learning_rate=0.0001)

# FIX 2: Define callbacks for smarter training
# Stop training if the validation loss doesn't improve for 3 epochs
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True # Automatically restore the best model
)
# Reduce the learning rate if validation loss plateaus
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2, # Reduce by a factor of 5
    patience=2
)

model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')]
)

# Train the model with the new callbacks
history = model.fit(
    X_train,
    y_train,
    epochs=20, # We can still set a max, but EarlyStopping will likely finish first
    batch_size=128,
    validation_data=(X_val, y_val), # Held-out validation split, NOT the test set
    class_weight=class_weight_dict,
    callbacks=[early_stopping, reduce_lr] # Add the callbacks here
)


# --- Step 5: Evaluate the Model ---
# Score the held-out test windows and summarise per-class precision/recall.
print("\n--- Step 5: Evaluating Model Performance ---")
y_pred_proba = model.predict(X_test)
# Turn the sigmoid outputs into hard 0/1 predictions at a 0.5 threshold.
y_pred = np.where(y_pred_proba > 0.5, 1, 0).astype("int32")
print("\nClassification Report (from best model):")
class_names = ['Healthy', 'Failure Imminent']
report_text = classification_report(y_test, y_pred, target_names=class_names)
print(report_text)

# --- Step 6: Save the Trained Model ---
# Persist the trained network to a single HDF5 file.
MODEL_FILENAME = 'smartwatch_model_v2.h5'
model.save(MODEL_FILENAME)
print(f"\nModel training and evaluation complete. Best model saved to '{MODEL_FILENAME}'.")

--- Step 0: Setup and GPU Check ---


--- Step 1: Loading and Splitting Data ---

--- Step 2: Calculating Class Weights ---
Calculated class weights: {0: np.float64(0.5030542579949695), 1: np.float64(82.3529411764706)}

--- Step 3: Building the LSTM Model ---


  super().__init__(**kwargs)



--- Step 4: Compiling and Training the Model ---
Epoch 1/20
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 210ms/step - accuracy: 0.8424 - loss: 0.5834 - precision: 0.0208 - recall: 0.4637 - val_accuracy: 0.9305 - val_loss: 0.2315 - val_precision: 0.0790 - val_recall: 0.9804 - learning_rate: 1.0000e-04
Epoch 2/20
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 210ms/step - accuracy: 0.8902 - loss: 0.2240 - precision: 0.0570 - recall: 0.9857 - val_accuracy: 0.7707 - val_loss: 0.4698 - val_precision: 0.0258 - val_recall: 1.0000 - learning_rate: 1.0000e-04
Epoch 3/20
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 208ms/step - accuracy: 0.8896 - loss: 0.2060 - precision: 0.0516 - recall: 0.9928 - val_accuracy: 0.9038 - val_loss: 0.2671 - val_precision: 0.0593 - val_recall: 1.0000 - learning_rate: 1.0000e-04
Epoch 4/20
[1m788/788[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m205s[0m 211ms/step - accuracy: 0.9349 - loss: 0.130




Classification Report (from best model):
                  precision    recall  f1-score   support

         Healthy       1.00      0.93      0.97     25047
Failure Imminent       0.09      1.00      0.16       153

        accuracy                           0.93     25200
       macro avg       0.54      0.97      0.56     25200
    weighted avg       0.99      0.93      0.96     25200


Model training and evaluation complete. Best model saved to 'smartwatch_model_v2.h5'.


In [None]:
# NOTE(review): this cell repeats the save already performed at the end of the
# training cell. It only works if `model` is still in the kernel from that cell,
# so it fails on a fresh kernel when run on its own.
MODEL_FILENAME = 'smartwatch_model_v2.h5'
model.save(MODEL_FILENAME)
print(f"\nModel training and evaluation complete. Best model saved to '{MODEL_FILENAME}'.")




Model training and evaluation complete. Best model saved to 'smartwatch_model_v2.h5'.
