In [None]:
import pandas as pd
# Load the raw CSV once; the model cells below derive their working frame from `data`
data = pd.read_csv('sunnyside_percent_25.csv')
# Preview the first rows as the cell output
data.head()

In [None]:
import time
import numpy as np

def measure_inference_time(model, X_test, y_test, num_samples=10, num_iterations=100):
    """
    Measure the average single-sample inference latency of ``model``.

    Each selected test sample is passed to ``model.predict`` as a batch of one,
    ``num_iterations`` times in a row; the mean wall-clock time of those calls,
    minus the cost of an empty Python loop iteration, is recorded for the sample.

    Parameters:
    -----------
    model : object exposing ``predict(batch, verbose=0)``
        The model to time
    X_test : array-like supporting ``len()`` and slicing
        Test data; the first axis indexes samples
    y_test : any
        Unused; kept so existing callers keep working
    num_samples : int or None
        How many leading samples of ``X_test`` to time (capped at
        ``len(X_test)``). ``None`` times every sample.
        The previous implementation used ``len(X_test[10])`` here, i.e. the
        length of the 11th sample rather than a sample count, which also
        raised IndexError for fewer than 11 samples.
    num_iterations : int
        Number of repeated ``predict`` calls averaged per sample

    Returns:
    --------
    list of float
        Net inference time in seconds for each timed sample (empty when no
        sample was available)
    """
    if num_iterations < 1:
        raise ValueError("num_iterations must be at least 1")

    total_samples = len(X_test)
    if num_samples is None:
        sample_count = total_samples
    else:
        # Clamp so we never index past the end of X_test
        sample_count = max(0, min(num_samples, total_samples))

    # Measure baseline overhead of one bare loop iteration
    # (perf_counter is a monotonic, high-resolution clock suited to short intervals)
    t1 = time.perf_counter()
    for _ in range(num_iterations):
        x = 2
    t1 = (time.perf_counter() - t1) / num_iterations

    # Store timing results
    arr = []

    # Process each selected test sample
    for i in range(sample_count):
        # Get single test sample
        test_sample = X_test[i:i+1]  # Keep the batch dimension

        # Average the latency over num_iterations repeated predictions
        t2 = time.perf_counter()
        for _ in range(num_iterations):
            _ = model.predict(test_sample, verbose=0)
        t2 = (time.perf_counter() - t2) / num_iterations

        # Calculate net inference time
        arr.append(t2 - t1)

    if not arr:
        # min()/max() would raise on an empty list; report and return instead
        print("\nInference Time Statistics:")
        print("No samples available for timing.")
        return arr

    # Calculate statistics
    avg_time = np.mean(arr)
    std_time = np.std(arr)

    print("\nInference Time Statistics:")
    print(f"Average inference time per sample: {avg_time:.4f} seconds")
    print(f"Standard deviation: {std_time:.4f} seconds")
    print(f"Min time: {min(arr):.4f} seconds")
    print(f"Max time: {max(arr):.4f} seconds")

    return arr

In [None]:
import numpy as np
import psutil
from memory_profiler import memory_usage

def measure_subset_memory_usage(model, X_test, start_idx=0, num_samples=100, num_runs=10):
    """
    Profile peak memory while ``model.predict`` runs on a slice of the test data.

    The slice ``X_test[start_idx:start_idx + num_samples]`` is predicted
    ``num_runs`` times under ``memory_profiler.memory_usage``; for every run the
    highest sampled value minus the baseline RSS taken before the first run is
    recorded.

    Parameters:
    -----------
    model : ML model object
        Model whose ``predict`` method is profiled
    X_test : numpy.ndarray
        Test data
    start_idx : int
        First index of the slice
    num_samples : int
        Length of the slice
    num_runs : int
        How many times the measurement is repeated

    Returns:
    --------
    dict
        ``memory_results`` (per-run peak above baseline, MB), ``memory_stats``
        (mean/std/min/max/per_sample_mean), ``baseline_memory`` (MB) and
        ``subset_size``
    """
    # Select the slice of samples to profile
    stop_idx = start_idx + num_samples
    X_subset = X_test[start_idx:stop_idx]

    # Resident set size of this process before predicting, bytes -> MB
    rss_bytes = psutil.Process().memory_info().rss
    baseline_memory = rss_bytes / 1024 / 1024
    print(f"Baseline memory: {baseline_memory:.2f} MB")

    print(f"\nMeasuring memory usage for {len(X_subset)} samples ({num_runs} runs)...")

    # Zero-argument callable handed to the profiler
    def run_prediction():
        return model.predict(X_subset)

    profiled_call = (run_prediction, (), {})

    memory_results = []
    for i in range(num_runs):
        # Sample memory every 5 ms while the prediction executes once
        sampled_mb = memory_usage(
            profiled_call,
            interval=0.005,
            max_iterations=1,
            include_children=True
        )

        # Peak of this run relative to the baseline
        peak_memory = max(sampled_mb) - baseline_memory
        memory_results.append(peak_memory)
        print(f"Run {i+1}/{num_runs}: Peak memory usage: {peak_memory:.2f} MB")

    # Aggregate across runs
    mean_peak = np.mean(memory_results)
    memory_stats = {
        'mean': mean_peak,
        'std': np.std(memory_results),
        'min': np.min(memory_results),
        'max': np.max(memory_results),
        'per_sample_mean': mean_peak / len(X_subset)
    }

    print("\nMemory Usage Statistics (for subset):")
    print(f"Subset size: {len(X_subset)} samples")
    print(f"Average peak memory for subset: {memory_stats['mean']:.2f} MB")
    print(f"Standard deviation: {memory_stats['std']:.2f} MB")
    print(f"Min peak memory: {memory_stats['min']:.2f} MB")
    print(f"Max peak memory: {memory_stats['max']:.2f} MB")
    print(f"Average memory per sample: {memory_stats['per_sample_mean']:.4f} MB")

    summary = {
        'memory_results': memory_results,
        'memory_stats': memory_stats,
        'baseline_memory': baseline_memory,
        'subset_size': len(X_subset)
    }
    return summary

In [None]:
# Shared settings read by every model cell below
epochs = 10  # maximum number of training epochs passed to model.fit
sequence_length = 24  # number of consecutive rows in each input window

num_samples = 20000  # size of the test-set slice passed to measure_subset_memory_usage
num_runs = 10  # number of repeated memory measurements

# TCN

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
import sys
# Add the path to the Python path
sys.path.append('keras-tcn-master')

# Import TCN
from tcn import TCN

# Work on a copy so the frame loaded in the first cell stays untouched
df = data.copy()

# Parse the time columns: epoch seconds -> datetime, and 'datetime' -> UTC-aware datetime
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

# Drop columns that are not used as model inputs
# ('Senosor' presumably matches the spelling of the CSV header — verify)
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)

# Forward-fill missing values (rows before a column's first valid value stay NaN)
df = df.ffill()

# Encode the 'Class' column as integer ids; labels absent from the mapping become NaN
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
df['Class'] = df['Class'].map(class_encoding)

# Prepare data for TCN
# (the self-assignment is a no-op; sequence_length is set in the configuration cell)
sequence_length = sequence_length
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
labels = df['Class'].values

# Sliding windows: each sample is `sequence_length` consecutive rows and its target
# is the class of the row immediately after the window
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X, y = np.array(X), np.array(y)

# Split the data into train and test sets
# NOTE(review): windows overlap and train_test_split shuffles by default, so
# near-identical windows end up in both sets — consider a chronological split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize per feature: flatten the windows to 2-D, fit on the training data only,
# apply the same scaling to the test data, then restore the 3-D shape
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the labels for categorical cross-entropy
num_classes = len(class_encoding)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Create the TCN model: two stacked TCN layers (the first returns the full sequence,
# the second only its last output) followed by a dense softmax classifier
tcn_model = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    TCN(64, kernel_size=3, nb_stacks=2, dilations=[1, 2, 4, 8], padding='causal', 
        use_batch_norm=True, dropout_rate=0.2, return_sequences=True),
    TCN(32, kernel_size=3, nb_stacks=2, dilations=[1, 2, 4, 8], padding='causal', 
        use_batch_norm=True, dropout_rate=0.2, return_sequences=False),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(num_classes, activation='softmax')
])

tcn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE(review): patience equals the configured epoch count (10), so early stopping
# looks unable to trigger unless `epochs` is raised — confirm this is intended
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train, holding out 20% of the training windows for validation
history = tcn_model.fit(X_train, y_train_cat, validation_split=0.2,
                    epochs= epochs, batch_size=128, verbose=1, callbacks=[early_stopping])


# Evaluate the model: convert predicted probabilities and one-hot targets to class ids
y_pred = tcn_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics (precision/recall/F1 are support-weighted averages over classes)
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision, recall, f1, _ = precision_recall_fscore_support(y_test_classes, y_pred_classes, average='weighted')

print("\nModel Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")


# classification_report is imported here rather than with the imports at the top of the cell
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Dict form of the per-class report (not read again in this cell)
class_report = classification_report(y_test_classes, y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   output_dict=True)

# Print detailed metrics
print("\nModel Accuracy:", f"{accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_classes, y_pred_classes,
                          target_names=list(class_encoding.keys()),
                          digits=4))  # report values with four decimal places

# Time single-sample predictions with the helper defined earlier in the notebook
print("Starting inference time measurement...")
inference_times = measure_inference_time(
    model=tcn_model,
    X_test=X_test,
    y_test=y_test
)

# Profile prediction memory on the leading slice of the test set
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    model=tcn_model,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # slice size from the configuration cell
    num_runs= num_runs
)

# ResNet

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Conv1D, BatchNormalization, Activation, Add, GlobalAveragePooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Work on a copy so the frame loaded in the first cell stays untouched
# NOTE(review): this preprocessing is repeated verbatim in each model cell — a shared helper would remove the duplication
df = data.copy()

# Parse the time columns: epoch seconds -> datetime, and 'datetime' -> UTC-aware datetime
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

# Drop columns that are not used as model inputs
# ('Senosor' presumably matches the spelling of the CSV header — verify)
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)

# Forward-fill missing values (rows before a column's first valid value stay NaN)
df = df.ffill()

# Encode the 'Class' column as integer ids; labels absent from the mapping become NaN
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
df['Class'] = df['Class'].map(class_encoding)

# Prepare data for ResNet
# (the self-assignment is a no-op; sequence_length is set in the configuration cell)
sequence_length = sequence_length
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
labels = df['Class'].values

# Sliding windows: each sample is `sequence_length` consecutive rows and its target
# is the class of the row immediately after the window
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X, y = np.array(X), np.array(y)

# Split the data into train and test sets
# NOTE(review): windows overlap and train_test_split shuffles by default, so
# near-identical windows end up in both sets — consider a chronological split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize per feature: flatten the windows to 2-D, fit on the training data only,
# apply the same scaling to the test data, then restore the 3-D shape
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the labels for categorical cross-entropy
num_classes = len(class_encoding)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Define ResNet model
def residual_block(x, filters, kernel_size=3, stride=1):
    """
    Apply one 1-D residual block to the tensor ``x``.

    Main path: Conv1D -> BatchNorm -> ReLU -> Conv1D -> BatchNorm, where only the
    first convolution uses ``stride``. The skip path is the input itself, or a
    1x1 Conv1D + BatchNorm projection when the stride is not 1 or the input's
    last dimension differs from ``filters``. Both paths are summed and passed
    through a final ReLU.
    """
    # Main convolutional path
    main_path = Conv1D(filters, kernel_size, strides=stride, padding='same')(x)
    main_path = BatchNormalization()(main_path)
    main_path = Activation('relu')(main_path)
    main_path = Conv1D(filters, kernel_size, padding='same')(main_path)
    main_path = BatchNormalization()(main_path)

    # Skip path: project only when the two paths would otherwise differ in shape
    needs_projection = stride != 1 or x.shape[-1] != filters
    if needs_projection:
        skip_path = Conv1D(filters, 1, strides=stride, padding='same')(x)
        skip_path = BatchNormalization()(skip_path)
    else:
        skip_path = x

    merged = Add()([main_path, skip_path])
    return Activation('relu')(merged)

def create_resnet_model(input_shape, num_classes):
    """
    Build (without compiling) a 1-D ResNet classifier.

    Layout: a width-7 convolutional stem with 64 filters, six residual blocks
    (64, 64, 128 with stride 2, 128, 256 with stride 2, 256), global average
    pooling, a 256-unit ReLU layer and a softmax output of size ``num_classes``.
    """
    inputs = Input(shape=input_shape)

    # Stem
    feature_map = Conv1D(64, 7, padding='same')(inputs)
    feature_map = BatchNormalization()(feature_map)
    feature_map = Activation('relu')(feature_map)

    # (filters, stride) for each residual block, in order
    block_plan = ((64, 1), (64, 1), (128, 2), (128, 1), (256, 2), (256, 1))
    for block_filters, block_stride in block_plan:
        feature_map = residual_block(feature_map, block_filters, stride=block_stride)

    # Classification head
    pooled = GlobalAveragePooling1D()(feature_map)
    hidden = Dense(256, activation='relu')(pooled)
    outputs = Dense(num_classes, activation='softmax')(hidden)

    return Model(inputs, outputs)

# Create and compile ResNet model; input shape is (window length, number of features)
resnet_model = create_resnet_model((X_train.shape[1], X_train.shape[2]), num_classes)
resnet_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model, holding out 20% of the training windows for validation
# NOTE(review): patience equals the configured epoch count (10), so early stopping
# looks unable to trigger unless `epochs` is raised — confirm this is intended
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = resnet_model.fit(X_train, y_train_cat, validation_split=0.2,
                           epochs=epochs, batch_size=128, verbose=1, callbacks=[early_stopping])

# Evaluate the model: convert predicted probabilities and one-hot targets to class ids
y_pred = resnet_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics (precision/recall/F1 are support-weighted averages over classes)
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision, recall, f1, _ = precision_recall_fscore_support(y_test_classes, y_pred_classes, average='weighted')

print("\nModel Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# classification_report is imported here rather than with the imports at the top of the cell
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Dict form of the per-class report (not read again in this cell)
class_report = classification_report(y_test_classes, y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   output_dict=True)

# Print detailed metrics
print("\nModel Accuracy:", f"{accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_classes, y_pred_classes,
                          target_names=list(class_encoding.keys()),
                          digits=4))  # report values with four decimal places

# Time single-sample predictions with the helper defined earlier in the notebook
print("Starting inference time measurement...")
inference_times = measure_inference_time(
    model=resnet_model,
    X_test=X_test,
    y_test=y_test
)

# Profile prediction memory on the leading slice of the test set
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    model=resnet_model,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # slice size from the configuration cell
    num_runs=num_runs
)

# LSTM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Work on a copy so the frame loaded in the first cell stays untouched
# NOTE(review): this preprocessing is repeated verbatim in each model cell — a shared helper would remove the duplication
df = data.copy()

# Parse the time columns: epoch seconds -> datetime, and 'datetime' -> UTC-aware datetime
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

# Drop columns that are not used as model inputs
# ('Senosor' presumably matches the spelling of the CSV header — verify)
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)

# Forward-fill missing values (rows before a column's first valid value stay NaN)
df = df.ffill()

# Encode the 'Class' column as integer ids; labels absent from the mapping become NaN
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
df['Class'] = df['Class'].map(class_encoding)

# Prepare data for LSTM
# (the self-assignment is a no-op; sequence_length is set in the configuration cell)
sequence_length = sequence_length
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
labels = df['Class'].values

# Sliding windows: each sample is `sequence_length` consecutive rows and its target
# is the class of the row immediately after the window
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X, y = np.array(X), np.array(y)

# Split the data into train and test sets
# NOTE(review): windows overlap and train_test_split shuffles by default, so
# near-identical windows end up in both sets — consider a chronological split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize per feature: flatten the windows to 2-D, fit on the training data only,
# apply the same scaling to the test data, then restore the 3-D shape
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the labels for categorical cross-entropy
num_classes = len(class_encoding)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Define LSTM model
def create_lstm_model(input_shape, num_classes):
    """
    Build (without compiling) a stacked LSTM classifier.

    Two LSTM layers (64 units returning the full sequence, then 32 units), each
    followed by 20% dropout, feed a 64 -> 32 ReLU dense head and a softmax output
    of size ``num_classes``.
    """
    recurrent_layers = [
        Input(shape=input_shape),
        LSTM(64, return_sequences=True),
        Dropout(0.2),
        LSTM(32),
        Dropout(0.2),
    ]
    classifier_layers = [
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    return Sequential(recurrent_layers + classifier_layers)

# Create and compile LSTM model; input shape is (window length, number of features)
lstm_model = create_lstm_model((X_train.shape[1], X_train.shape[2]), num_classes)
lstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model, holding out 20% of the training windows for validation
# NOTE(review): patience equals the configured epoch count (10), so early stopping
# looks unable to trigger unless `epochs` is raised — confirm this is intended
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = lstm_model.fit(X_train, y_train_cat, validation_split=0.2,
                         epochs=epochs, batch_size=128, verbose=1, callbacks=[early_stopping])

# Evaluate the model: convert predicted probabilities and one-hot targets to class ids
y_pred = lstm_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics (precision/recall/F1 are support-weighted averages over classes)
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision, recall, f1, _ = precision_recall_fscore_support(y_test_classes, y_pred_classes, average='weighted')

print("\nModel Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# classification_report is imported here rather than with the imports at the top of the cell
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Dict form of the per-class report (not read again in this cell)
class_report = classification_report(y_test_classes, y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   output_dict=True)

# Print detailed metrics
print("\nModel Accuracy:", f"{accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_classes, y_pred_classes,
                          target_names=list(class_encoding.keys()),
                          digits=4))  # report values with four decimal places

# Time single-sample predictions with the helper defined earlier in the notebook
print("Starting inference time measurement...")
inference_times = measure_inference_time(
    model=lstm_model,
    X_test=X_test,
    y_test=y_test
)

# Histogram of the per-sample net inference times returned above
plt.figure(figsize=(10, 5))
plt.hist(inference_times, bins=30)
plt.title('Distribution of Inference Times')
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency')
plt.show()


# Profile prediction memory on the leading slice of the test set
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    model=lstm_model,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # slice size from the configuration cell
    num_runs=num_runs
)


# Bi-LSTM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Work on a copy so the frame loaded in the first cell stays untouched
# NOTE(review): this preprocessing is repeated verbatim in each model cell — a shared helper would remove the duplication
df = data.copy()

# Parse the time columns: epoch seconds -> datetime, and 'datetime' -> UTC-aware datetime
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

# Drop columns that are not used as model inputs
# ('Senosor' presumably matches the spelling of the CSV header — verify)
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)

# Forward-fill missing values (rows before a column's first valid value stay NaN)
df = df.ffill()

# Encode the 'Class' column as integer ids; labels absent from the mapping become NaN
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
df['Class'] = df['Class'].map(class_encoding)

# Prepare data for Bi-LSTM
# (the self-assignment is a no-op; sequence_length is set in the configuration cell)
sequence_length = sequence_length
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
labels = df['Class'].values

# Sliding windows: each sample is `sequence_length` consecutive rows and its target
# is the class of the row immediately after the window
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X, y = np.array(X), np.array(y)

# Split the data into train and test sets
# NOTE(review): windows overlap and train_test_split shuffles by default, so
# near-identical windows end up in both sets — consider a chronological split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize per feature: flatten the windows to 2-D, fit on the training data only,
# apply the same scaling to the test data, then restore the 3-D shape
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the labels for categorical cross-entropy
num_classes = len(class_encoding)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Define Bi-LSTM model
def create_bilstm_model(input_shape, num_classes):
    """
    Build (without compiling) a stacked bidirectional LSTM classifier.

    Two Bidirectional LSTM layers (64 units returning the full sequence, then
    32 units), each followed by 20% dropout, feed a 64 -> 32 ReLU dense head and
    a softmax output of size ``num_classes``.
    """
    recurrent_layers = [
        Input(shape=input_shape),
        Bidirectional(LSTM(64, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(32)),
        Dropout(0.2),
    ]
    classifier_layers = [
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    return Sequential(recurrent_layers + classifier_layers)

# Create and compile Bi-LSTM model; input shape is (window length, number of features)
bilstm_model = create_bilstm_model((X_train.shape[1], X_train.shape[2]), num_classes)
bilstm_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model, holding out 20% of the training windows for validation
# NOTE(review): patience equals the configured epoch count (10), so early stopping
# looks unable to trigger unless `epochs` is raised — confirm this is intended
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = bilstm_model.fit(X_train, y_train_cat, validation_split=0.2,
                           epochs=epochs, batch_size=128, verbose=1, callbacks=[early_stopping])

# Evaluate the model: convert predicted probabilities and one-hot targets to class ids
y_pred = bilstm_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics (precision/recall/F1 are support-weighted averages over classes)
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision, recall, f1, _ = precision_recall_fscore_support(y_test_classes, y_pred_classes, average='weighted')

print("\nModel Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# classification_report is imported here rather than with the imports at the top of the cell
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Dict form of the per-class report (not read again in this cell)
class_report = classification_report(y_test_classes, y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   output_dict=True)

# Print detailed metrics
print("\nModel Accuracy:", f"{accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_classes, y_pred_classes,
                          target_names=list(class_encoding.keys()),
                          digits=4))  # report values with four decimal places

# Time single-sample predictions with the helper defined earlier in the notebook
print("Starting inference time measurement...")
inference_times = measure_inference_time(
    model=bilstm_model,
    X_test=X_test,
    y_test=y_test
)

# Histogram of the per-sample net inference times returned above
plt.figure(figsize=(10, 5))
plt.hist(inference_times, bins=30)
plt.title('Distribution of Inference Times')
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency')
plt.show()

# Profile prediction memory on the leading slice of the test set
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    model=bilstm_model,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # slice size from the configuration cell
    num_runs=num_runs
)


# GRU

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GRU, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Work on a copy so the frame loaded in the first cell stays untouched
# NOTE(review): this preprocessing is repeated verbatim in each model cell — a shared helper would remove the duplication
df = data.copy()

# Parse the time columns: epoch seconds -> datetime, and 'datetime' -> UTC-aware datetime
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

# Drop columns that are not used as model inputs
# ('Senosor' presumably matches the spelling of the CSV header — verify)
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)

# Forward-fill missing values (rows before a column's first valid value stay NaN)
df = df.ffill()

# Encode the 'Class' column as integer ids; labels absent from the mapping become NaN
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
df['Class'] = df['Class'].map(class_encoding)

# Prepare data for GRU
# (the self-assignment is a no-op; sequence_length is set in the configuration cell)
sequence_length = sequence_length
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
labels = df['Class'].values

# Sliding windows: each sample is `sequence_length` consecutive rows and its target
# is the class of the row immediately after the window
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X, y = np.array(X), np.array(y)

# Split the data into train and test sets
# NOTE(review): windows overlap and train_test_split shuffles by default, so
# near-identical windows end up in both sets — consider a chronological split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize per feature: flatten the windows to 2-D, fit on the training data only,
# apply the same scaling to the test data, then restore the 3-D shape
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the labels for categorical cross-entropy
num_classes = len(class_encoding)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)

# Define GRU model
def create_gru_model(input_shape, num_classes):
    """
    Build (without compiling) a stacked GRU classifier.

    Two GRU layers (64 units returning the full sequence, then 32 units), each
    followed by 20% dropout, feed a 64 -> 32 ReLU dense head and a softmax output
    of size ``num_classes``.
    """
    recurrent_layers = [
        Input(shape=input_shape),
        GRU(64, return_sequences=True),
        Dropout(0.2),
        GRU(32),
        Dropout(0.2),
    ]
    classifier_layers = [
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    return Sequential(recurrent_layers + classifier_layers)

# Create and compile GRU model; input shape is (window length, number of features)
gru_model = create_gru_model((X_train.shape[1], X_train.shape[2]), num_classes)
gru_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model, holding out 20% of the training windows for validation
# NOTE(review): patience equals the configured epoch count (10), so early stopping
# looks unable to trigger unless `epochs` is raised — confirm this is intended
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = gru_model.fit(X_train, y_train_cat, validation_split=0.2,
                        epochs=epochs, batch_size=128, verbose=1, callbacks=[early_stopping])

# Evaluate the model: convert predicted probabilities and one-hot targets to class ids
y_pred = gru_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics (precision/recall/F1 are support-weighted averages over classes)
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision, recall, f1, _ = precision_recall_fscore_support(y_test_classes, y_pred_classes, average='weighted')

print("\nModel Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# classification_report is imported here rather than with the imports at the top of the cell
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Text form of the per-class report with four decimal places
# (the other model cells store the dict form in class_report instead)
class_report = classification_report(y_test_classes, y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   digits=4)

# Print detailed metrics (accuracy is printed unformatted here)
print("\nModel Accuracy:", accuracy)
print("\nClassification Report:")
print(class_report)

# Time single-sample predictions with the helper defined earlier in the notebook
inference_times = measure_inference_time(
    model=gru_model,
    X_test=X_test,
    y_test=y_test
)

# Histogram of the per-sample net inference times returned above
plt.figure(figsize=(10, 5))
plt.hist(inference_times, bins=30)
plt.title('Distribution of Inference Times')
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency')
plt.show()

# Profile prediction memory on the leading slice of the test set
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    model=gru_model,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # slice size from the configuration cell
    num_runs=num_runs
)


# TST

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load and preprocess data
# Unlike the other model cells this binds `data` directly instead of copying it;
# the drop() calls below return new frames, so `data` itself is not modified
df = data
df = df.drop(columns=['timestamp_utc'])
#df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
# 'Senosor' presumably matches the spelling of the CSV header — verify
df = df.drop([ 'Senosor'], axis=1)
# Forward-fill missing values (rows before a column's first valid value stay NaN)
df = df.ffill()

# Prepare sequences
# (the self-assignment is a no-op; sequence_length is set in the configuration cell)
sequence_length = sequence_length
features = df.drop([ 'datetime', 'Class'], axis=1).values
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
# Integer class ids; labels absent from the mapping become NaN
labels = df['Class'].map(class_encoding).values

# Sliding windows: each sample is `sequence_length` consecutive rows and its target
# is the class of the row immediately after the window
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X = np.array(X)
y = np.array(y)

# Split and scale data
# NOTE(review): windows overlap and train_test_split shuffles by default, so
# near-identical windows end up in both sets — consider a chronological split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize per feature: flatten the windows to 2-D, fit on the training data only,
# apply the same scaling to the test data, then restore the 3-D shape
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the labels for categorical cross-entropy
y_train_cat = tf.keras.utils.to_categorical(y_train, len(class_encoding))
y_test_cat = tf.keras.utils.to_categorical(y_test, len(class_encoding))


class MultiHeadSelfAttention(layers.Layer):
    """
    Multi-head scaled dot-product self-attention.

    Queries, keys and values are all linear projections of the same input. The
    projections are split into ``num_heads`` heads of size ``d_model // num_heads``,
    attended independently, concatenated again and passed through a final dense
    projection of width ``d_model``. No attention mask is applied.
    """

    def __init__(self, d_model, num_heads, **kwargs):
        """
        Parameters:
        -----------
        d_model : int
            Width of the projections and of the layer output
        num_heads : int
            Number of attention heads; must divide ``d_model`` evenly

        Raises:
        -------
        ValueError
            If ``d_model`` is not divisible by ``num_heads``
        """
        super().__init__(**kwargs)
        # Without this check the floor division below silently truncates and the
        # failure only shows up later as a reshape error inside split_heads()
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.depth = d_model // num_heads

        self.wq = layers.Dense(d_model)
        self.wk = layers.Dense(d_model)
        self.wv = layers.Dense(d_model)
        self.dense = layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        """Reshape (batch, steps, d_model) to (batch, num_heads, steps, depth)."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Return the self-attended sequence, shaped (batch, steps, d_model)."""
        batch_size = tf.shape(inputs)[0]

        # Linear projections of the same input
        q = self.wq(inputs)
        k = self.wk(inputs)
        v = self.wv(inputs)

        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        # Dot-product scores scaled by sqrt(depth)
        scaled_attention = tf.matmul(q, k, transpose_b=True)
        scaled_attention = scaled_attention / tf.math.sqrt(tf.cast(self.depth, tf.float32))

        # Normalize the scores over the key axis and mix the values
        attention_weights = tf.nn.softmax(scaled_attention, axis=-1)
        output = tf.matmul(attention_weights, v)

        # Merge the heads back into a single d_model-wide axis
        output = tf.transpose(output, perm=[0, 2, 1, 3])
        output = tf.reshape(output, (batch_size, -1, self.d_model))

        return self.dense(output)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update({
            "d_model": self.d_model,
            "num_heads": self.num_heads
        })
        return config

class TransformerBlock(layers.Layer):
    """
    One transformer encoder block.

    Self-attention and a two-layer feed-forward network, each followed by
    dropout, a residual connection and layer normalization.
    """

    def __init__(self, d_model, num_heads, dff, dropout=0.1, **kwargs):
        """Store the hyperparameters and create the sub-layers."""
        super().__init__(**kwargs)
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.dropout_rate = dropout

        # Attention sub-layer
        self.mha = MultiHeadSelfAttention(d_model, num_heads)

        # Position-wise feed-forward sub-layer: d_model -> dff (ReLU) -> d_model
        feed_forward_layers = [
            layers.Dense(dff, activation='relu'),
            layers.Dense(d_model)
        ]
        self.ffn = tf.keras.Sequential(feed_forward_layers)

        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = layers.Dropout(dropout)
        self.dropout2 = layers.Dropout(dropout)

    def call(self, inputs, training=False):
        """Run the block; ``training`` is forwarded to both dropout layers."""
        # Attention with residual connection
        attended = self.dropout1(self.mha(inputs), training=training)
        normed_attention = self.layernorm1(inputs + attended)

        # Feed-forward with residual connection
        transformed = self.dropout2(self.ffn(normed_attention), training=training)
        return self.layernorm2(normed_attention + transformed)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        return {
            **super().get_config(),
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "dff": self.dff,
            "dropout": self.dropout_rate
        }

class PositionalEncoding(layers.Layer):
    """
    Add a fixed sinusoidal positional encoding to a sequence.

    Even feature columns hold ``sin(position * freq)`` and odd columns hold
    ``cos(position * freq)``, with frequencies
    ``exp(-log(10000) * 2i / d_model)``. The table is built once in the
    constructor and is not trainable.
    """

    def __init__(self, max_steps, d_model, **kwargs):
        """
        Parameters:
        -----------
        max_steps : int
            Number of positions in the precomputed table
        d_model : int
            Width of the encoding (may be even or odd)
        """
        super().__init__(**kwargs)
        self.max_steps = max_steps
        self.d_model = d_model

        # Create positional encoding matrix once during initialization
        position = tf.range(max_steps, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, d_model, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / d_model))

        pe = tf.zeros((max_steps, d_model))
        # Use scatter_nd to update sine values (even columns: 0, 2, 4, ...)
        sine_indices = tf.stack([
            tf.repeat(tf.range(max_steps), tf.shape(div_term)),
            tf.tile(tf.range(0, d_model, 2), [max_steps])
        ], axis=1)
        sine_updates = tf.reshape(tf.sin(position * div_term), [-1])
        pe = tf.tensor_scatter_nd_update(pe, sine_indices, sine_updates)

        # Use scatter_nd to update cosine values (odd columns: 1, 3, 5, ...).
        # There are d_model // 2 odd columns, one fewer than the even columns when
        # d_model is odd, so the frequencies are truncated to keep the index and
        # update shapes in agreement. For even d_model this changes nothing.
        num_odd_columns = d_model // 2
        if num_odd_columns > 0:
            cosine_div_term = div_term[:num_odd_columns]
            cosine_indices = tf.stack([
                tf.repeat(tf.range(max_steps), tf.shape(cosine_div_term)),
                tf.tile(tf.range(1, d_model, 2), [max_steps])
            ], axis=1)
            cosine_updates = tf.reshape(tf.cos(position * cosine_div_term), [-1])
            pe = tf.tensor_scatter_nd_update(pe, cosine_indices, cosine_updates)

        self.pe = pe[tf.newaxis, :, :]  # Add batch dimension

    def call(self, inputs):
        """Add the encoding for the first ``inputs.shape[1]`` positions to ``inputs``."""
        return inputs + self.pe[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update({
            "max_steps": self.max_steps,
            "d_model": self.d_model
        })
        return config

class TimeSeriesTransformer(Model):
    """Transformer-encoder classifier over windows of time-series features.

    Pipeline: dense projection to ``d_model`` -> positional encoding ->
    ``num_layers`` ``TransformerBlock`` layers -> global average pooling over
    the step axis -> dropout -> softmax over ``num_classes``.

    Args:
        num_layers: Number of stacked ``TransformerBlock`` layers.
        d_model: Width of the projected feature axis.
        num_heads: Attention heads per block.
        dff: Hidden width passed to each block's feed-forward network.
        max_seq_len: Number of positions given to ``PositionalEncoding``.
        num_features: Accepted by the constructor but not read by this class.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate for the blocks and the pooled features.
    """

    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 max_seq_len,
                 num_features,
                 num_classes,
                 dropout_rate=0.1):
        super().__init__()

        # Hyper-parameters kept on the instance.
        self.num_layers, self.d_model = num_layers, d_model
        self.num_heads, self.max_seq_len = num_heads, max_seq_len

        # Front end: feature projection followed by positional information.
        self.input_projection = layers.Dense(d_model)
        self.pos_encoding = PositionalEncoding(max_seq_len, d_model)

        # Encoder stack.
        encoder_stack = []
        for _ in range(num_layers):
            encoder_stack.append(TransformerBlock(d_model, num_heads, dff, dropout_rate))
        self.transformer_blocks = encoder_stack

        # Classification head.
        self.dropout = layers.Dropout(dropout_rate)
        self.global_pooling = layers.GlobalAveragePooling1D()
        self.final_layer = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Run the forward pass and return the softmax class probabilities.

        ``training`` is forwarded to every transformer block and to the
        dropout layer that follows the pooling step.
        """
        hidden = self.pos_encoding(self.input_projection(inputs))

        for block in self.transformer_blocks:
            hidden = block(hidden, training=training)

        pooled = self.dropout(self.global_pooling(hidden), training=training)
        return self.final_layer(pooled)

# Model parameters
# `sequence_length`, `X_train` and `class_encoding` are read here but not
# assigned in this part of the cell; they must already exist when it runs.
num_layers = 4
d_model = 128
num_heads = 8
dff = 256
max_seq_len = sequence_length
num_features = X_train.shape[2]  # size of the last axis of X_train
num_classes = len(class_encoding)
dropout_rate = 0.1

# Create the model (compilation is the next statement)
ts_transformer = TimeSeriesTransformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    max_seq_len=max_seq_len,
    num_features=num_features,
    num_classes=num_classes,
    dropout_rate=dropout_rate
)

# Compile the model
ts_transformer.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model; stop once val_loss has not improved for 10 epochs
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# `y_train_cat` and `epochs` are not assigned in this part of the cell.
history = ts_transformer.fit(
    X_train,
    y_train_cat,
    validation_split=0.2,
    epochs=epochs,
    batch_size=128,
    callbacks=[early_stopping],
    verbose=1
)
# Evaluate the model on the test split
y_pred = ts_transformer.predict(X_test)
# Reduce per-class scores to a class index per sample.
# NOTE(review): y_test_cat is presumably one-hot, as built with to_categorical
# in the sibling cells -- confirm for this cell.
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics (precision/recall/F1 use average='weighted')
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_test_classes,
    y_pred_classes,
    average='weighted'
)

print("\nTime Series Transformer Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# accuracy_score and precision_recall_fscore_support are already used above,
# so they must have been imported earlier; of the names on this line only
# classification_report is first used below.
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Per-class text report, labelled with the keys of class_encoding
class_report = classification_report(y_test_classes, y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   digits=4)

# Print detailed metrics
print("\nModel Accuracy:", accuracy)
print("\nClassification Report:")
print(class_report)

# Time the model with the measure_inference_time helper defined near the top
# of the notebook; it returns a list of timing values.
print("Starting inference time measurement...")
inference_times = measure_inference_time(
    model=ts_transformer,
    X_test=X_test,
    y_test=y_test
)

# Optional: Visualize timing distribution
plt.figure(figsize=(10, 5))
plt.hist(inference_times, bins=30)
plt.title('Distribution of Inference Times')
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency')
plt.show()

# Profile memory on X_test[start_idx:start_idx + num_samples].
# `num_samples` and `num_runs` are not assigned in this part of the cell.
memory_metrics = measure_subset_memory_usage(
    model=ts_transformer,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # number of leading test samples to profile
    num_runs=num_runs
)


# Informer

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# Load and preprocess data
# Work on a copy so the column assignments below do not modify `data`.
df = data.copy()
# 'timestamp_utc' is parsed as a number of seconds (unit='s').
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
# NOTE(review): 'Senosor' presumably matches the CSV header spelling -- confirm
# before "correcting" it.
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)
df = df.ffill()  # forward-fill missing values

# Prepare sequences
# NOTE(review): the next line is a no-op self-assignment; `sequence_length`
# must already be defined by an earlier cell or this raises NameError.
sequence_length = sequence_length
# Every remaining column except the two time columns and the label is a feature.
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
# Class strings that are not keys of class_encoding become NaN here.
labels = df['Class'].map(class_encoding).values

# Sliding windows: rows i .. i+sequence_length-1 form one sample.
# NOTE(review): the label is taken from row i+sequence_length, i.e. the row
# immediately after the window, not the window's last row -- confirm intended.
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X = np.array(X)
y = np.array(y)

# Split and scale data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# StandardScaler works on 2-D input, so the windows are flattened to
# (samples * steps, features), scaled, and reshaped back. The scaler is fitted
# on the training windows only and then applied to the test windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the integer labels for categorical_crossentropy.
y_train_cat = tf.keras.utils.to_categorical(y_train, len(class_encoding))
y_test_cat = tf.keras.utils.to_categorical(y_test, len(class_encoding))


class PositionalEncoding(layers.Layer):
    """Add a fixed sinusoidal positional table to a batch of sequences.

    The table is computed once in ``__init__`` and stored on the instance as
    ``self.pe`` with shape ``(1, max_steps, d_model)``.  Even feature columns
    hold ``sin(pos * w_i)`` and odd feature columns hold ``cos(pos * w_i)``,
    where ``w_i = exp(-2i * ln(10000) / d_model)``.

    Both even and odd ``d_model`` are supported; for an odd width the last
    column is a sine column that has no cosine partner.

    Args:
        max_steps: Number of positions (rows) to precompute.
        d_model: Width of the feature axis the table is added to.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, max_steps, d_model, **kwargs):
        super().__init__(**kwargs)
        self.max_steps = max_steps
        self.d_model = d_model

        # angles[pos, i] = pos * w_i, one column per sine/cosine pair.
        position = tf.range(max_steps, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, d_model, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / d_model))
        angles = position * div_term

        num_even = (d_model + 1) // 2  # columns 0, 2, 4, ... receive sine values
        num_odd = d_model // 2         # columns 1, 3, 5, ... receive cosine values

        sine = tf.sin(angles)
        cosine = tf.cos(angles[:, :num_odd])
        if num_odd < num_even:
            # Odd d_model: pad one dummy cosine column so both halves have the
            # same width for stacking; it is sliced off again below.
            cosine = tf.pad(cosine, [[0, 0], [0, 1]])

        # Stacking on a new last axis and flattening it interleaves the
        # columns as sin_0, cos_0, sin_1, cos_1, ...
        interleaved = tf.reshape(
            tf.stack([sine, cosine], axis=-1),
            (max_steps, 2 * num_even),
        )

        # Trim to d_model columns and add the leading batch dimension.
        self.pe = interleaved[tf.newaxis, :, :d_model]

    def call(self, inputs):
        """Return ``inputs`` plus the first ``tf.shape(inputs)[1]`` rows of the table.

        Axis 1 of ``inputs`` is treated as the step axis; the sliced table has
        a leading dimension of 1 and is broadcast over axis 0 by the addition.
        """
        return inputs + self.pe[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        """Return the base layer config extended with ``max_steps`` and ``d_model``."""
        config = super().get_config()
        config.update({
            "max_steps": self.max_steps,
            "d_model": self.d_model
        })
        return config



class ProbSparseAttention(layers.Layer):
    """Multi-head self-attention over a sampled subset of key/value positions.

    Every query attends only to the ``sample_k`` key positions that rank
    highest in ``_prob_QK``, where ``sample_k = int(ln(L_K) * factor)`` clamped
    to the range ``[1, L_K]`` and ``L_K`` is the number of key positions.

    NOTE(review): this ranks *keys* by their mean score over all queries; the
    Informer paper's ProbSparse scheme selects dominant *queries* -- confirm
    this variant is intended.

    Args:
        d_model: Width of the input/output feature axis.
        num_heads: Number of heads; the per-head width is
            ``d_model // num_heads``.
        factor: Multiplier applied to ``ln(L_K)`` when sizing the key sample.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, d_model, num_heads, factor=5, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.num_heads = num_heads
        self.factor = factor
        self.depth = d_model // num_heads

        self.wq = layers.Dense(d_model)
        self.wk = layers.Dense(d_model)
        self.wv = layers.Dense(d_model)
        self.dense = layers.Dense(d_model)

    def _prob_QK(self, Q, K, sample_k):
        """Return the indices of the highest-scoring key positions per head.

        ``Q`` and ``K`` are the per-head tensors produced in ``call`` (heads on
        axis 1, positions on axis 2).  The result has ``min(L_K, sample_k)``
        indices along its last axis.
        """
        L_K = tf.shape(K)[2]

        Q_K = tf.matmul(Q, K, transpose_b=True)
        Q_K = Q_K / tf.math.sqrt(tf.cast(self.depth, tf.float32))

        # Subtract each query's maximum before exponentiating to keep exp()
        # from overflowing.
        M = tf.math.reduce_max(Q_K, axis=-1, keepdims=True)
        Q_K = tf.exp(Q_K - M)

        # Average over the query axis so each key position gets one score.
        sample_size = tf.minimum(L_K, sample_k)
        mean_attention = tf.reduce_mean(Q_K, axis=2)
        _, indices = tf.nn.top_k(mean_attention, k=sample_size)

        return indices

    def call(self, inputs, training=None):
        """Apply sampled self-attention and return a tensor of width ``d_model``.

        ``training`` is accepted for signature compatibility with the calling
        block but is not used here.
        """
        batch_size = tf.shape(inputs)[0]

        Q = self.wq(inputs)
        K = self.wk(inputs)
        V = self.wv(inputs)

        # Split the feature axis into heads and move heads in front of steps.
        Q = tf.reshape(Q, (batch_size, -1, self.num_heads, self.depth))
        Q = tf.transpose(Q, perm=[0, 2, 1, 3])
        K = tf.reshape(K, (batch_size, -1, self.num_heads, self.depth))
        K = tf.transpose(K, perm=[0, 2, 1, 3])
        V = tf.reshape(V, (batch_size, -1, self.num_heads, self.depth))
        V = tf.transpose(V, perm=[0, 2, 1, 3])

        L_K = tf.shape(K)[2]
        sample_k = tf.cast(tf.math.log(tf.cast(L_K, tf.float32)) * self.factor, tf.int32)
        # ln(1) == 0, so a one-step sequence (or a small factor) would select
        # zero keys and the attention output would be all zeros.  Keep at
        # least one key, and never more than are available.
        sample_k = tf.minimum(tf.maximum(sample_k, 1), L_K)

        indices = self._prob_QK(Q, K, sample_k)

        # Build (batch, head, key) index triples for gather_nd.
        batch_indices = tf.range(batch_size)[:, tf.newaxis, tf.newaxis]
        batch_indices = tf.tile(batch_indices, [1, self.num_heads, sample_k])
        head_indices = tf.range(self.num_heads)[tf.newaxis, :, tf.newaxis]
        head_indices = tf.tile(head_indices, [batch_size, 1, sample_k])

        gather_indices = tf.stack([batch_indices, head_indices, indices], axis=-1)

        K_sampled = tf.gather_nd(K, gather_indices)
        V_sampled = tf.gather_nd(V, gather_indices)

        # Ordinary scaled dot-product attention over the sampled keys only.
        attention_scores = tf.matmul(Q, K_sampled, transpose_b=True)
        attention_scores = attention_scores / tf.math.sqrt(tf.cast(self.depth, tf.float32))

        attention_weights = tf.nn.softmax(attention_scores, axis=-1)
        output = tf.matmul(attention_weights, V_sampled)

        # Merge the heads back into a single feature axis.
        output = tf.transpose(output, perm=[0, 2, 1, 3])
        output = tf.reshape(output, (batch_size, -1, self.d_model))

        return self.dense(output)

    def get_config(self):
        """Return the base layer config extended with this layer's constructor arguments."""
        config = super().get_config()
        config.update({
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "factor": self.factor
        })
        return config

class InformerBlock(layers.Layer):
    """Encoder block: sampled self-attention followed by a feed-forward network.

    Each sub-layer output passes through dropout, is added to the sub-layer's
    input, and is then layer-normalized.

    Args:
        d_model: Width of the feature axis; also the block's output width.
        num_heads: Number of heads passed to ``ProbSparseAttention``.
        dff: Hidden width of the feed-forward network.
        dropout: Dropout rate applied after each sub-layer.
        factor: Sampling factor passed to ``ProbSparseAttention``.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, d_model, num_heads, dff, dropout=0.1, factor=5, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.dropout_rate = dropout
        self.factor = factor

        self.prob_attention = ProbSparseAttention(d_model, num_heads, factor)
        self.ffn = tf.keras.Sequential([
            layers.Dense(dff, activation='relu'),
            layers.Dense(d_model)
        ])

        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = layers.Dropout(dropout)
        self.dropout2 = layers.Dropout(dropout)

    def call(self, inputs, training=None):
        """Apply the attention and feed-forward sub-layers to ``inputs``.

        ``training`` is forwarded to the attention layer and to both dropout
        layers.
        """
        attn_output = self.prob_attention(inputs, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the base layer config extended with this block's constructor arguments."""
        config = super().get_config()
        config.update({
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "dff": self.dff,
            "dropout": self.dropout_rate,
            "factor": self.factor
        })
        return config

class TimeSeriesInformer(Model):
    """Classifier that stacks ``InformerBlock`` layers over projected inputs.

    Pipeline: dense projection to ``d_model`` -> positional encoding ->
    ``num_layers`` ``InformerBlock`` layers -> global average pooling over the
    step axis -> dropout -> softmax over ``num_classes``.

    Args:
        num_layers: Number of stacked ``InformerBlock`` layers.
        d_model: Width of the projected feature axis.
        num_heads: Attention heads per block.
        dff: Hidden width passed to each block's feed-forward network.
        max_seq_len: Number of positions given to ``PositionalEncoding``.
        num_features: Accepted by the constructor but not read by this class.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate for the blocks and the pooled features.
        factor: Sampling factor handed to every ``InformerBlock``.
    """

    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 max_seq_len,
                 num_features,
                 num_classes,
                 dropout_rate=0.1,
                 factor=5):
        super().__init__()

        # Hyper-parameters kept on the instance.
        self.num_layers, self.d_model = num_layers, d_model
        self.num_heads, self.max_seq_len = num_heads, max_seq_len

        # Front end: feature projection followed by positional information.
        self.input_projection = layers.Dense(d_model)
        self.pos_encoding = PositionalEncoding(max_seq_len, d_model)

        # Encoder stack.
        block_stack = []
        for _ in range(num_layers):
            block_stack.append(InformerBlock(d_model, num_heads, dff, dropout_rate, factor))
        self.informer_blocks = block_stack

        # Classification head.
        self.dropout = layers.Dropout(dropout_rate)
        self.global_pooling = layers.GlobalAveragePooling1D()
        self.final_layer = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=None):
        """Run the forward pass and return the softmax class probabilities.

        ``training`` is forwarded to every Informer block and to the dropout
        layer that follows the pooling step.
        """
        hidden = self.pos_encoding(self.input_projection(inputs))

        for block in self.informer_blocks:
            hidden = block(hidden, training=training)

        pooled = self.dropout(self.global_pooling(hidden), training=training)
        return self.final_layer(pooled)


# Model parameters
# `sequence_length`, `X_train` and `class_encoding` come from the data
# preparation statements earlier in this cell.
num_layers = 4
d_model = 128
num_heads = 8
dff = 256
max_seq_len = sequence_length
num_features = X_train.shape[2]  # size of the last axis of X_train
num_classes = len(class_encoding)
dropout_rate = 0.1
factor = 5  # sampling factor forwarded to the attention layers

# Create the Informer model (compilation is the next statement)
ts_informer = TimeSeriesInformer(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    max_seq_len=max_seq_len,
    num_features=num_features,
    num_classes=num_classes,
    dropout_rate=dropout_rate,
    factor=factor
)

# Compile the model
ts_informer.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
# NOTE(review): patience=10 exceeds the 5 epochs requested below, so this
# callback can never stop training early. The other model cells pass
# `epochs=epochs`; confirm whether the hard-coded 5 is intentional.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

history = ts_informer.fit(
    X_train,
    y_train_cat,
    validation_split=0.2,
    epochs=5,
    batch_size=128,
    callbacks=[early_stopping],
    verbose=1
)

# TST-AE

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns



# Load and preprocess data
# Work on a private copy: the column assignments below would otherwise be
# written straight into the shared `data` frame that other cells also read.
df = data.copy()
# 'timestamp_utc' is parsed as a number of seconds (unit='s').
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
# NOTE(review): 'Senosor' presumably matches the CSV header spelling -- confirm
# before "correcting" it.
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)
df = df.ffill()  # forward-fill missing values

# Prepare sequences
# `sequence_length` must already be defined by an earlier cell.
# Every remaining column except the two time columns and the label is a feature.
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
# Class strings that are not keys of class_encoding become NaN here.
labels = df['Class'].map(class_encoding).values

# Sliding windows: rows i .. i+sequence_length-1 form one sample.
# NOTE(review): the label is taken from row i+sequence_length, i.e. the row
# immediately after the window, not the window's last row -- confirm intended.
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X = np.array(X)
y = np.array(y)

# Split and scale data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# StandardScaler works on 2-D input, so the windows are flattened to
# (samples * steps, features), scaled, and reshaped back. The scaler is fitted
# on the training windows only and then applied to the test windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the integer labels for categorical_crossentropy.
y_train_cat = tf.keras.utils.to_categorical(y_train, len(class_encoding))
y_test_cat = tf.keras.utils.to_categorical(y_test, len(class_encoding))


class MultiHeadSelfAttention(layers.Layer):
    """Scaled dot-product self-attention computed over several heads.

    Queries, keys and values are all dense projections of the same input.
    The feature axis is split into ``num_heads`` slices of width
    ``d_model // num_heads``, attention runs per head, and the heads are
    merged again before a final dense projection.

    Args:
        d_model: Width of the input/output feature axis.
        num_heads: Number of attention heads.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, d_model, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.d_model, self.num_heads = d_model, num_heads
        self.depth = d_model // num_heads

        # Projections for queries, keys, values and the merged output.
        self.wq = layers.Dense(d_model)
        self.wk = layers.Dense(d_model)
        self.wv = layers.Dense(d_model)
        self.dense = layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        """Reshape ``x`` to put heads on axis 1 and per-head features last."""
        per_head = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(per_head, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Return the attended representation of ``inputs`` (width ``d_model``)."""
        batch_size = tf.shape(inputs)[0]

        # Project, then split each projection into heads.
        q = self.split_heads(self.wq(inputs), batch_size)
        k = self.split_heads(self.wk(inputs), batch_size)
        v = self.split_heads(self.wv(inputs), batch_size)

        # Scores are scaled by sqrt(depth) before the softmax over keys.
        scale = tf.math.sqrt(tf.cast(self.depth, tf.float32))
        attention_weights = tf.nn.softmax(tf.matmul(q, k, transpose_b=True) / scale, axis=-1)

        # Weighted sum of values, then merge heads back into one feature axis.
        context = tf.transpose(tf.matmul(attention_weights, v), perm=[0, 2, 1, 3])
        merged = tf.reshape(context, (batch_size, -1, self.d_model))

        return self.dense(merged)

    def get_config(self):
        """Return the base layer config extended with ``d_model`` and ``num_heads``."""
        return {
            **super().get_config(),
            "d_model": self.d_model,
            "num_heads": self.num_heads,
        }

class TransformerBlock(layers.Layer):
    """Encoder block: self-attention followed by a two-layer feed-forward network.

    Each sub-layer output passes through dropout, is added to the sub-layer's
    input, and is then layer-normalized.

    Args:
        d_model: Width of the feature axis; also the block's output width.
        num_heads: Number of heads passed to ``MultiHeadSelfAttention``.
        dff: Hidden width of the feed-forward network.
        dropout: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, d_model, num_heads, dff, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.d_model, self.num_heads = d_model, num_heads
        self.dff, self.dropout_rate = dff, dropout

        self.mha = MultiHeadSelfAttention(d_model, num_heads)

        # Feed-forward network: widen to dff with ReLU, then back to d_model.
        widen = layers.Dense(dff, activation='relu')
        narrow = layers.Dense(d_model)
        self.ffn = tf.keras.Sequential([widen, narrow])

        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)

        self.dropout1 = layers.Dropout(dropout)
        self.dropout2 = layers.Dropout(dropout)

    def call(self, inputs, training=False):
        """Apply both sub-layers; ``training`` controls the two dropout layers."""
        # Attention sub-layer with residual connection.
        attended = self.dropout1(self.mha(inputs), training=training)
        out1 = self.layernorm1(inputs + attended)

        # Feed-forward sub-layer with residual connection.
        transformed = self.dropout2(self.ffn(out1), training=training)
        return self.layernorm2(out1 + transformed)

    def get_config(self):
        """Return the base layer config extended with this block's constructor arguments."""
        return {
            **super().get_config(),
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "dff": self.dff,
            "dropout": self.dropout_rate,
        }

class PositionalEncoding(layers.Layer):
    """Add a fixed sinusoidal positional table to a batch of sequences.

    The table is computed once in ``__init__`` and stored on the instance as
    ``self.pe`` with shape ``(1, max_steps, d_model)``.  Even feature columns
    hold ``sin(pos * w_i)`` and odd feature columns hold ``cos(pos * w_i)``,
    where ``w_i = exp(-2i * ln(10000) / d_model)``.

    Both even and odd ``d_model`` are supported; for an odd width the last
    column is a sine column that has no cosine partner.

    Args:
        max_steps: Number of positions (rows) to precompute.
        d_model: Width of the feature axis the table is added to.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, max_steps, d_model, **kwargs):
        super().__init__(**kwargs)
        self.max_steps = max_steps
        self.d_model = d_model

        # angles[pos, i] = pos * w_i, one column per sine/cosine pair.
        position = tf.range(max_steps, dtype=tf.float32)[:, tf.newaxis]
        div_term = tf.exp(tf.range(0, d_model, 2, dtype=tf.float32) * -(tf.math.log(10000.0) / d_model))
        angles = position * div_term

        num_even = (d_model + 1) // 2  # columns 0, 2, 4, ... receive sine values
        num_odd = d_model // 2         # columns 1, 3, 5, ... receive cosine values

        sine = tf.sin(angles)
        cosine = tf.cos(angles[:, :num_odd])
        if num_odd < num_even:
            # Odd d_model: pad one dummy cosine column so both halves have the
            # same width for stacking; it is sliced off again below.
            cosine = tf.pad(cosine, [[0, 0], [0, 1]])

        # Stacking on a new last axis and flattening it interleaves the
        # columns as sin_0, cos_0, sin_1, cos_1, ...
        interleaved = tf.reshape(
            tf.stack([sine, cosine], axis=-1),
            (max_steps, 2 * num_even),
        )

        # Trim to d_model columns and add the leading batch dimension.
        self.pe = interleaved[tf.newaxis, :, :d_model]

    def call(self, inputs):
        """Return ``inputs`` plus the first ``tf.shape(inputs)[1]`` rows of the table.

        Axis 1 of ``inputs`` is treated as the step axis; the sliced table has
        a leading dimension of 1 and is broadcast over axis 0 by the addition.
        """
        return inputs + self.pe[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        """Return the base layer config extended with ``max_steps`` and ``d_model``."""
        config = super().get_config()
        config.update({
            "max_steps": self.max_steps,
            "d_model": self.d_model
        })
        return config




# Autoencoder model built from the MultiHeadSelfAttention, TransformerBlock and PositionalEncoding layers defined above in this cell.

class TimeSeriesTransformerAutoencoder(Model):
    """Transformer encoder with a classification head and a reconstruction decoder.

    The encoder output feeds two branches:

    * classification -- global average pooling over the step axis, two ReLU
      dense layers with dropout in between, and a softmax layer;
    * reconstruction -- a dense "bottleneck" layer, ``num_layers`` decoder
      ``TransformerBlock`` layers, and a dense layer of width
      ``num_features``.

    ``call`` returns a dict with the keys ``'reconstruction_output'`` and
    ``'classification_output'``.

    Args:
        num_layers: Number of encoder blocks and, separately, decoder blocks.
        d_model: Width of the projected feature axis.
        num_heads: Attention heads per block.
        dff: Hidden width passed to each block's feed-forward network.
        max_seq_len: Number of positions given to ``PositionalEncoding``.
        num_features: Width of the reconstruction output.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate for the blocks and the classifier head.
    """

    def __init__(self,
                 num_layers,
                 d_model,
                 num_heads,
                 dff,
                 max_seq_len,
                 num_features,
                 num_classes,
                 dropout_rate=0.1):
        super().__init__()

        # Hyper-parameters kept on the instance.
        self.num_layers, self.d_model = num_layers, d_model
        self.num_heads, self.max_seq_len = num_heads, max_seq_len
        self.num_features = num_features

        # Front end: feature projection followed by positional information.
        self.input_projection = layers.Dense(d_model)
        self.pos_encoding = PositionalEncoding(max_seq_len, d_model)

        # Encoder stack.
        encoder_stack = []
        for _ in range(num_layers):
            encoder_stack.append(TransformerBlock(d_model, num_heads, dff, dropout_rate))
        self.encoder_blocks = encoder_stack

        # Dense layer applied to the encoder output before decoding.
        self.bottleneck = layers.Dense(d_model)

        # Decoder stack.
        decoder_stack = []
        for _ in range(num_layers):
            decoder_stack.append(TransformerBlock(d_model, num_heads, dff, dropout_rate))
        self.decoder_blocks = decoder_stack

        # Maps decoder features back to the input feature width.
        self.reconstruction_layer = layers.Dense(num_features)

        # Classifier head.
        self.global_pooling = layers.GlobalAveragePooling1D()
        self.classifier_dense1 = layers.Dense(128, activation='relu')
        self.dropout = layers.Dropout(dropout_rate)
        self.classifier_dense2 = layers.Dense(64, activation='relu')
        self.classification_layer = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Encode ``inputs`` and return both the reconstruction and class scores.

        ``training`` is forwarded to every transformer block and to the
        classifier dropout layer.
        """
        # Shared encoder.
        encoded = self.pos_encoding(self.input_projection(inputs))
        for block in self.encoder_blocks:
            encoded = block(encoded, training=training)

        # Classification branch (evaluated before the decoder branch).
        pooled = self.classifier_dense1(self.global_pooling(encoded))
        pooled = self.dropout(pooled, training=training)
        classified = self.classification_layer(self.classifier_dense2(pooled))

        # Decoder branch.
        decoded = self.bottleneck(encoded)
        for block in self.decoder_blocks:
            decoded = block(decoded, training=training)
        reconstructed = self.reconstruction_layer(decoded)

        return {
            'reconstruction_output': reconstructed,
            'classification_output': classified
        }

# Model parameters
# `sequence_length`, `X_train` and `class_encoding` come from the data
# preparation statements earlier in this cell.
num_layers = 4
d_model = 128
num_heads = 8
dff = 256
max_seq_len = sequence_length
num_features = X_train.shape[2]  # size of the last axis of X_train
num_classes = len(class_encoding)
dropout_rate = 0.1

# Create model
tst_ae = TimeSeriesTransformerAutoencoder(
    num_layers=num_layers,
    d_model=d_model,
    num_heads=num_heads,
    dff=dff,
    max_seq_len=max_seq_len,
    num_features=num_features,
    num_classes=num_classes,
    dropout_rate=dropout_rate
)

# Call the model once on an all-zero batch of one window so its layers are
# built before compile(); the outputs are discarded.
sample_input = tf.zeros((1, sequence_length, num_features))
_ = tst_ae(sample_input)

# Compile model
# The dict keys match the keys of the dict returned by the model's call().
# Total loss = 0.3 * reconstruction MSE + 0.7 * classification cross-entropy.
tst_ae.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss={
        'reconstruction_output': 'mse',
        'classification_output': 'categorical_crossentropy'
    },
    loss_weights={
        'reconstruction_output': 0.3,
        'classification_output': 0.7
    },
    metrics={
        'classification_output': ['accuracy']
    }
)

# Train model; stop once validation classification accuracy has not improved
# for 10 epochs (mode='max': higher is better)
early_stopping = EarlyStopping(
    monitor='val_classification_output_accuracy',
    mode='max',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

# The reconstruction target is the input itself (autoencoder objective).
# `epochs` is not assigned anywhere in this cell; it must already exist.
history = tst_ae.fit(
    X_train,
    {
        'reconstruction_output': X_train,
        'classification_output': y_train_cat
    },
    validation_split=0.2,
    epochs=epochs,
    batch_size=128,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model; predict() output is indexed by the same two keys
predictions = tst_ae.predict(X_test)
reconstructed_sequences = predictions['reconstruction_output']
y_pred = predictions['classification_output']
# Reduce per-class scores / one-hot rows to a class index per sample.
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics (precision/recall/F1 use average='weighted')
accuracy = accuracy_score(y_test_classes, y_pred_classes)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_test_classes,
    y_pred_classes,
    average='weighted'
)

print("\nTST-Autoencoder Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# Of the names on this line, only classification_report is not already
# imported at the top of the cell.
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Dict form of the report; it is stored in class_report but not read again in
# this cell.
class_report = classification_report(y_test_classes, y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   output_dict=True)

# Print detailed metrics
print("\nModel Accuracy:", f"{accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_classes, y_pred_classes,
                          target_names=list(class_encoding.keys()),
                          digits=4))  # text report with four decimal places

# Time the model with the measure_inference_time helper defined near the top
# of the notebook; it returns a list of timing values.
print("Starting inference time measurement...")
inference_times = measure_inference_time(
    model=tst_ae,
    X_test=X_test,
    y_test=y_test
)

# Optional: Visualize timing distribution
plt.figure(figsize=(10, 5))
plt.hist(inference_times, bins=30)
plt.title('Distribution of Inference Times')
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency')
plt.show()

# Profile memory on X_test[start_idx:start_idx + num_samples].
# `num_samples` and `num_runs` are not assigned anywhere in this cell.
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    model=tst_ae,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # number of leading test samples to profile
    num_runs=num_runs
)

# LSTM-AE

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns



# Load and preprocess data
# Work on a private copy: the column assignments below would otherwise be
# written straight into the shared `data` frame that other cells also read.
df = data.copy()
# 'timestamp_utc' is parsed as a number of seconds (unit='s').
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)
# NOTE(review): 'Senosor' presumably matches the CSV header spelling -- confirm
# before "correcting" it.
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)
df = df.ffill()  # forward-fill missing values

# Prepare sequences
# `sequence_length` must already be defined by an earlier cell.
# Every remaining column except the two time columns and the label is a feature.
features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
# Class strings that are not keys of class_encoding become NaN here.
labels = df['Class'].map(class_encoding).values

# Sliding windows: rows i .. i+sequence_length-1 form one sample.
# NOTE(review): the label is taken from row i+sequence_length, i.e. the row
# immediately after the window, not the window's last row -- confirm intended.
X, y = [], []
for i in range(len(df) - sequence_length):
    X.append(features[i:i+sequence_length])
    y.append(labels[i+sequence_length])

X = np.array(X)
y = np.array(y)

# Split and scale data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# StandardScaler works on 2-D input, so the windows are flattened to
# (samples * steps, features), scaled, and reshaped back. The scaler is fitted
# on the training windows only and then applied to the test windows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# One-hot encode the integer labels for categorical_crossentropy.
y_train_cat = tf.keras.utils.to_categorical(y_train, len(class_encoding))
y_test_cat = tf.keras.utils.to_categorical(y_test, len(class_encoding))

import tensorflow as tf
from tensorflow.keras import layers, Model

class LSTMAutoencoder(Model):
    """Stacked-LSTM autoencoder with an additional classification head.

    Three sequence-returning LSTM layers encode the input.  The encoded
    sequence feeds two branches:

    * classification -- global average pooling over the step axis, two ReLU
      dense layers with dropout in between, and a softmax layer;
    * reconstruction -- three sequence-returning LSTM layers followed by a
      dense layer of width ``num_features``.

    ``call`` returns a dict with the keys ``'reconstruction_output'`` and
    ``'classification_output'``.

    Args:
        sequence_length: Stored on the instance; not otherwise read by this class.
        num_features: Width of the reconstruction output.
        num_classes: Number of units in the softmax output layer.
        lstm_units: Units of the widest LSTM layer; the middle layers use
            ``lstm_units // 2``.
        latent_dim: Units of the innermost encoder/decoder LSTM layers.
        dropout_rate: Dropout rate used in the classifier head.
    """

    def __init__(self,
                 sequence_length,
                 num_features,
                 num_classes,
                 lstm_units=128,
                 latent_dim=64,
                 dropout_rate=0.1):
        super().__init__()
        self.sequence_length, self.num_features = sequence_length, num_features
        self.lstm_units = lstm_units

        half_units = lstm_units // 2

        # Encoder: progressively narrower LSTM layers.
        self.encoder_lstm1 = layers.LSTM(lstm_units, return_sequences=True)
        self.encoder_lstm2 = layers.LSTM(half_units, return_sequences=True)
        self.encoder_lstm3 = layers.LSTM(latent_dim, return_sequences=True)

        # Classifier head.
        self.global_pooling = layers.GlobalAveragePooling1D()
        self.classifier_dense1 = layers.Dense(128, activation='relu')
        self.dropout1 = layers.Dropout(dropout_rate)
        self.classifier_dense2 = layers.Dense(64, activation='relu')
        self.classifier_output = layers.Dense(num_classes, activation='softmax')

        # Decoder: mirror of the encoder, then a per-step dense projection.
        self.decoder_lstm1 = layers.LSTM(latent_dim, return_sequences=True)
        self.decoder_lstm2 = layers.LSTM(half_units, return_sequences=True)
        self.decoder_lstm3 = layers.LSTM(lstm_units, return_sequences=True)
        self.decoder_output = layers.Dense(num_features)

    def call(self, inputs, training=False):
        """Encode ``inputs`` and return both the reconstruction and class scores.

        ``training`` is forwarded only to the classifier dropout layer.
        """
        # Shared encoder.
        encoded = inputs
        for encoder_layer in (self.encoder_lstm1, self.encoder_lstm2, self.encoder_lstm3):
            encoded = encoder_layer(encoded)

        # Classification branch (evaluated before the decoder branch).
        pooled = self.classifier_dense1(self.global_pooling(encoded))
        pooled = self.dropout1(pooled, training=training)
        classified = self.classifier_output(self.classifier_dense2(pooled))

        # Decoder branch.
        decoded = encoded
        for decoder_layer in (self.decoder_lstm1, self.decoder_lstm2, self.decoder_lstm3):
            decoded = decoder_layer(decoded)
        reconstructed = self.decoder_output(decoded)

        return {
            'reconstruction_output': reconstructed,
            'classification_output': classified
        }

# Get number of features from the training data (size of X_train's last axis)
num_features = X_train.shape[2]

# Create the LSTM-AE model (compilation happens further below)
lstm_ae = LSTMAutoencoder(
    sequence_length=sequence_length,
    num_features=num_features,
    num_classes=len(class_encoding),
    lstm_units=128,
    latent_dim=64,
    dropout_rate=0.1
)

# Call the model once on an all-zero batch of one window so its layers are
# built before compile(); the outputs are discarded.
sample_input = tf.zeros((1, sequence_length, num_features))
_ = lstm_ae(sample_input)

# Compile model
# The dict keys match the keys of the dict returned by the model's call().
# Total loss = 0.3 * reconstruction MSE + 0.7 * classification cross-entropy.
lstm_ae.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss={
        'reconstruction_output': 'mse',
        'classification_output': 'categorical_crossentropy'
    },
    loss_weights={
        'reconstruction_output': 0.3,
        'classification_output': 0.7
    },
    metrics={
        'classification_output': ['accuracy']
    }
)

# Train LSTM-AE model; stop once validation classification accuracy has not
# improved for 10 epochs (mode='max': higher is better)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_classification_output_accuracy',
    mode='max',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

# The reconstruction target is the input itself (autoencoder objective).
# `epochs` is not assigned anywhere in this cell; it must already exist.
lstm_history = lstm_ae.fit(
    X_train,
    {
        'reconstruction_output': X_train,
        'classification_output': y_train_cat
    },
    validation_split=0.2,
    epochs=epochs,
    batch_size=128,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate LSTM-AE model; predict() output is indexed by the same two keys
lstm_predictions = lstm_ae.predict(X_test)
lstm_reconstructed = lstm_predictions['reconstruction_output']
lstm_y_pred = lstm_predictions['classification_output']
lstm_y_pred_classes = np.argmax(lstm_y_pred, axis=1)

# Get true test classes from one-hot encoded format
y_test_classes = np.argmax(y_test_cat, axis=1)

# Calculate metrics for LSTM-AE (precision/recall/F1 use average='weighted')
lstm_accuracy = accuracy_score(y_test_classes, lstm_y_pred_classes)
lstm_precision, lstm_recall, lstm_f1, _ = precision_recall_fscore_support(
    y_test_classes,
    lstm_y_pred_classes,
    average='weighted'
)

print("\nLSTM-Autoencoder Performance:")
print(f"Accuracy: {lstm_accuracy:.4f}")
print(f"Precision: {lstm_precision:.4f}")
print(f"Recall: {lstm_recall:.4f}")
print(f"F1-score: {lstm_f1:.4f}")



# Of the names on this line, only classification_report is not already
# imported at the top of the cell.
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, classification_report

# Dict form of the report; it is stored in class_report but not read again in
# this cell.
class_report = classification_report(y_test_classes, lstm_y_pred_classes,
                                   target_names=list(class_encoding.keys()),
                                   output_dict=True)


print("\nClassification Report:")
print(classification_report(y_test_classes, lstm_y_pred_classes,
                          target_names=list(class_encoding.keys()),
                          digits=4))  # text report with four decimal places

# Time the model with the measure_inference_time helper defined near the top
# of the notebook; it returns a list of timing values.
print("Starting inference time measurement...")
inference_times = measure_inference_time(
    model=lstm_ae,
    X_test=X_test,
    y_test=y_test
)

# Optional: Visualize timing distribution
plt.figure(figsize=(10, 5))
plt.hist(inference_times, bins=30)
plt.title('Distribution of Inference Times')
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency')
plt.show()

# Profile memory on X_test[start_idx:start_idx + num_samples].
# `num_samples` and `num_runs` are not assigned anywhere in this cell.
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    model=lstm_ae,
    X_test=X_test,
    start_idx=0,
    num_samples=num_samples,  # number of leading test samples to profile
    num_runs=num_runs
)
