In [None]:
import pandas as pd
# Read the raw CSV once; later cells copy `data` instead of re-reading the file.
csv_path = 'sunnyside_percent_25.csv'
data = pd.read_csv(csv_path)
# Bare last expression so the notebook renders a preview of the first rows.
data.head()

# Inference Time Measurement

In [None]:
import time
import numpy as np

def measure_inference_time(model, X_test, y_test=None, num_samples=None, num_iterations=100):
    """
    Measure per-sample inference latency of ``model``.

    Each timed sample is passed to ``model.predict`` as a batch of one,
    ``num_iterations`` times in a row. The mean wall-clock time per call,
    minus the measured cost of an empty timing loop, is recorded for that
    sample. Summary statistics are printed once all samples are timed.

    Parameters:
    -----------
    model : object
        Must expose ``predict(batch, verbose=0)``.
    X_test : array-like
        Test inputs. Must support ``len()`` and slicing along the first
        axis; one sample is taken as ``X_test[i:i+1]``.
    y_test : any, optional
        Unused. Accepted only so existing callers keep working.
    num_samples : int, optional
        Number of leading samples to time. ``None`` (default) times every
        sample in ``X_test``; this costs ``len(X_test) * num_iterations``
        predict calls, so pass a cap for large test sets.
    num_iterations : int
        Number of predict calls averaged per sample (default 100).

    Returns:
    --------
    list of float
        Net inference time in seconds for each timed sample, in order.

    Raises:
    -------
    ValueError
        If ``num_iterations`` is below 1 or there are no samples to time.
    """
    if num_iterations < 1:
        raise ValueError("num_iterations must be at least 1")

    # Count samples along the first axis (not the time steps of one sample).
    total_samples = len(X_test)
    sample_count = total_samples if num_samples is None else min(num_samples, total_samples)
    if sample_count < 1:
        raise ValueError("No test samples to measure")

    # Measure baseline overhead of the timing loop itself.
    # perf_counter is monotonic and high-resolution, unlike time.time().
    loop_start = time.perf_counter()
    for _ in range(num_iterations):
        pass
    loop_overhead = (time.perf_counter() - loop_start) / num_iterations

    # Store timing results
    arr = []

    # Process each selected test sample
    for i in range(sample_count):
        # Get single test sample
        test_sample = X_test[i:i+1]  # Keep the batch dimension

        # Average the latency over num_iterations predict calls
        predict_start = time.perf_counter()
        for _ in range(num_iterations):
            _ = model.predict(test_sample, verbose=0)
        mean_call_time = (time.perf_counter() - predict_start) / num_iterations

        # Calculate net inference time
        arr.append(mean_call_time - loop_overhead)

    # Calculate statistics
    avg_time = np.mean(arr)
    std_time = np.std(arr)

    print("\nInference Time Statistics:")
    print(f"Average inference time per sample: {avg_time:.4f} seconds")
    print(f"Standard deviation: {std_time:.4f} seconds")
    print(f"Min time: {min(arr):.4f} seconds")
    print(f"Max time: {max(arr):.4f} seconds")

    return arr

# Memory Measurement

In [None]:
import numpy as np
import psutil
from memory_profiler import memory_usage

def measure_subset_memory_usage(model, X_test, start_idx=0, num_samples=100, num_runs=10):
    """
    Profile peak memory while ``model`` predicts on a slice of the test data.

    The slice ``X_test[start_idx:start_idx + num_samples]`` is predicted
    ``num_runs`` times under ``memory_profiler.memory_usage``. For each run the
    highest sampled value, minus the process RSS taken once before the first
    run, is recorded as that run's peak. Progress and a summary are printed.

    Parameters:
    -----------
    model : ML model object
        Must expose ``predict(batch)``.
    X_test : numpy.ndarray
        Test data, sliced along the first axis.
    start_idx : int
        Index of the first sample in the slice.
    num_samples : int
        Number of samples requested for the slice.
    num_runs : int
        How many times the profiled prediction is repeated.

    Returns:
    --------
    dict
        ``memory_results`` (per-run peaks in MB), ``memory_stats`` (mean, std,
        min, max and per_sample_mean of those peaks), ``baseline_memory`` (MB)
        and ``subset_size`` (number of samples actually in the slice).
    """
    # Slice out the samples that every run will predict on
    stop_idx = start_idx + num_samples
    X_subset = X_test[start_idx:stop_idx]
    subset_size = len(X_subset)

    # RSS of the current process before profiling, converted from bytes to MB.
    # NOTE(review): the baseline covers this process only, while the profiled
    # samples below request include_children=True - confirm the two are comparable.
    rss_bytes = psutil.Process().memory_info().rss
    baseline_memory = rss_bytes / 1024 / 1024
    print(f"Baseline memory: {baseline_memory:.2f} MB")

    print(f"\nMeasuring memory usage for {subset_size} samples ({num_runs} runs)...")

    def predict_subset():
        # Zero-argument wrapper: memory_usage takes a (callable, args, kwargs) tuple
        return model.predict(X_subset)

    profiled_call = (predict_subset, (), {})

    memory_results = []
    for run_number in range(1, num_runs + 1):
        # Sample memory every 5 ms for the duration of one predict call
        samples_mb = memory_usage(
            profiled_call,
            interval=0.005,
            max_iterations=1,
            include_children=True,
        )

        # Peak for this run, relative to the baseline taken above
        peak_memory = max(samples_mb) - baseline_memory
        memory_results.append(peak_memory)
        print(f"Run {run_number}/{num_runs}: Peak memory usage: {peak_memory:.2f} MB")

    # Summarise the per-run peaks
    mean_peak = np.mean(memory_results)
    memory_stats = {
        'mean': mean_peak,
        'std': np.std(memory_results),
        'min': np.min(memory_results),
        'max': np.max(memory_results),
        'per_sample_mean': mean_peak / subset_size,
    }

    print("\nMemory Usage Statistics (for subset):")
    print(f"Subset size: {subset_size} samples")
    print(f"Average peak memory for subset: {memory_stats['mean']:.2f} MB")
    print(f"Standard deviation: {memory_stats['std']:.2f} MB")
    print(f"Min peak memory: {memory_stats['min']:.2f} MB")
    print(f"Max peak memory: {memory_stats['max']:.2f} MB")
    print(f"Average memory per sample: {memory_stats['per_sample_mean']:.4f} MB")

    return {
        'memory_results': memory_results,
        'memory_stats': memory_stats,
        'baseline_memory': baseline_memory,
        'subset_size': subset_size,
    }

# Data Preprocessing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Conv1D, BatchNormalization, Activation, Add, GlobalAveragePooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Load the data
# Work on a copy so the raw frame stays untouched and this cell can be re-run.
df = data.copy()

# Convert timestamp to datetime
df['timestamp'] = pd.to_datetime(df['timestamp_utc'], unit='s')
df['datetime'] = pd.to_datetime(df['datetime'], utc=True)

# Drop unnecessary columns
# NOTE(review): 'Senosor' looks like a misspelling of 'Sensor'; presumably it
# matches the CSV header as written - confirm against the file.
df = df.drop(['timestamp_utc', 'Senosor'], axis=1)

# Handle missing values (forward fill; leading gaps, if any, stay NaN)
df = df.ffill()

# Encode the 'Class' column
class_encoding = {'clean': 0, 'random': 1, 'malfunction': 2, 'drift': 3, 'bias': 4}
df['Class'] = df['Class'].map(class_encoding)

# Prepare data for TCN
# Window length in time steps. Reuse a value already defined in the session,
# otherwise fall back to a default so the cell runs on a fresh kernel.
# TODO(review): confirm the default matches the window length that
# ResNet.keras expects; a mismatch will surface as a shape error in predict.
DEFAULT_SEQUENCE_LENGTH = 10
sequence_length = globals().get('sequence_length', DEFAULT_SEQUENCE_LENGTH)

features = df.drop(['timestamp', 'datetime', 'Class'], axis=1).values
labels = df['Class'].values

# Each sample is `sequence_length` consecutive rows; its target is the class
# of the row immediately after the window.
n_windows = len(df) - sequence_length
if n_windows < 1:
    raise ValueError(
        f"Need more than {sequence_length} rows to build windows, got {len(df)}"
    )

X = np.array([features[i:i + sequence_length] for i in range(n_windows)])
y = np.array(labels[sequence_length:])

# Series.map yields NaN for labels missing from class_encoding; fail here with
# a clear message rather than passing NaN targets to to_categorical.
if pd.isna(y).any():
    raise ValueError(
        "Found target labels not covered by class_encoding: "
        f"expected one of {sorted(class_encoding)}"
    )

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
# StandardScaler works on 2-D input: flatten (samples, steps, features) to
# (samples * steps, features), scale, then restore the original shape.
# The scaler is fitted on the training windows only and reused for the test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

# Convert labels to categorical
num_classes = len(class_encoding)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)


# Model Loading

In [None]:
# Model Loading
from tensorflow.keras.models import load_model
# Restore the saved network from disk, then compile it with the optimizer,
# loss and metric given below.
model_path = 'ResNet.keras'
resnet_model = load_model(model_path)
resnet_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Time batch-of-one predictions on the held-out data
print("Starting inference time measurement...")
inference_times = measure_inference_time(resnet_model, X_test, y_test)

# Profile peak memory while predicting on a slice of the test data
print("Starting memory profiling for subset...")
memory_metrics = measure_subset_memory_usage(
    resnet_model,
    X_test,
    start_idx=0,
    num_samples=20000,
    num_runs=10,
)