In [25]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def load_time_series_data(file_path):
    """
    Read a headerless CSV file and return its values as a flat array.

    No line of the file is treated as a header, so the first row is data.
    All cells are returned as a one-dimensional NumPy array in row-major
    order; for a single-column file that is simply the series in file order.
    """
    frame = pd.read_csv(file_path, header=None)
    table = frame.to_numpy()
    return table.flatten()

def sliding_window(time_series, window_size, stride):
    """
    Segment the time series into fixed-size windows taken every `stride` samples.

    Windows start at indices 0, stride, 2 * stride, ... and only complete
    windows are kept, so trailing samples that cannot fill a whole window are
    dropped. Consecutive windows overlap whenever stride < window_size.

    Args:
        time_series: Sequence or array of samples; windowing is applied along
            its first axis.
        window_size: Number of samples per window. Must be positive.
        stride: Step between consecutive window starts. Must be positive.

    Returns:
        A new array with one window per row: shape (n_windows, window_size)
        for one-dimensional input. If the series is shorter than
        window_size the result has shape (0, window_size) instead of a
        shapeless empty array, so the window dimension is always present.

    Raises:
        ValueError: If window_size or stride is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be positive, got {}".format(window_size))
    if stride <= 0:
        raise ValueError("stride must be positive, got {}".format(stride))

    series = np.asarray(time_series)

    # Start index of every complete window; empty when the series is too short.
    starts = np.arange(0, len(series) - window_size + 1, stride)

    # Broadcasting the column of starts against the in-window offsets builds
    # an (n_windows, window_size) index grid, so one indexing operation
    # gathers every window (as a copy) without a Python-level loop.
    index_grid = starts[:, None] + np.arange(window_size)
    return series[index_grid]

def normalize_windows(windows):
    """
    Min-max scale each window independently to the range [0, 1].

    Every window is shifted by its own minimum and divided by its own range
    (max - min), which is the transform MinMaxScaler performs when fitted on
    a single window. Note that this does NOT turn a window into a probability
    distribution: the values lie in [0, 1] but do not sum to 1.

    Args:
        windows: Array-like with one window per row, e.g. shape
            (n_windows, window_size). Windows with more than one dimension
            are flattened before scaling.

    Returns:
        A new floating-point array of shape (n_windows, samples_per_window).
        A window whose values are all equal maps to all zeros. NaN samples
        are ignored when finding the minimum and maximum and stay NaN in the
        output. Empty input is returned as an empty float array of the same
        shape.
    """
    arr = np.asarray(windows)
    if not np.issubdtype(arr.dtype, np.floating):
        # Integer (or boolean) input must be promoted so the scaled values
        # are not truncated.
        arr = arr.astype(np.float64)

    if arr.size == 0:
        # Nothing to scale; a reduction over zero samples would raise.
        return arr.copy()

    # One row per window, regardless of how many dimensions a window has.
    flat = arr.reshape(len(arr), -1)

    lowest = np.nanmin(flat, axis=1, keepdims=True)
    highest = np.nanmax(flat, axis=1, keepdims=True)
    span = highest - lowest

    # A (near-)constant window has zero range; dividing by 1 instead maps it
    # to zeros rather than producing NaN/inf from a division by zero.
    span[span < 10 * np.finfo(span.dtype).eps] = 1.0

    return (flat - lowest) / span

def preprocess_data(file_path, window_size, stride):
    """
    Run the complete preprocessing pipeline on a time series file.

    The file is loaded into a flat series, the series is cut into windows of
    `window_size` samples taken every `stride` samples, and each window is
    then normalized. The normalized windows are returned.
    """
    # Innermost call runs first: load -> segment -> normalize.
    return normalize_windows(
        sliding_window(load_time_series_data(file_path), window_size, stride)
    )

# Example usage: runs only when this file is executed directly, not on import.
if __name__ == "__main__":
    # Parameters
    # The input is parsed as a headerless CSV even though its name ends in .txt.
    file_path = '178_UCR_Anomaly_ltstdbs30791AI_17555_52600_52800.txt'  # Replace with your file path
    window_size = 8  # Number of samples in each sliding window
    stride = 2  # Step between window starts; 2 < 8, so neighbouring windows share 6 samples

    # Preprocess data: load the file, segment it into windows, normalize each window
    preprocessed_data = preprocess_data(file_path, window_size, stride)
    # One row per window, one column per sample inside the window
    print("Preprocessed data shape:", preprocessed_data.shape)
    # NOTE(review): indexing [0] assumes at least one window was produced, i.e. the
    # series holds at least window_size samples — confirm for other input files.
    print("First window:", preprocessed_data[0])

Preprocessed data shape: (28058, 8)
First window: [1.   0.5  0.5  0.5  0.25 0.25 0.25 0.  ]
