In [None]:
import sys
from pathlib import Path
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

# Add project root to path so the `preprocessing` package can be imported.
# The root is taken to be the parent of the current working directory.
project_root = Path.cwd().parent
# Guard the append: re-running this cell must not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

from preprocessing.config import (
    Config, DatasetConfig, PreprocessingConfig,
    AugmentationConfig, DataLoaderConfig, TransformConfig
)
from preprocessing.dataloader.pipeline import TimeSeriesPipeline

# Fix the seed of both NumPy's global generator and PyTorch so that
# repeated runs of the notebook draw the same random numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)


In [None]:
# Create synthetic time series data: noisy sine waves with a binary label
# derived from each wave's frequency.
from torch.utils.data import TensorDataset

n_samples = 100
seq_length = 50
n_channels = 1

# Time axis shared by every generated series.
t = np.linspace(0, 10, seq_length)

# Separate, clearly named NumPy buffers; `data` / `targets` are only ever
# bound to the final tensors.
signals_np = np.empty((n_samples, seq_length), dtype=np.float64)
labels_np = np.empty(n_samples, dtype=np.int64)

for i in range(n_samples):
    # Draw order (freq, phase, amplitude, noise) is kept fixed so a seeded
    # run reproduces the same series.
    freq = np.random.uniform(0.5, 2.0)
    phase = np.random.uniform(0, 2 * np.pi)
    amplitude = np.random.uniform(0.5, 2.0)

    # Clean sine wave for this sample
    clean = amplitude * np.sin(2 * np.pi * freq * t + phase)
    # Add some noise
    noise = np.random.normal(0, 0.1, seq_length)
    signals_np[i] = clean + noise

    # Binary classification based on frequency: 1 when freq > 1.25
    # (the midpoint of the 0.5-2.0 sampling range), else 0.
    labels_np[i] = int(freq > 1.25)

# Convert to torch tensors with explicit dtypes (float32 features, int64
# labels) instead of the legacy FloatTensor/LongTensor constructors.
# The reshape adds the channel axis; it only works as written for n_channels == 1.
data = torch.from_numpy(
    signals_np.reshape(n_samples, seq_length, n_channels)
).float()
targets = torch.from_numpy(labels_np)

# Create a simple dataset pairing each series with its label
dataset = TensorDataset(data, targets)


In [None]:
# Create pipeline configuration, one section at a time.
dataset_cfg = DatasetConfig(name="synthetic")

# Transforms are listed in this order: min-max scaling, then standard scaling.
# NOTE(review): how the pipeline applies this list is defined in the
# preprocessing package, not here — confirm order/semantics there.
transform_steps = [
    TransformConfig(name="MinMaxScaler", params={"feature_range": [0, 1]}),
    TransformConfig(name="StandardScaler", params={"epsilon": 1e-8}),
]
preprocessing_cfg = PreprocessingConfig(transforms=transform_steps)

augmentation_cfg = AugmentationConfig(
    enabled=True,
    methods=["linear_combo"],
    linear_combo_ratio=0.5,
)

loader_cfg = DataLoaderConfig(batch_size=32, shuffle=True)

config = Config(
    dataset=dataset_cfg,
    preprocessing=preprocessing_cfg,
    augmentation=augmentation_cfg,
    dataloader=loader_cfg,
)

# Create the pipeline, fit its transforms on the dataset, then ask it for a
# dataloader over the same dataset.
pipeline = TimeSeriesPipeline(config)
pipeline.fit_transforms(dataset)
dataloader = pipeline.create_dataloader(dataset)


In [None]:
# Visualize original and processed data
def plot_samples(original_batch, processed_batch, num_samples=3):
    """Plot original series (top row) above processed series (bottom row).

    Column ``i`` shows channel 0 of ``original_batch[i]`` in the top row and
    channel 0 of ``processed_batch[i]`` in the bottom row.

    Args:
        original_batch: Batch indexed as ``[sample, time, channel]`` whose
            slices expose ``.numpy()`` (e.g. a CPU ``torch.Tensor``).
        processed_batch: Batch with the same indexing convention.
        num_samples: Maximum number of columns to draw. It is clamped to the
            length of the shorter batch so a small batch does not raise
            ``IndexError``.

    Raises:
        ValueError: If there is not at least one sample to plot.
    """
    n_cols = min(num_samples, len(original_batch), len(processed_batch))
    if n_cols < 1:
        raise ValueError("plot_samples needs at least one sample to plot")

    # squeeze=False keeps `axes` two-dimensional even for a single column;
    # otherwise axes[row, col] fails when n_cols == 1.
    fig, axes = plt.subplots(2, n_cols, figsize=(15, 6), squeeze=False)
    fig.suptitle('Original vs Processed Time Series')

    rows = (('Original', original_batch), ('Processed', processed_batch))
    for col in range(n_cols):
        for row, (label, batch) in enumerate(rows):
            ax = axes[row, col]
            ax.plot(batch[col, :, 0].numpy())
            ax.set_title(f'{label} {col+1}')
            ax.grid(True)

    plt.tight_layout()
    plt.show()

# Fetch one batch of raw samples and one batch from the pipeline dataloader,
# keeping only the first element of each batch (the series, not the labels).
# NOTE(review): the two loaders are iterated independently and the raw one
# shuffles, so column i of the figure presumably shows unrelated samples
# rather than a before/after pair of the same series — confirm this is intended.
raw_loader = DataLoader(dataset, batch_size=3, shuffle=True)
original_batch = next(iter(raw_loader))[0]

pipeline_batch = next(iter(dataloader))
processed_batch = pipeline_batch[0]

plot_samples(original_batch, processed_batch)
