# Integration Testing

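This notebook performs end-to-end integration testing of the Industrial Data System components. Run the cells from top to bottom: later cells reuse names defined by earlier ones, and the summary report in section 9 aggregates the results they produce.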

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import time

# Add parent directories to path for module imports.
# NOTE(review): this assumes the kernel's working directory is the notebook's
# own folder and that the project root sits two levels above it — confirm
# against the repository layout.
notebook_dir = Path(os.path.abspath(''))  # abspath('') resolves to the current working directory
project_root = notebook_dir.parent.parent  # Go up two levels from notebooks/

# Keep the project root at the front of sys.path without duplicating it:
# a plain insert() adds one more copy every time this cell is re-run.
project_root_entry = str(project_root)
sys.path[:] = [entry for entry in sys.path if entry != project_root_entry]
sys.path.insert(0, project_root_entry)

print("Imports successful!")
print(f"Python version: {sys.version}")
print(f"Working directory: {os.getcwd()}")
print(f"Project root: {project_root}")

## 1. Test All Import Paths

In [None]:
import importlib

print("Testing module imports...\n")
test_results = []  # one (module_name, success, error_message_or_None) tuple per module

# Each entry: (dotted module path, names that must be available in that module).
# An empty name list means "only check that the module itself imports".
imports_to_test = [
    ('industrial_data_system.utils.asc_utils', ['load_and_process_asc_file', 'convert_asc_to_parquet']),
    ('industrial_data_system.core.config', ['get_config']),
    ('industrial_data_system.core.db_manager', ['DatabaseManager']),
]

for module_name, items in imports_to_test:
    try:
        # importlib does the same import without exec()-ing generated source.
        # Unlike the exec-based form it does not bind the names in the notebook
        # namespace; the later cells import what they use explicitly.
        module = importlib.import_module(module_name)
        missing = [item for item in items if not hasattr(module, item)]
        if missing:
            raise ImportError(
                f"cannot import name(s) {', '.join(missing)} from '{module_name}'"
            )
        print(f"✓ {module_name}")
        test_results.append((module_name, True, None))
    except Exception as e:
        # Importing a module can raise anything, not just ImportError, so keep
        # the broad catch and record the message for the summary.
        print(f"✗ {module_name}: {str(e)}")
        test_results.append((module_name, False, str(e)))

success_count = sum(1 for _, success, _ in test_results if success)
print(f"\nImport Results: {success_count}/{len(test_results)} successful")

## 2. Test Data Pipeline (Load → Process → Transform)

In [None]:
from Normal_data_to_fft import (
    load_and_process_asc_file,
    extract_pump_channels,
    process_all_channels_from_asc
)

print("Testing complete data pipeline...\n")

test_file = '../Tests/Data/V24-2025__0011_2.ASC'

# Start from "failed" so the summary cell always finds this flag, even when a
# pipeline step raises. This mirrors the database and configuration cells.
pipeline_success = False

try:
    if os.path.exists(test_file):
        # Step 1: Load
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring elapsed time; time.time() can jump with system clock changes.
        start_time = time.perf_counter()
        df = load_and_process_asc_file(test_file)
        load_time = time.perf_counter() - start_time
        print(f"✓ Step 1 - Load: {df.shape[0]} samples loaded in {load_time:.2f}s")

        # Step 2: Extract channels
        start_time = time.perf_counter()
        data = extract_pump_channels(df)
        extract_time = time.perf_counter() - start_time
        print(f"✓ Step 2 - Extract: {len(data)} channels extracted in {extract_time:.2f}s")

        # Step 3: FFT Transform
        start_time = time.perf_counter()
        freqs, fft_features, feature_matrix = process_all_channels_from_asc(
            data, window_size=1024, overlap=0.5, max_freq=500.0
        )
        fft_time = time.perf_counter() - start_time
        print(f"✓ Step 3 - FFT Transform: {feature_matrix.shape} features in {fft_time:.2f}s")

        print(f"\nTotal pipeline time: {load_time + extract_time + fft_time:.2f}s")
    else:
        print(f"✗ Test file not found: {test_file}")
        print("Creating synthetic data for testing...")

        # Create synthetic data. A locally seeded generator keeps the fallback
        # run reproducible without touching NumPy's global random state.
        synthetic_rng = np.random.default_rng(42)
        t = np.linspace(0, 50, 5000)
        data = {
            'Messzeit[s]': t,
            'Pressure [bar]': 50 + 5*np.sin(2*np.pi*0.5*t) + synthetic_rng.normal(0, 0.5, len(t)),
            'Flow [L/min]': 100 + 10*np.sin(2*np.pi*0.3*t) + synthetic_rng.normal(0, 1, len(t)),
            'Speed [rpm]': 1500 + 50*np.sin(2*np.pi*0.2*t) + synthetic_rng.normal(0, 5, len(t)),
        }

        # NOTE(review): 5000 points over 50 s is roughly 100 samples per second,
        # so max_freq=500.0 lies above this signal's Nyquist frequency — confirm
        # that process_all_channels_from_asc tolerates or clamps it.
        freqs, fft_features, feature_matrix = process_all_channels_from_asc(
            data, window_size=1024, overlap=0.5, max_freq=500.0
        )
        print("✓ Synthetic data pipeline complete")

    # Reached only when every step above completed without raising.
    pipeline_success = True
except Exception as e:
    print(f"✗ Data pipeline error: {str(e)}")

## 3. Test Database Operations

In [None]:
from industrial_data_system.core.db_manager import DatabaseManager

print("Testing database operations...\n")

# Smoke test: construct the manager and read the model registry once.
# db_success records the outcome for the summary report.
try:
    # Initialize database
    db_manager = DatabaseManager()
    print("✓ Database initialized")

    # Test model registry operations; a falsy result is reported as 0 models
    models = db_manager.get_all_models()
    model_count = len(models) if models else 0
    print(f"✓ Model registry accessible: {model_count} models found")
except Exception as db_error:
    print(f"✗ Database error: {str(db_error)}")
    db_success = False
else:
    db_success = True

## 4. Test Configuration Management

In [None]:
from industrial_data_system.core.config import get_config

print("Testing configuration management...\n")

# Smoke test: load the configuration and preview a few of its attributes.
# config_success records the outcome for the summary report.
try:
    config = get_config()
    print("✓ Configuration loaded")

    # Display some config values (only the first five instance attributes).
    # NOTE(review): values are printed verbatim — confirm the configuration
    # holds no credentials before sharing this notebook's output.
    if hasattr(config, '__dict__'):
        print("\nConfiguration values:")
        preview_items = list(config.__dict__.items())[:5]
        for option_name, option_value in preview_items:
            print(f"  {option_name}: {option_value}")
except Exception as config_error:
    print(f"✗ Configuration error: {str(config_error)}")
    config_success = False
else:
    config_success = True

## 5. Test File Format Support

In [None]:
# The ASC loader is required by later cells, so it is imported unconditionally.
from industrial_data_system.utils.asc_utils import load_and_process_asc_file

print("Testing file format support...\n")

# Availability check only: each entry records whether the loader function can
# be imported and called, not whether a file of that format parses correctly.
format_support = {
    'ASC': callable(load_and_process_asc_file),
    'CSV': False,
    'TDMS': False
}

# Each optional loader is imported inside its own try block. With a single
# top-level import, a missing loader would abort the whole cell before the
# checks below could report it.

# Test CSV loading
try:
    from industrial_data_system.utils.asc_utils import load_and_process_csv_file
    if callable(load_and_process_csv_file):
        print("✓ CSV loader available")
        format_support['CSV'] = True
    else:
        print("✗ CSV loader: not callable")
except Exception as e:
    print(f"✗ CSV loader: {str(e)}")

# Test TDMS loading
try:
    from industrial_data_system.utils.asc_utils import load_and_process_tdms_file
    if callable(load_and_process_tdms_file):
        print("✓ TDMS loader available")
        format_support['TDMS'] = True
    else:
        print("✗ TDMS loader: not callable")
except Exception as e:
    print(f"✗ TDMS loader: {str(e)}")

print(f"\nSupported formats: {', '.join([k for k, v in format_support.items() if v])}")

## 6. Performance Benchmarks

In [None]:
print("Running performance benchmarks...\n")

# Benchmark data sizes
data_sizes = [1000, 5000, 10000]
benchmark_results = []  # (sample count, elapsed seconds) per size

# Seeded local generator: reproducible inputs, global NumPy random state untouched.
bench_rng = np.random.default_rng(0)

for size in data_sizes:
    # Create synthetic data
    bench_t = np.linspace(0, size/100, size)
    test_data = {
        'Messzeit[s]': bench_t,
        'Channel1': np.sin(2*np.pi*bench_t) + bench_rng.normal(0, 0.1, size),
        'Channel2': np.cos(2*np.pi*bench_t) + bench_rng.normal(0, 0.1, size),
        'Channel3': np.sin(4*np.pi*bench_t) + bench_rng.normal(0, 0.1, size),
    }

    # Benchmark FFT processing.
    # The outputs go to bench_* names so this loop does not overwrite the
    # freqs / fft_features / feature_matrix produced by the data pipeline cell,
    # which the memory usage cell reads afterwards.
    # NOTE(review): the time axis is sampled at roughly 100 points per second,
    # so max_freq=100.0 is above its Nyquist frequency — confirm that
    # process_all_channels_from_asc tolerates or clamps it.
    start_time = time.perf_counter()
    bench_freqs, bench_fft_features, bench_feature_matrix = process_all_channels_from_asc(
        test_data, window_size=min(512, size//4), overlap=0.5, max_freq=100.0
    )
    elapsed = time.perf_counter() - start_time

    benchmark_results.append((size, elapsed))
    # Guard the rate against a zero reading from a coarse timer.
    throughput = size/elapsed if elapsed > 0 else float('inf')
    print(f"  Size {size:6d}: {elapsed:.3f}s ({throughput:.0f} samples/s)")

# Plot benchmark results
fig, ax = plt.subplots(figsize=(10, 6))
sizes, times = zip(*benchmark_results)
ax.plot(sizes, times, 'o-', linewidth=2, markersize=8)
ax.set_xlabel('Data Size (samples)', fontsize=12)
ax.set_ylabel('Processing Time (seconds)', fontsize=12)
ax.set_title('FFT Processing Performance', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 7. Memory Usage Analysis

In [None]:
import sys

print("Analyzing memory usage...\n")

# Report the size of whichever main data structures exist in this session.
# Each structure is optional: for example, `df` is only created when the real
# test file was loaded. The objects measured are whatever those names are bound
# to when this cell runs.
BYTES_PER_MB = 1024**2

# Accumulate the total while measuring. Re-checking for df_memory and friends
# afterwards could pick up stale values left behind by an earlier run.
total_memory = 0.0

if 'df' in locals():
    df_memory = df.memory_usage(deep=True).sum() / BYTES_PER_MB
    total_memory += df_memory
    print(f"  DataFrame: {df_memory:.2f} MB")

if 'feature_matrix' in locals():
    matrix_memory = feature_matrix.nbytes / BYTES_PER_MB
    total_memory += matrix_memory
    print(f"  Feature Matrix: {matrix_memory:.2f} MB")

if 'fft_features' in locals():
    # Sums .nbytes over the mapping's values.
    fft_memory = sum(v.nbytes for v in fft_features.values()) / BYTES_PER_MB
    total_memory += fft_memory
    print(f"  FFT Features: {fft_memory:.2f} MB")

# Total memory footprint estimate
print(f"\n  Total estimated memory: {total_memory:.2f} MB")

## 8. Error Handling Tests

In [None]:
print("Testing error handling...\n")


def _check_raises(label, trigger):
    """Call ``trigger`` and report whether it rejected its bad input.

    Args:
        label: Human-readable name of the scenario, used in output and results.
        trigger: Zero-argument callable that invokes the function under test
            with deliberately bad input.

    Returns:
        A ``(label, passed, detail)`` tuple. ``passed`` is True only when the
        call raised an exception other than NameError; ``detail`` is the
        exception type name on success or a short reason on failure.
    """
    try:
        trigger()
    except NameError as e:
        # A NameError means the function under test is not defined (for example
        # an earlier import cell failed) or is itself broken. That is not
        # graceful input validation, so it must not count as a pass.
        detail = f"NameError: {e}"
        print(f"✗ {label} handling: {detail}")
        return (label, False, detail)
    except Exception as e:
        print(f"✓ {label} handling: {type(e).__name__}")
        return (label, True, str(type(e).__name__))
    # No exception at all: the bad input was silently accepted.
    print(f"✗ {label} handling: no exception raised")
    return (label, False, 'Should have raised error')


error_tests = [
    # Test 1: Non-existent file
    _check_raises(
        'Non-existent file',
        lambda: load_and_process_asc_file('nonexistent_file.asc'),
    ),
    # Test 2: Invalid data (missing required structure)
    _check_raises(
        'Invalid data',
        lambda: process_all_channels_from_asc({'time': [1, 2, 3]}),
    ),
    # Test 3: Empty data
    _check_raises(
        'Empty data',
        lambda: process_all_channels_from_asc({'Messzeit[s]': np.array([])}),
    ),
]

print(f"\nError handling: {sum(1 for _, success, _ in error_tests if success)}/{len(error_tests)} tests passed")

## 9. Integration Test Summary Report

In [None]:
print("\n" + "=" * 70)
print("INTEGRATION TEST SUMMARY REPORT")
print("=" * 70)

# Results are produced by earlier cells. Each one is looked up by name so that
# a cell which crashed or was skipped shows up here as a failed check instead
# of stopping the summary with a NameError.


def _tally_results(name):
    """Return ``(passed, total)`` for a list of ``(label, success, detail)`` tuples.

    A list that was never created counts as one failed check.
    """
    results = globals().get(name)
    if results is None:
        return 0, 1
    return sum(1 for _, ok, _ in results if ok), len(results)


def _tally_flag(name):
    """Return ``(passed, 1)`` for a boolean success flag; a missing flag counts as failed."""
    return (1 if globals().get(name, False) else 0), 1


def _tally_formats(name):
    """Return ``(supported, total)`` for a format-name -> bool mapping.

    A mapping that was never created counts as one failed check.
    """
    formats = globals().get(name)
    if formats is None:
        return 0, 1
    return sum(1 for supported in formats.values() if supported), len(formats)


# Collect all test results as (category, passed, total)
test_categories = [
    ('Module Imports', *_tally_results('test_results')),
    ('Data Pipeline', *_tally_flag('pipeline_success')),
    ('Database Operations', *_tally_flag('db_success')),
    ('Configuration', *_tally_flag('config_success')),
    ('File Format Support', *_tally_formats('format_support')),
    ('Error Handling', *_tally_results('error_tests')),
]

total_passed = sum(passed for _, passed, _ in test_categories)
total_tests = sum(total for _, _, total in test_categories)

print(f"\nTest Results by Category:")
print("-" * 70)
for category, passed, total in test_categories:
    percentage = (passed/total*100) if total > 0 else 0
    status = "✓" if passed == total else "⚠" if passed > 0 else "✗"
    print(f"{status} {category:30s}: {passed:2d}/{total:2d} ({percentage:5.1f}%)")

print("-" * 70)
overall_percentage = (total_passed/total_tests*100) if total_tests > 0 else 0
print(f"\nOverall Test Results: {total_passed}/{total_tests} ({overall_percentage:.1f}%)")

# NOTE(review): the top band starts at 90%, so its "All systems operational"
# wording can appear while a few checks are failing — confirm this is intended.
if overall_percentage >= 90:
    print("\n✓ System Status: EXCELLENT - All systems operational")
elif overall_percentage >= 70:
    print("\n⚠ System Status: GOOD - Minor issues detected")
elif overall_percentage >= 50:
    print("\n⚠ System Status: FAIR - Some components need attention")
else:
    print("\n✗ System Status: POOR - Significant issues detected")

print("\n" + "=" * 70)

# Performance summary (skipped when the benchmark cell produced nothing)
benchmark_timings = globals().get('benchmark_results') or []
if benchmark_timings:
    print("\nPerformance Summary:")
    # Normalise each (size, seconds) pair to seconds per 1000 samples, then average.
    avg_time_per_1k = np.mean([t/(s/1000) for s, t in benchmark_timings])
    print(f"  Average processing time: {avg_time_per_1k:.3f}s per 1000 samples")

# Memory summary (skipped when the memory cell did not run)
memory_footprint = globals().get('total_memory')
if memory_footprint is not None:
    print(f"\nMemory Usage:")
    print(f"  Total memory footprint: {memory_footprint:.2f} MB")

print("\n" + "=" * 70)

## 10. Generate Test Report (Optional)

In [None]:
# Persist the category breakdown and overall score as a plain-text report.
# Relies on test_categories, total_passed, total_tests and overall_percentage
# from the summary cell. The file is written relative to the current working
# directory and overwritten on every run.
report_file = 'integration_test_report.txt'

with open(report_file, 'w') as report:
    # Header block, stamped with the current local time
    report.write("="*70 + "\n")
    report.write("INTEGRATION TEST REPORT\n")
    report.write(f"Generated: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
    report.write("="*70 + "\n\n")

    # One line per category: passed/total plus percentage
    report.write("Test Results by Category:\n")
    report.write("-"*70 + "\n")
    for category_name, n_passed, n_total in test_categories:
        if n_total > 0:
            category_pct = n_passed/n_total*100
        else:
            category_pct = 0
        report.write(f"{category_name:30s}: {n_passed:2d}/{n_total:2d} ({category_pct:5.1f}%)\n")

    # Footer with the overall score
    report.write("-"*70 + "\n")
    report.write(f"\nOverall: {total_passed}/{total_tests} ({overall_percentage:.1f}%)\n")
    report.write("="*70 + "\n")

print(f"\n✓ Test report saved to: {report_file}")