# Visualization Testing

This notebook tests various visualization components and plotting functions.

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns

# Global look-and-feel shared by every figure in this notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

# Make the parent directory and its Tests/ folder importable. Each entry is
# pushed to the front of sys.path, so Tests/ (inserted last) ends up first.
project_root = os.path.abspath('..')
for import_dir in (project_root, os.path.join(project_root, 'Tests')):
    sys.path.insert(0, import_dir)

print("Imports successful!")

## 1. Load Test Data

In [None]:
from Normal_data_to_fft import (
    load_and_process_asc_file,
    extract_pump_channels,
    process_all_channels_from_asc
)

# Load the recorded measurement when the file is present; otherwise build
# synthetic signals so the remaining cells can still be exercised.
test_file = '../Tests/Data/V24-2025__0011_2.ASC'

# Fixed seed for the synthetic fallback so Restart & Run All reproduces the
# same noise (and therefore the same figures) every time.
SYNTHETIC_SEED = 42

if os.path.exists(test_file):
    df = load_and_process_asc_file(test_file)
    data = extract_pump_channels(df)
    print(f"✓ Loaded real data: {df.shape}")
else:
    # Create synthetic data: each channel is an offset plus one or two sine
    # components plus Gaussian noise, sampled at 5000 points over 0..50.
    print("Creating synthetic data...")
    rng = np.random.default_rng(SYNTHETIC_SEED)
    t = np.linspace(0, 50, 5000)
    data = {
        'Messzeit[s]': t,
        'Pressure [bar]': 50 + 5*np.sin(2*np.pi*0.5*t) + 2*np.sin(2*np.pi*5*t) + rng.normal(0, 0.5, len(t)),
        'Flow [L/min]': 100 + 10*np.sin(2*np.pi*0.3*t) + 3*np.sin(2*np.pi*3*t) + rng.normal(0, 1, len(t)),
        'Speed [rpm]': 1500 + 50*np.sin(2*np.pi*0.2*t) + 20*np.sin(2*np.pi*2*t) + rng.normal(0, 5, len(t)),
        'Torque [Nm]': 150 + 20*np.sin(2*np.pi*0.4*t) + 5*np.sin(2*np.pi*4*t) + rng.normal(0, 2, len(t)),
        'Temperature [°C]': 65 + 3*np.sin(2*np.pi*0.1*t) + rng.normal(0, 0.3, len(t))
    }
    df = pd.DataFrame(data)
    print("✓ Created synthetic data")

# The time axis is 'Messzeit[s]' when present, otherwise the first key;
# every other key is treated as a measurement channel.
time_key = 'Messzeit[s]' if 'Messzeit[s]' in data else list(data.keys())[0]
channel_names = [k for k in data.keys() if k != time_key]

## 2. Time Series Line Plots

In [None]:
# Multi-channel time series plot: one stacked subplot per channel (first four
# channels), each with a dashed red line at the channel mean.
channels_to_plot = channel_names[:4]

# Take the time axis as a plain array so [0] / [-1] below are positional.
# If `data` holds pandas Series (the real-data loader's return type is not
# visible in this notebook), label-based `series[-1]` would raise KeyError.
time_values = np.asarray(data[time_key])

# squeeze=False always yields a 2-D axes array, so the single-channel case
# needs no special handling.
fig, axes = plt.subplots(len(channels_to_plot), 1,
                         figsize=(15, 3*len(channels_to_plot)), squeeze=False)
axes = axes[:, 0]

fig.suptitle('Multi-Channel Time Series Visualization', fontsize=16, fontweight='bold')

colors = plt.cm.Set2(np.linspace(0, 1, len(channels_to_plot)))

for i, channel in enumerate(channels_to_plot):
    axes[i].plot(time_values, data[channel], linewidth=1.0, color=colors[i], alpha=0.8)
    axes[i].set_ylabel(channel, fontsize=11, fontweight='bold')
    axes[i].grid(True, alpha=0.3, linestyle='--')
    # Pin the x-range to the recorded time span (no autoscale padding).
    axes[i].set_xlim([time_values[0], time_values[-1]])

    # Add mean line
    mean_val = np.mean(data[channel])
    axes[i].axhline(y=mean_val, color='red', linestyle='--', linewidth=1.5, alpha=0.5, label=f'Mean: {mean_val:.2f}')
    axes[i].legend(loc='upper right')

# Only the bottom subplot carries the shared x-axis label.
axes[-1].set_xlabel('Time [s]', fontsize=11, fontweight='bold')
plt.tight_layout()
plt.show()

## 3. Correlation Matrix Heatmap

In [None]:
# Pairwise correlations between all numeric columns of the loaded frame
# (computed with DataFrame.corr() and its default settings).
df_numeric = df.select_dtypes(include=[np.number])
correlation_matrix = df_numeric.corr()

# Annotated heatmap, colour scale centred on zero.
fig, ax = plt.subplots(figsize=(10, 8))
heatmap_options = dict(
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title('Channel Correlation Matrix', fontsize=14, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

# List every column pair above the diagonal whose |r| exceeds 0.7.
print("\nHighly Correlated Pairs (|r| > 0.7):")
column_labels = correlation_matrix.columns
n_columns = len(column_labels)
for row in range(n_columns):
    for col in range(row + 1, n_columns):
        r_value = correlation_matrix.iloc[row, col]
        if abs(r_value) > 0.7:
            print(f"  {column_labels[row]} <-> {column_labels[col]}: {r_value:.3f}")

## 4. Distribution Plots (Histograms + KDE)

In [None]:
# Distribution plots for all channels: a density-normalised histogram with a
# Gaussian KDE overlay per channel, laid out on a two-column grid.
from scipy.stats import gaussian_kde  # imported once, not on every loop pass

n_channels = len(channel_names)
n_cols = 2
n_rows = (n_channels + n_cols - 1) // n_cols  # ceil(n_channels / n_cols)

# With two columns plt.subplots returns an array of axes even for a single
# channel, so always request the 2-D form and flatten it. Wrapping the result
# in a list for n_channels == 1 would leave an ndarray at axes[0] and break
# the `.hist` call below.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 4*n_rows), squeeze=False)
axes = axes.flatten()

fig.suptitle('Channel Value Distributions', fontsize=16, fontweight='bold')

# The grid always has at least n_channels cells, so zip covers every channel.
for ax, channel in zip(axes, channel_names):
    ax.hist(data[channel], bins=50, alpha=0.7, color='steelblue', edgecolor='black', density=True)

    # Add KDE, evaluated on 200 points spanning the channel's value range
    kde = gaussian_kde(data[channel])
    x_range = np.linspace(data[channel].min(), data[channel].max(), 200)
    ax.plot(x_range, kde(x_range), 'r-', linewidth=2, label='KDE')

    ax.set_xlabel(channel, fontsize=10)
    ax.set_ylabel('Density', fontsize=10)
    ax.set_title(f'{channel} Distribution', fontsize=11, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Hide extra subplots (the unused cell when the channel count is odd)
for ax in axes[n_channels:]:
    ax.axis('off')

plt.tight_layout()
plt.show()

## 5. Box Plots for Outlier Detection

In [None]:
# Box plots of every channel on a common z-score scale, so channels with
# different units can be compared side by side for outliers.
fig, ax = plt.subplots(figsize=(14, 6))

# Normalize data for comparison
normalized_data = []
labels = []

for channel in channel_names:
    channel_data = data[channel]
    spread = np.std(channel_data)
    # A constant channel has zero spread; only centre it in that case so the
    # result is a flat line at 0 rather than NaN/inf from a division by zero.
    scale = spread if spread > 0 else 1.0
    normalized = (channel_data - np.mean(channel_data)) / scale
    normalized_data.append(normalized)
    labels.append(channel)

# Tick labels are applied after drawing instead of via boxplot(labels=...):
# that keyword is deprecated since Matplotlib 3.9 (renamed `tick_labels`),
# while set_xticklabels works on every version.
bp = ax.boxplot(normalized_data, patch_artist=True,
                showmeans=True, meanline=True)
ax.set_xticklabels(labels)

# Color boxes
colors = plt.cm.Set3(np.linspace(0, 1, len(normalized_data)))
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)

ax.set_ylabel('Normalized Value (z-score)', fontsize=11, fontweight='bold')
ax.set_title('Channel Value Distributions - Box Plots', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.tight_layout()
plt.show()

## 6. Scatter Plot Matrix (Pair Plot)

In [None]:
# Select subset of channels for pair plot (max 4 for readability).
# Slicing already clamps to the list length, so no explicit min() is needed.
channels_for_pairplot = channel_names[:4]
df_subset = df[channels_for_pairplot]

# Create pair plot: scatter plots off the diagonal, KDE curves on it.
pairplot = sns.pairplot(df_subset, diag_kind='kde', plot_kws={'alpha': 0.6, 's': 20})
# `.figure` is the supported accessor for the underlying Matplotlib figure;
# the older `.fig` alias is deprecated in seaborn.
pairplot.figure.suptitle('Channel Relationships - Scatter Plot Matrix',
                         fontsize=16, fontweight='bold', y=1.02)
plt.show()

## 7. FFT Spectrum Visualization

In [None]:
# Compute the FFT features for all channels, then plot the spectrum of the
# first four channels with their three strongest spectral peaks annotated.
from scipy.signal import find_peaks

freqs, fft_features, feature_matrix = process_all_channels_from_asc(
    data, window_size=1024, overlap=0.5, max_freq=500.0
)

# Plot FFT for multiple channels (slicing clamps to the available count)
channels_for_fft = channel_names[:4]
# squeeze=False always yields a 2-D axes array, so one channel needs no
# special-casing.
fig, axes = plt.subplots(len(channels_for_fft), 1,
                         figsize=(15, 4*len(channels_for_fft)), squeeze=False)
axes = axes[:, 0]

fig.suptitle('Frequency Domain Analysis (FFT)', fontsize=16, fontweight='bold')

for i, channel in enumerate(channels_for_fft):
    # The spectrum is plotted against `freqs` and indexed with the same
    # positions, so it is handled as a 1-D array of magnitudes.
    spectrum = np.asarray(fft_features[channel])

    axes[i].plot(freqs, spectrum, linewidth=1.5, color=plt.cm.tab10(i))
    axes[i].set_ylabel('Magnitude', fontsize=11)
    axes[i].set_title(f'{channel} - Frequency Spectrum', fontsize=12, fontweight='bold')
    axes[i].grid(True, alpha=0.3)
    axes[i].set_xlim([0, min(200, freqs[-1])])  # Focus on low frequencies

    # Mark top 3 peaks. Candidates are local maxima rather than the three
    # largest bins: the largest bins are usually neighbours on the flank of a
    # single peak (or the first/DC bin), which would label one peak three times.
    peak_indices, _ = find_peaks(spectrum)
    if len(peak_indices) == 0:
        # No interior local maximum (e.g. flat or monotonic spectrum):
        # fall back to ranking every bin.
        peak_indices = np.arange(len(spectrum))
    strongest_first = np.argsort(spectrum[peak_indices])[-3:][::-1]
    top_indices = peak_indices[strongest_first]

    for idx in top_indices:
        axes[i].plot(freqs[idx], spectrum[idx], 'ro', markersize=8)
        axes[i].annotate(f'{freqs[idx]:.1f} Hz',
                        xy=(freqs[idx], spectrum[idx]),
                        xytext=(10, 10), textcoords='offset points',
                        fontsize=9, color='darkred',
                        bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow', alpha=0.7))

axes[-1].set_xlabel('Frequency [Hz]', fontsize=11, fontweight='bold')
plt.tight_layout()
plt.show()

## 8. Spectrogram (Time-Frequency Analysis)

In [None]:
from scipy import signal

# Select one channel for spectrogram
test_channel = channel_names[0]
test_signal = data[test_channel]

# Estimate sampling rate from the mean spacing between time stamps
t = data[time_key]
sampling_rate = 1.0 / np.mean(np.diff(t))

# Compute spectrogram. The segment length is capped at the signal length and
# the overlap kept at half a segment, so short recordings do not fail with
# "noverlap must be less than nperseg". For signals of 512+ samples this is
# exactly nperseg=512, noverlap=256.
nperseg = min(512, len(test_signal))
noverlap = nperseg // 2
f, t_spec, Sxx = signal.spectrogram(test_signal, fs=sampling_rate,
                                    nperseg=nperseg, noverlap=noverlap)

# Convert power to dB. Bins with exactly zero power are floored at the
# smallest positive float so log10 yields a finite value instead of -inf,
# which would break the colour mapping; non-zero bins are unaffected.
power_db = 10 * np.log10(np.maximum(Sxx, np.finfo(float).tiny))

# Plot spectrogram
fig, ax = plt.subplots(figsize=(15, 6))
im = ax.pcolormesh(t_spec, f, power_db, shading='gouraud', cmap='viridis')
ax.set_ylabel('Frequency [Hz]', fontsize=11, fontweight='bold')
ax.set_xlabel('Time [s]', fontsize=11, fontweight='bold')
ax.set_title(f'Spectrogram - {test_channel}', fontsize=14, fontweight='bold')
ax.set_ylim([0, min(100, f[-1])])  # Focus on low frequencies
plt.colorbar(im, ax=ax, label='Power [dB]')
plt.tight_layout()
plt.show()

## 9. Dashboard-Style Comprehensive View

In [None]:
# Create comprehensive dashboard: a 4x3 grid combining time series, FFT
# spectra, histograms, a correlation heatmap, box plots and a statistics table.
# Uses `freqs`, `fft_features` and `correlation_matrix` from the FFT and
# correlation cells earlier in the notebook.
fig = plt.figure(figsize=(18, 12))
gs = GridSpec(4, 3, figure=fig, hspace=0.3, wspace=0.3)

# The dashboard shows at most three channels (one per grid column in row 3).
dashboard_channels = channel_names[:3]

# Title
fig.suptitle('Industrial Data System - Comprehensive Dashboard',
             fontsize=18, fontweight='bold', y=0.98)

# 1. Time series (top row), z-scored so all channels share one y-axis
ax1 = fig.add_subplot(gs[0, :])
for channel in dashboard_channels:
    normalized = (data[channel] - np.mean(data[channel])) / np.std(data[channel])
    ax1.plot(data[time_key], normalized, label=channel, linewidth=1.0, alpha=0.8)
ax1.set_ylabel('Normalized Value')
ax1.set_title('Time Series (Normalized)', fontweight='bold')
ax1.legend(loc='upper right')
ax1.grid(True, alpha=0.3)

# 2. FFT Spectrum
ax2 = fig.add_subplot(gs[1, :])
for channel in dashboard_channels:
    ax2.plot(freqs, fft_features[channel], label=channel, linewidth=1.5, alpha=0.8)
ax2.set_xlabel('Frequency [Hz]')
ax2.set_ylabel('Magnitude')
ax2.set_title('Frequency Spectrum', fontweight='bold')
# Clamp to the highest available frequency, as the other spectrum plots in
# this notebook do, so the axis never extends past the data.
ax2.set_xlim([0, min(100, freqs[-1])])
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Distributions (one histogram per channel, coloured C0, C1, C2)
for i, channel in enumerate(dashboard_channels):
    ax = fig.add_subplot(gs[2, i])
    ax.hist(data[channel], bins=30, alpha=0.7, color=f'C{i}', edgecolor='black')
    ax.set_xlabel(channel)
    ax.set_ylabel('Frequency')
    ax.set_title(f'{channel}\nDistribution', fontsize=10, fontweight='bold')
    ax.grid(True, alpha=0.3)

# 4. Correlation heatmap
ax6 = fig.add_subplot(gs[3, 0])
corr_subset = correlation_matrix.iloc[:4, :4]  # First 4x4
sns.heatmap(corr_subset, annot=True, fmt='.2f', cmap='coolwarm',
            center=0, square=True, cbar=False, ax=ax6)
ax6.set_title('Correlation Matrix', fontsize=10, fontweight='bold')

# 5. Box plot (raw values; labels truncated to 15 characters to fit)
ax7 = fig.add_subplot(gs[3, 1])
bp_data = [data[ch] for ch in dashboard_channels]
# Tick labels are set after drawing: boxplot(labels=...) is deprecated since
# Matplotlib 3.9, whereas set_xticklabels works on every version.
ax7.boxplot(bp_data)
ax7.set_xticklabels([ch[:15] for ch in dashboard_channels])
ax7.set_title('Value Ranges', fontsize=10, fontweight='bold')
ax7.grid(True, alpha=0.3, axis='y')
plt.setp(ax7.xaxis.get_majorticklabels(), rotation=45, ha='right')

# 6. Statistics table (axes frame hidden; only the table is drawn)
ax8 = fig.add_subplot(gs[3, 2])
ax8.axis('tight')
ax8.axis('off')
stats_data = []
for channel in dashboard_channels:
    stats_data.append([
        channel[:15],
        f"{np.mean(data[channel]):.2f}",
        f"{np.std(data[channel]):.2f}",
        f"{np.min(data[channel]):.2f}",
        f"{np.max(data[channel]):.2f}"
    ])
table = ax8.table(cellText=stats_data,
                 colLabels=['Channel', 'Mean', 'Std', 'Min', 'Max'],
                 cellLoc='center', loc='center')
table.auto_set_font_size(False)
table.set_fontsize(9)
table.scale(1, 2)
ax8.set_title('Statistics Summary', fontsize=10, fontweight='bold', pad=20)

plt.show()

## 10. Summary

In [None]:
# Final report: frame dimensions followed by one status line per
# visualization section of the notebook, framed by 70-character rules.
banner = "=" * 70
status_lines = [
    "✓ Time series plots: Created",
    "✓ Correlation heatmap: Created",
    "✓ Distribution plots: Created",
    "✓ Box plots: Created",
    "✓ Pair plots: Created",
    "✓ FFT spectrum: Created",
    "✓ Spectrogram: Created",
    "✓ Comprehensive dashboard: Created",
]
sample_count, column_count = df.shape[0], df.shape[1]

print("\n" + banner)
print("VISUALIZATION TEST SUMMARY")
print(banner)
print(f"✓ Loaded data: {sample_count} samples, {column_count} channels")
for status_line in status_lines:
    print(status_line)
print(banner)