In [7]:
import numpy as np
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt
from plotly.offline import plot

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from utils import save_plotly_figure_as_html
from ex_01_read_data import get_welding_data



In [8]:
data_path = Path("data/Welding/data.csv")
plot_path = Path("plots/ex_02")

## Exercise 2.1: Dataset Statistics Analysis

In this exercise, we'll calculate and present comprehensive descriptive statistics for the welding dataset. This statistical overview will help us understand the fundamental characteristics of our data before visualization or modeling.

1. Calculate the class distribution to understand data balance
2. Count samples per experiment to assess data volume across experimental conditions
3. Determine min/max values for voltage and current to understand the range of measurements
4. Calculate mean and standard deviation to assess central tendency and variability
5. Find median values to identify central points unaffected by outliers

In [None]:
data, labels, exp_ids = get_welding_data(data_path)

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from ex_01_read_data import get_welding_data

# 1) Pfad zur CSV-Datei
data_path = Path("data/Welding/data.csv")

# 2) Daten laden
#    WICHTIG: get_welding_data liefert (labels, exp_ids, data)
labels, exp_ids, data = get_welding_data(data_path)

# data hat Shape (n_samples, timesteps, 2), 
#  Feature-Dimension 0 = current, 1 = voltage

# --- Exercise 2.1: Dataset Statistics Analysis ---

# 1. Klassenverteilung (je Sample ein Label)
class_counts = pd.Series(labels).value_counts().sort_index()
print("1) Klassenverteilung:")
print(class_counts)

# 2. Anzahl Samples pro Experiment
exp_counts = pd.Series(exp_ids).value_counts().sort_index()
print("\n2) Samples pro Experiment:")
print(exp_counts)

# 3. Min/Max-Werte über alle Messpunkte hinweg
#    Wir „flatten“ Samples und Timesteps, getrennt für current und voltage
curr_values = data[:, :, 0].ravel()
volt_values = data[:, :, 1].ravel()

print(f"\n3) Spannung: min = {volt_values.min():.3f}, max = {volt_values.max():.3f}")
print(f"   Strom:     min = {curr_values.min():.3f}, max = {curr_values.max():.3f}")

# 4. Mittelwert & Standardabweichung
print(f"\n4) Spannung: mean = {volt_values.mean():.3f}, std = {volt_values.std(ddof=1):.3f}")
print(f"   Strom:     mean = {curr_values.mean():.3f}, std = {curr_values.std(ddof=1):.3f}")

# 5. Medianwerte
print(f"\n5) Spannung: median = {np.median(volt_values):.3f}")
print(f"   Strom:     median = {np.median(curr_values):.3f}")


ValueError: Data must be 1-dimensional, got ndarray of shape (157965, 200, 2) instead

## Exercise 2.2: Current and Voltage Distribution Visualization

In this exercise, we'll create interactive boxplots to visualize and compare the distributions of voltage and current measurements in the welding data. Boxplots will help us identify central tendencies, spread, and potential outliers in our measurements.

1. Create side-by-side boxplots for voltage and current using Plotly
2. Display key statistics (median, quartiles, etc.) in a visual format in the plot
3. Enable interactive exploration of the distributions
4. Save the visualization for future reference

In [None]:
data, labels, exp_ids = get_welding_data(data_path, n_samples=10_000)

save_path =  plot_path / "voltage_current_distribution"

## Exercise 2.3: Time-Series Sequence Visualization

In this exercise, we'll implement functions to visualize the time-series patterns of welding cycles. These visualizations will help us understand the temporal relationships between voltage and current during the welding process and identify patterns associated with quality outcomes.

1. Create dual-axis plots showing voltage and current over time (10 welding cycles -> sequence_length=10) using matplotlib
2. Implement clear legends and labels for data interpretation
3. Enable saving of visualizations for reporting and comparison

In [None]:

data, labels, exp_ids = get_welding_data(data_path, n_samples=100, return_sequences=True, sequence_length=10)
save_path = plot_path / "welding_sample.png"    

## Exercise 2.4: Interactive Time-Series Visualization with Plotly

In this exercise, we'll create enhanced interactive visualizations of welding samples using Plotly. These interactive plots will provide more advanced exploration capabilities for analyzing the time-series patterns.

1. Create interactive plots with dual y-axes for voltage and current
2. Implement time-based range sliders for detailed exploration
3. Add unified tooltips for precise data reading
4. Display quality metrics in the plot title
5. Save interactive visualizations as HTML for sharing

In [None]:
def create_plotly_plot(data: np.ndarray, labels: np.ndarray, exp_ids: np.ndarray = None) -> go.Figure:
    """
    Create an interactive Plotly visualization of a random welding sample.

    Args:
        data (np.ndarray): Array containing voltage and current data
        labels (np.ndarray): Array containing class labels
        exp_ids (np.ndarray, optional): Array containing experiment IDs. Defaults to None.

    Returns:
        plotly.graph_objects.Figure: Interactive Plotly figure object
    """
    pass


fig = create_plotly_plot(data, labels, exp_ids)
save_plotly_figure_as_html(fig, plot_path / "welding_samples")
fig.show()

## Exercise 2.5: Multiple Sample Comparison

In this exercise, we'll generate and compare visualizations from multiple random welding samples. This comparison will help us identify common patterns and variations across different welding cycles.
 

1. Generate multiple random sample visualizations using matplotlib of single welding cycles
2. Create dual-axis plots showing voltage and current over time
3. Implement clear legends and labels for data interpretation
4. Save each visualization for comparison

In [None]:
data, labels, exp_ids = get_welding_data(data_path, n_samples=1_000)

In [None]:
for i in range(5):
    plot_random_sample(data, labels, save_path=plot_path / f"welding_sample_{i}.png")
    