In [1]:
import pickle

import numpy as np
from scipy import stats
from scipy.fft import fft
from sklearn.linear_model import LinearRegression

class TimeSeriesFeatureExtractor:
    """Extract statistical, trend and signal features from per-video time series.

    The metrics file is expected to unpickle to an iterable of dicts, each
    carrying ``'cube'``, ``'condition'`` and ``'line'`` identifiers together
    with ``'spatter_intensity'`` and ``'areas'`` mappings from a threshold key
    to a sequence of per-frame values.
    """

    def __init__(self, metrics_file='full_metrics.pickle', window_size=10):
        """
        Initialize feature extractor

        Args:
            metrics_file (str): Path to metrics pickle file
            window_size (int): Number of frames to remove from start/end

        Raises:
            ValueError: If window_size is negative.
        """
        if window_size < 0:
            raise ValueError(f"window_size must be non-negative, got {window_size}")
        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing; only point metrics_file at files you trust.
        with open(metrics_file, 'rb') as f:
            self.metrics = pickle.load(f)
        self.window_size = window_size

    def _prepare_time_series(self, data):
        """Prepare time series by removing first/last frames.

        Returns an empty slice when the series holds no more than
        ``2 * window_size`` elements.
        """
        start = self.window_size
        # Use an explicit end index: ``data[w:-w]`` is empty for w == 0
        # because ``-0 == 0``, which would silently discard the whole series.
        stop = max(start, len(data) - start)
        return data[start:stop]

    def _calculate_trend_features(self, data, ma_window=10):
        """Calculate trend-related features.

        Args:
            data (np.ndarray): 1-D time series.
            ma_window (int): Length of the moving-average window.

        Returns:
            dict: ``'slope'`` and ``'r2'`` of a linear fit of the values
            against their sample index, and ``'ma_stability'`` (standard
            deviation of the moving average).
        """
        X = np.arange(len(data)).reshape(-1, 1)
        y = data.reshape(-1, 1)
        reg = LinearRegression().fit(X, y)

        # Calculate moving average stability (std of moving average)
        ma = np.convolve(data, np.ones(ma_window) / ma_window, mode='valid')
        ma_stability = np.std(ma)

        return {
            'slope': reg.coef_[0][0],
            'r2': reg.score(X, y),
            'ma_stability': ma_stability
        }

    def _calculate_statistical_features(self, data):
        """Calculate statistical features.

        Returns:
            dict: median, 5th/95th percentiles, standard deviation, kurtosis,
            skewness, interquartile range and coefficient of variation.
        """
        return {
            'median': np.median(data),
            'percentile_5': np.percentile(data, 5),
            'percentile_95': np.percentile(data, 95),
            'std': np.std(data),
            'kurtosis': stats.kurtosis(data),
            'skewness': stats.skew(data),
            'iqr': stats.iqr(data),
            'cv': stats.variation(data)
        }

    def _calculate_signal_features(self, data):
        """Calculate signal processing features.

        Returns:
            dict: ``'autocorr_lag1'`` (lag-1 autocorrelation of the
            mean-centered series), ``'dominant_freq'`` (non-negative frequency
            of the strongest non-DC FFT bin) and ``'signal_energy'`` (sum of
            squared values).
        """
        # Autocorrelation (using first lag). A constant series has zero
        # lag-0 autocorrelation, so the normalization below yields NaN.
        centered = data - np.mean(data)
        acf = np.correlate(centered, centered, mode='full')
        acf = acf[len(acf)//2:] / acf[len(acf)//2]

        # FFT features
        fft_vals = np.abs(fft(data))
        fft_freq = np.fft.fftfreq(len(data))
        # Skip the DC component. fftfreq labels the Nyquist bin of an
        # even-length series as -0.5, so take the magnitude to always report
        # a non-negative frequency.
        dominant_freq = np.abs(fft_freq[np.argmax(fft_vals[1:]) + 1])

        # Signal energy
        energy = np.sum(np.square(data))

        return {
            'autocorr_lag1': acf[1],
            'dominant_freq': dominant_freq,
            'signal_energy': energy
        }

    def _extract_series_features(self, series):
        """Trim one raw series and compute every feature group for it."""
        data = self._prepare_time_series(np.array(series))
        features = {}
        features.update(self._calculate_statistical_features(data))
        features.update(self._calculate_trend_features(data))
        features.update(self._calculate_signal_features(data))
        return features

    def extract_features(self, cube, condition, line):
        """Extract all features for a specific video.

        Returns:
            dict or None: ``{'spatter': features, 'area': {threshold: features}}``
            for the first metrics entry matching the identifiers, or ``None``
            when no entry matches.
        """
        metrics = next((m for m in self.metrics
                        if m['cube'] == cube and
                           m['condition'] == condition and
                           m['line'] == line), None)

        if metrics is None:
            return None

        return {
            # Process spatter intensity (th=50 only)
            'spatter': self._extract_series_features(metrics['spatter_intensity'][50]),
            # Process melt pool areas (all thresholds)
            'area': {
                threshold: self._extract_series_features(series)
                for threshold, series in metrics['areas'].items()
            },
        }

    def extract_all_features(self, cubes=range(1, 5), conditions=range(1, 13),
                             lines=range(1, 6)):
        """Extract features for all videos.

        Args:
            cubes (iterable): Cube identifiers to scan.
            conditions (iterable): Condition identifiers to scan.
            lines (iterable): Line identifiers to scan.

        Returns:
            dict: ``{cube: {condition: {line: features}}}`` holding only the
            combinations for which ``extract_features`` returned a result.
        """
        # Materialize so one-shot iterators can be re-scanned in the inner loops.
        conditions = tuple(conditions)
        lines = tuple(lines)

        all_features = {}

        for cube in cubes:
            cube_features = {}
            for condition in conditions:
                condition_features = {}
                for line in lines:
                    features = self.extract_features(cube, condition, line)
                    if features is not None:
                        condition_features[line] = features
                if condition_features:
                    cube_features[condition] = condition_features
            if cube_features:
                all_features[cube] = cube_features

        return all_features


In [2]:
import pickle 

# Build the extractor with its defaults: metrics are read from
# 'full_metrics.pickle' and 10 frames are trimmed from each end of every series.
extractor = TimeSeriesFeatureExtractor()


In [3]:
# Run extraction over the cube/condition/line grid; combinations with no
# matching metrics entry are left out of the result.
all_features = extractor.extract_all_features()


In [4]:
# Display the nested result: cube -> condition -> line -> {'spatter': ..., 'area': ...}.
all_features

{1: {1: {1: {'spatter': {'median': np.float64(619.375824175824),
     'percentile_5': np.float64(439.44581196581197),
     'percentile_95': np.float64(808.9418803418802),
     'std': np.float64(113.10365487505167),
     'kurtosis': np.float64(-0.7299446935593799),
     'skewness': np.float64(0.15633887550147213),
     'iqr': np.float64(180.11414326414297),
     'cv': np.float64(0.18193580066443724),
     'slope': np.float64(1.4307729287291255),
     'r2': 0.8671227616308081,
     'ma_stability': np.float64(106.52461197127937),
     'autocorr_lag1': np.float64(0.935036056427444),
     'dominant_freq': np.float64(0.00392156862745098),
     'signal_energy': np.float64(101812184.22247198)},
    'area': {1: {'median': np.float64(9275.0),
      'percentile_5': np.float64(2769.6),
      'percentile_95': np.float64(17444.0),
      'std': np.float64(4361.743164639469),
      'kurtosis': np.float64(0.19539122541304366),
      'skewness': np.float64(0.5410619497887829),
      'iqr': np.float64(53

In [5]:
# Save features
# Serializes the nested feature dictionary to 'extracted_features.pickle'
# (binary mode, path relative to the current working directory).
with open('extracted_features.pickle', 'wb') as f:
    pickle.dump(all_features, f)

median: Middle value of the time series. Represents typical process behavior.

percentile_5: Value below which 5% of measurements fall. Captures lower bound of normal operation.

percentile_95: Value below which 95% of measurements fall. Captures upper bound of normal operation.

std: Standard deviation - measures spread of values around mean. Higher values = more variable process.

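kurtosis: Excess (Fisher) kurtosis, the default of `scipy.stats.kurtosis`; a normal distribution scores 0. Measures tail heaviness: higher values indicate more outliers/extreme events, negative values indicate lighter tails than a normal distribution.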

slope: Rate of change over time from linear regression. Positive = increasing trend, negative = decreasing.

r2: How well linear trend fits (0-1). Higher values mean more consistent trend.

ma_stability: Standard deviation of 10-point moving average. Lower values = more stable process.

autocorr_lag1: Correlation between consecutive measurements. Higher values = stronger temporal patterns.

skewness: Asymmetry of distribution. Positive = right tail longer, negative = left tail longer.

iqr: Difference between 75th and 25th percentiles. Robust measure of spread.

cv: Standard deviation divided by mean. Normalized measure of variability.

dominant_freq: Frequency of the largest-magnitude FFT component, excluding the DC (zero-frequency) term, in cycles per sample. Indicates main periodic component.

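signal_energy: Sum of squared values of the trimmed series (not mean-centered). Reflects the overall magnitude of the signal, including its mean level, and grows with series length.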