In [1]:
import gc
import pickle
from collections import defaultdict

import cv2
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from video_loader import VideoLoader

class MetricCollector:
    """Collect per-frame melt pool area and spatter metrics for a set of videos.

    Videos are fetched through a ``VideoLoader`` keyed by (cube, condition,
    line). For every frame, the largest connected bright region is measured at
    each value in ``self.thresholds``, and the normalised brightness outside
    the ``SPATTER_THRESHOLD`` region is summed as the spatter intensity.
    """

    # Threshold (on the 0-255 normalised scale) whose melt pool mask is
    # excluded when computing the spatter intensity.
    SPATTER_THRESHOLD = 50

    def __init__(self, data_dir="segmented_videos_data"):
        """Create the video loader for ``data_dir`` and set the area thresholds."""
        self.data_dir = data_dir
        self.loader = VideoLoader(data_dir, "metadata.pkl")
        # Thresholds are applied to frames rescaled to the 0-255 range.
        self.thresholds = [10, 25, 50, 75, 100, 150]

    @staticmethod
    def _shift_to_zero(frame):
        """Return ``frame - frame.min()`` in floating point plus the value span.

        Non-float input is promoted to float64 first so that the later
        multiplication by 255 cannot wrap around in a narrow integer dtype.
        """
        values = np.asarray(frame)
        if not np.issubdtype(values.dtype, np.floating):
            values = values.astype(np.float64)
        lowest = values.min()
        return values - lowest, values.max() - lowest

    def _normalize_to_uint8(self, frame):
        """Min-max scale ``frame`` to 0..255 and truncate to uint8.

        A uniform frame (max == min) has no contrast to scale; it is mapped to
        all zeros instead of dividing by zero.
        """
        shifted, span = self._shift_to_zero(frame)
        if span == 0:
            return np.zeros(shifted.shape, dtype=np.uint8)
        return (shifted * 255 / span).astype(np.uint8)

    def _largest_component_mask(self, frame_normalized, threshold):
        """Boolean mask of the largest connected region above ``threshold``."""
        _, binary = cv2.threshold(frame_normalized, threshold, 255, cv2.THRESH_BINARY)
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(binary)

        # Label 0 is the background; fewer than two labels means no foreground.
        if num_labels < 2:
            return np.zeros(frame_normalized.shape, dtype=bool)

        # argmax returns the first maximum, so ties resolve to the lowest label.
        largest_label = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
        return labels == largest_label

    def get_melt_pool_mask(self, frame, threshold=50):
        """Return a boolean mask of the largest bright connected component.

        The frame is min-max normalised to 0..255, binarised at ``threshold``
        and labelled with connected components; the component with the largest
        pixel count is kept.

        Args:
            frame: single-channel image array.
            threshold: cut-off on the 0..255 normalised scale.

        Returns:
            Boolean array with the shape of ``frame``; all False when nothing
            exceeds the threshold.
        """
        return self._largest_component_mask(self._normalize_to_uint8(frame), threshold)

    def get_spatter_intensity(self, frame, melt_pool_mask):
        """Sum the min-max normalised brightness outside ``melt_pool_mask``.

        Args:
            frame: single-channel image array.
            melt_pool_mask: boolean array, True where the melt pool is.

        Returns:
            Sum of the 0..1 normalised pixel values outside the mask, or 0.0
            for a uniform frame, which has no contrast to normalise.
        """
        shifted, span = self._shift_to_zero(frame)
        if span == 0:
            return 0.0
        outside_pool = ~melt_pool_mask
        return np.sum((shifted / span) * outside_pool)

    def process_single_video(self, cube, condition, line):
        """Process a single video and return its metrics.

        Returns:
            ``None`` when the loader returns ``None`` for this video, otherwise
            a dict with the keys 'cube', 'condition', 'line', 'frame_count',
            'areas' (threshold -> float array of per-frame areas) and
            'spatter_intensity' (array of per-frame values).
        """
        frames = self.loader.get_video(cube, condition, line)
        if frames is None:
            return None

        print(f"Processing Cube: {cube}, condition: {condition}, line: {line}")

        areas = {th: [] for th in self.thresholds}
        spatter_intensity = []

        for frame in frames:
            # Normalise once per frame; only the binarisation depends on the threshold.
            frame_normalized = self._normalize_to_uint8(frame)

            spatter_pool_mask = None
            for th in self.thresholds:
                mask = self._largest_component_mask(frame_normalized, th)
                # The mask holds a single component, so its area is its True count.
                areas[th].append(np.count_nonzero(mask))
                if th == self.SPATTER_THRESHOLD:
                    spatter_pool_mask = mask

            # Only recompute the spatter mask when its threshold is not in the list.
            if spatter_pool_mask is None:
                spatter_pool_mask = self._largest_component_mask(
                    frame_normalized, self.SPATTER_THRESHOLD
                )
            spatter_intensity.append(self.get_spatter_intensity(frame, spatter_pool_mask))

        video_metrics = {
            'cube': cube,
            'condition': condition,
            'line': line,
            'frame_count': len(frames),
            # Arrays are more compact than lists; areas are stored as float64.
            'areas': {th: np.array(values, dtype=np.float64) for th, values in areas.items()},
            'spatter_intensity': np.array(spatter_intensity),
        }

        # Drop the reference to the frames before moving on to the next video.
        del frames
        gc.collect()

        return video_metrics

    def collect_all_metrics(self, cubes=range(1, 5), conditions=range(1, 13), lines=range(1, 6)):
        """Process every (cube, condition, line) combination and collect metrics.

        Args:
            cubes: cube ids to sweep (default 1..4).
            conditions: condition ids to sweep (default 1..12).
            lines: line ids to sweep (default 1..5).

        Returns:
            List of per-video metric dicts, skipping videos for which
            ``process_single_video`` returned ``None``.
        """
        cubes, conditions, lines = list(cubes), list(conditions), list(lines)
        total_videos = len(cubes) * len(conditions) * len(lines)

        all_metrics = []

        # The context manager closes the bar even if a video raises.
        with tqdm(total=total_videos, desc="Processing videos") as pbar:
            for cube in cubes:
                for condition in conditions:
                    for line in lines:
                        metrics = self.process_single_video(cube, condition, line)
                        if metrics is not None:
                            all_metrics.append(metrics)
                        pbar.update(1)

        return all_metrics

    @staticmethod
    def _summary_stats(values):
        """Return mean/std/min/max of ``values``; all NaN when it is empty."""
        values = np.asarray(values)
        if values.size == 0:
            # np.min/np.max raise on empty input; report missing values instead.
            return {'mean': np.nan, 'std': np.nan, 'min': np.nan, 'max': np.nan}
        return {
            'mean': np.mean(values),
            'std': np.std(values),
            'min': np.min(values),
            'max': np.max(values),
        }

    def calculate_summary_statistics(self, all_metrics):
        """Calculate summary statistics for all processed videos.

        Args:
            all_metrics: list of dicts as returned by ``process_single_video``.

        Returns:
            DataFrame with one row per video: the identifying columns,
            ``area_{mean,std,min,max}_th{T}`` for each threshold ``T`` and
            ``spatter_{mean,std,min,max}``. Statistics of a video without
            frames are NaN.
        """
        summary_data = []

        for metrics in all_metrics:
            summary_dict = {
                'cube': metrics['cube'],
                'condition': metrics['condition'],
                'line': metrics['line'],
                'frame_count': metrics['frame_count'],
            }

            for th in self.thresholds:
                for stat, value in self._summary_stats(metrics['areas'][th]).items():
                    summary_dict[f'area_{stat}_th{th}'] = value

            for stat, value in self._summary_stats(metrics['spatter_intensity']).items():
                summary_dict[f'spatter_{stat}'] = value

            summary_data.append(summary_dict)

        return pd.DataFrame(summary_data)


In [2]:
# Build the collector; its constructor also creates the VideoLoader for this data directory.
collector = MetricCollector("segmented_videos_data")


In [3]:
# Process every cube/condition/line combination and keep the list of per-video
# metric dictionaries for the cells below.
metrics = collector.collect_all_metrics()


Processing videos:   0%|          | 0/240 [00:00<?, ?it/s]

Processing Cube: 1, condition: 1, line: 1
Processing Cube: 1, condition: 1, line: 2
Processing Cube: 1, condition: 1, line: 3
Processing Cube: 1, condition: 1, line: 4
Processing Cube: 1, condition: 1, line: 5
Processing Cube: 1, condition: 2, line: 1
Processing Cube: 1, condition: 2, line: 2
Processing Cube: 1, condition: 2, line: 3
Processing Cube: 1, condition: 2, line: 4
Processing Cube: 1, condition: 2, line: 5
Processing Cube: 1, condition: 3, line: 1
Processing Cube: 1, condition: 3, line: 2
Processing Cube: 1, condition: 3, line: 3
Processing Cube: 1, condition: 3, line: 4
Processing Cube: 1, condition: 3, line: 5
Processing Cube: 1, condition: 4, line: 1
Processing Cube: 1, condition: 4, line: 2
Processing Cube: 1, condition: 4, line: 3
Processing Cube: 1, condition: 4, line: 4
Processing Cube: 1, condition: 4, line: 5
Processing Cube: 1, condition: 5, line: 1
Processing Cube: 1, condition: 5, line: 2
Processing Cube: 1, condition: 5, line: 3
Processing Cube: 1, condition: 5, 

Processing Cube: 4, condition: 4, line: 1
Processing Cube: 4, condition: 4, line: 2
Processing Cube: 4, condition: 4, line: 3
Processing Cube: 4, condition: 4, line: 4
Processing Cube: 4, condition: 4, line: 5
Processing Cube: 4, condition: 5, line: 1
Processing Cube: 4, condition: 5, line: 2
Processing Cube: 4, condition: 5, line: 3
Processing Cube: 4, condition: 5, line: 4
Processing Cube: 4, condition: 5, line: 5
Processing Cube: 4, condition: 6, line: 1
Processing Cube: 4, condition: 6, line: 2
Processing Cube: 4, condition: 6, line: 3
Processing Cube: 4, condition: 6, line: 4
Processing Cube: 4, condition: 6, line: 5
Processing Cube: 4, condition: 7, line: 1
Processing Cube: 4, condition: 7, line: 2
Processing Cube: 4, condition: 7, line: 3
Processing Cube: 4, condition: 7, line: 4
Processing Cube: 4, condition: 7, line: 5
Processing Cube: 4, condition: 8, line: 1
Processing Cube: 4, condition: 8, line: 2
Processing Cube: 4, condition: 8, line: 3
Processing Cube: 4, condition: 8, 

In [4]:
# Persist the raw per-video metrics so the sweep above does not have to be repeated.
# NOTE: only unpickle this file from a trusted source; pickle.load can run arbitrary code.
with open('full_metrics.pickle', 'wb') as metrics_file:
    pickle.dump(metrics, metrics_file)
    

In [8]:
# Show every row when a DataFrame is displayed. This changes the pandas option
# for the rest of the session, not just for this cell.
pd.set_option('display.max_rows', None)

# Reduce each video's per-frame metrics to mean/std/min/max columns (one row per video)
summary_df = collector.calculate_summary_statistics(metrics)
summary_df

Unnamed: 0,cube,condition,line,frame_count,area_mean_th10,area_std_th10,area_min_th10,area_max_th10,area_mean_th25,area_std_th25,area_min_th25,area_max_th25,area_mean_th50,area_std_th50,area_min_th50,area_max_th50,area_mean_th75,area_std_th75,area_min_th75,area_max_th75,area_mean_th100,area_std_th100,area_min_th100,area_max_th100,area_mean_th150,area_std_th150,area_min_th150,area_max_th150,spatter_mean,spatter_std,spatter_min,spatter_max
0,1,1,1,275,1011.709091,180.398818,174.0,1587.0,503.072727,84.496295,106.0,773.0,297.752727,46.721192,78.0,489.0,209.589091,36.180485,57.0,326.0,157.243636,29.185717,43.0,256.0,93.345455,23.419975,22.0,184.0,619.134012,116.352734,339.354823,893.256899
1,1,1,2,275,973.574545,171.983141,258.0,1949.0,486.938182,84.81312,183.0,976.0,277.807273,53.387666,126.0,468.0,195.214545,38.662754,101.0,348.0,146.476364,31.254416,84.0,261.0,85.6,23.219741,27.0,186.0,582.148716,89.86795,346.37851,1425.435366
2,1,1,3,275,916.043636,161.732321,176.0,1483.0,452.069091,91.200871,115.0,821.0,267.054545,56.078587,78.0,534.0,188.290909,39.581112,60.0,392.0,141.556364,29.597535,43.0,276.0,83.021818,20.428311,24.0,162.0,599.086898,79.650852,357.952828,882.058246
3,1,1,4,274,913.456204,184.239748,304.0,1724.0,451.937956,96.380669,205.0,856.0,271.178832,59.261336,150.0,550.0,194.547445,40.515739,120.0,362.0,147.485401,32.169399,90.0,294.0,87.474453,23.281546,50.0,205.0,615.094946,91.768334,363.248107,962.108688
4,1,1,5,275,1014.4,295.712366,321.0,4655.0,483.403636,100.057006,208.0,990.0,280.807273,55.430966,147.0,505.0,197.68,39.941313,114.0,369.0,148.498182,31.536919,95.0,284.0,85.967273,22.790644,51.0,181.0,664.039749,135.495371,392.486691,2284.289821
5,1,2,1,216,984.685185,179.665798,330.0,1537.0,507.337963,83.211587,223.0,748.0,300.634259,49.670307,162.0,451.0,211.666667,35.260407,124.0,309.0,158.305556,27.508066,98.0,233.0,89.550926,19.751634,45.0,151.0,472.868545,87.074435,231.958242,727.738413
6,1,2,2,216,943.847222,169.818339,248.0,1459.0,499.490741,88.67643,163.0,761.0,296.62037,54.423361,113.0,465.0,206.333333,38.767589,87.0,318.0,153.643519,30.79183,63.0,255.0,86.560185,22.562384,37.0,163.0,457.6446,68.185406,248.112009,740.762005
7,1,2,3,216,908.398148,169.78341,302.0,1491.0,457.37037,73.095966,203.0,676.0,271.773148,41.657104,139.0,430.0,192.990741,30.111212,106.0,301.0,143.569444,23.326284,81.0,214.0,79.634259,15.362609,47.0,135.0,465.5201,73.573275,259.957265,684.766234
8,1,2,4,217,888.732719,193.909387,288.0,2151.0,455.599078,78.693834,187.0,791.0,275.695853,46.337369,134.0,457.0,196.009217,31.762655,107.0,327.0,147.073733,24.901171,84.0,256.0,81.599078,15.987916,48.0,155.0,465.783757,74.27542,263.525275,946.645875
9,1,2,5,216,958.25,202.942367,465.0,1862.0,485.944444,87.214119,245.0,760.0,288.25463,48.08485,172.0,441.0,203.476852,32.129792,133.0,310.0,150.287037,23.890976,99.0,243.0,82.074074,14.463366,51.0,137.0,504.243672,93.440993,277.542125,783.281373


In [6]:
# Save the summary table to CSV; index=False leaves out the DataFrame's row index.
summary_df.to_csv('video_metrics_summary.csv', index=False)