In [1]:
import gc
import pickle
from collections import defaultdict

import cv2
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from video_loader import VideoLoader

class MetricCollector:
    """Collect per-frame melt-pool area and spatter-intensity metrics from videos.

    Videos are fetched through a ``VideoLoader`` by ``(cube, condition, line)``.
    For every frame and every value in ``self.thresholds`` the collector records
    the pixel count of the largest bright connected component (the "melt pool")
    and the summed normalised intensity of everything outside that component
    (the "spatter").
    """

    def __init__(self, data_dir="segmented_videos_data"):
        """Create the loader for ``data_dir`` and set the threshold sweep.

        Args:
            data_dir: Directory handed to ``VideoLoader`` together with the
                metadata file name ``"metadata.pkl"``.
        """
        self.data_dir = data_dir
        self.loader = VideoLoader(data_dir, "metadata.pkl")
        # Thresholds are applied to frames rescaled to the 0-255 range.
        self.thresholds = [1, 2, 3, 5, 10, 25, 50, 75, 100, 150]

    @staticmethod
    def _shift_to_zero(frame):
        """Return ``(frame - frame.min(), span)`` with ``span = max - min``.

        Integer frames are promoted to float64 first so that later scaling
        (e.g. multiplying by 255) cannot wrap around in a narrow integer dtype.
        Floating-point frames keep their dtype.
        """
        values = np.asarray(frame)
        if not np.issubdtype(values.dtype, np.floating):
            values = values.astype(np.float64)
        shifted = values - values.min()
        # The largest shifted value is exactly max - min.
        return shifted, shifted.max()

    def get_melt_pool_mask(self, frame, threshold=50):
        """Return a boolean mask of the largest bright connected component.

        The frame is min-max rescaled to 0-255, binarised (pixels strictly
        above ``threshold`` are foreground) and labelled with
        ``cv2.connectedComponentsWithStats``; the foreground component with
        the largest area is kept. On ties the lowest label wins.

        Args:
            frame: 2-D intensity image (array-like).
            threshold: Cut-off on the 0-255 rescaled image.

        Returns:
            Boolean array with the frame's shape. It is all ``False`` when the
            frame is constant (no contrast to rescale) or when no pixel exceeds
            the threshold.
        """
        shifted, span = self._shift_to_zero(frame)
        if span == 0:
            # A constant frame cannot be rescaled (0/0); it has no bright region.
            return np.zeros(shifted.shape, dtype=bool)

        frame_normalized = (shifted * 255 / span).astype(np.uint8)
        _, binary = cv2.threshold(frame_normalized, threshold, 255, cv2.THRESH_BINARY)
        num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(binary)

        # Label 0 is the background, so fewer than two labels means no foreground.
        if num_labels < 2:
            return np.zeros(shifted.shape, dtype=bool)

        # argmax returns the first maximum, matching a strict ">" scan from label 1.
        largest_label = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
        return labels == largest_label

    def get_spatter_intensity(self, frame, melt_pool_mask):
        """Sum the min-max normalised intensity outside the melt pool.

        Args:
            frame: 2-D intensity image (array-like).
            melt_pool_mask: Boolean array, ``True`` on melt-pool pixels.

        Returns:
            Sum of the frame rescaled to [0, 1] over all pixels where
            ``melt_pool_mask`` is ``False``; ``0.0`` for a constant frame,
            whose normalisation is undefined.
        """
        shifted, span = self._shift_to_zero(frame)
        if span == 0:
            return 0.0
        frame_norm = shifted / span
        return np.sum(frame_norm * ~melt_pool_mask)

    def process_single_video(self, cube, condition, line):
        """Compute per-frame metrics for one video.

        Args:
            cube, condition, line: Identifiers passed to the loader's
                ``get_video``.

        Returns:
            ``None`` when the loader returns ``None``; otherwise a dict with
            ``'cube'``, ``'condition'``, ``'line'``, ``'frame_count'`` and two
            dicts ``'areas'`` and ``'spatter_intensity'`` mapping each
            threshold to a NumPy array holding one value per frame.
        """
        frames = self.loader.get_video(cube, condition, line)
        if frames is None:
            return None

        frame_count = len(frames)
        areas = {th: [] for th in self.thresholds}
        spatter = {th: [] for th in self.thresholds}

        print(f"Processing Cube: {cube}, condition: {condition}, line: {line}")

        for frame in frames:
            for th in self.thresholds:
                mask = self.get_melt_pool_mask(frame, threshold=th)
                # The mask holds a single component, so its area is simply the
                # number of True pixels; stored as float like the other metrics.
                areas[th].append(float(np.count_nonzero(mask)))
                spatter[th].append(self.get_spatter_intensity(frame, mask))

        video_metrics = {
            'cube': cube,
            'condition': condition,
            'line': line,
            'frame_count': frame_count,
            # Arrays are more compact than Python lists of scalars.
            'areas': {th: np.array(values) for th, values in areas.items()},
            'spatter_intensity': {th: np.array(values) for th, values in spatter.items()},
        }

        # Drop the local reference to the frames and collect before the next video.
        del frames
        gc.collect()

        return video_metrics

    def collect_all_metrics(self, cubes=range(1, 5), conditions=range(1, 13), lines=range(1, 6)):
        """Process every requested video and return the list of metric dicts.

        Args:
            cubes: Cube identifiers to visit (default 1-4).
            conditions: Condition identifiers to visit (default 1-12).
            lines: Line identifiers to visit (default 1-5).

        Returns:
            List of the dicts produced by ``process_single_video``, in
            cube/condition/line order. Combinations for which the loader
            returns no video are skipped.
        """
        # Materialise the iterables so they can be sized and re-iterated.
        cubes, conditions, lines = list(cubes), list(conditions), list(lines)
        total_videos = len(cubes) * len(conditions) * len(lines)

        all_metrics = []
        # The context manager closes the progress bar even if processing raises.
        with tqdm(total=total_videos, desc="Processing videos") as pbar:
            for cube in cubes:
                for condition in conditions:
                    for line in lines:
                        metrics = self.process_single_video(cube, condition, line)
                        if metrics is not None:
                            all_metrics.append(metrics)
                        pbar.update(1)

        return all_metrics

    @staticmethod
    def _describe(prefix, th, values):
        """Return mean/std/min/max of ``values`` keyed ``{prefix}_{stat}_th{th}``.

        An empty input yields NaN for every statistic instead of raising.
        """
        values = np.asarray(values)
        if values.size == 0:
            mean = std = low = high = np.nan
        else:
            mean, std = np.mean(values), np.std(values)
            low, high = np.min(values), np.max(values)
        return {
            f'{prefix}_mean_th{th}': mean,
            f'{prefix}_std_th{th}': std,
            f'{prefix}_min_th{th}': low,
            f'{prefix}_max_th{th}': high,
        }

    def calculate_summary_statistics(self, all_metrics):
        """Reduce per-frame metrics to one summary row per video.

        Args:
            all_metrics: List of dicts as returned by ``collect_all_metrics``.

        Returns:
            ``pandas.DataFrame`` with the identifying columns (``cube``,
            ``condition``, ``line``, ``frame_count``) followed, for each
            threshold, by mean/std/min/max of the areas and then of the
            spatter intensities. Videos without frames get NaN statistics.
        """
        summary_data = []

        for metrics in all_metrics:
            summary_dict = {
                'cube': metrics['cube'],
                'condition': metrics['condition'],
                'line': metrics['line'],
                'frame_count': metrics['frame_count'],
            }

            for th in self.thresholds:
                summary_dict.update(self._describe('area', th, metrics['areas'][th]))
                summary_dict.update(self._describe('spatter', th, metrics['spatter_intensity'][th]))

            summary_data.append(summary_dict)

        return pd.DataFrame(summary_data)

In [2]:
# Build the collector; its constructor creates a VideoLoader for this directory
# using the metadata file "metadata.pkl".
collector = MetricCollector("segmented_videos_data")


In [3]:
# Process every cube/condition/line combination (4 x 12 x 5 = 240 videos) and
# keep the per-frame metrics; videos the loader cannot provide are skipped.
metrics = collector.collect_all_metrics()


Processing videos:   0%|          | 0/240 [00:00<?, ?it/s]

Processing Cube: 1, condition: 1, line: 1
Processing Cube: 1, condition: 1, line: 2
Processing Cube: 1, condition: 1, line: 3
Processing Cube: 1, condition: 1, line: 4
Processing Cube: 1, condition: 1, line: 5
Processing Cube: 1, condition: 2, line: 1
Processing Cube: 1, condition: 2, line: 2
Processing Cube: 1, condition: 2, line: 3
Processing Cube: 1, condition: 2, line: 4
Processing Cube: 1, condition: 2, line: 5
Processing Cube: 1, condition: 3, line: 1
Processing Cube: 1, condition: 3, line: 2
Processing Cube: 1, condition: 3, line: 3
Processing Cube: 1, condition: 3, line: 4
Processing Cube: 1, condition: 3, line: 5
Processing Cube: 1, condition: 4, line: 1
Processing Cube: 1, condition: 4, line: 2
Processing Cube: 1, condition: 4, line: 3
Processing Cube: 1, condition: 4, line: 4
Processing Cube: 1, condition: 4, line: 5
Processing Cube: 1, condition: 5, line: 1
Processing Cube: 1, condition: 5, line: 2
Processing Cube: 1, condition: 5, line: 3
Processing Cube: 1, condition: 5, 

Processing Cube: 4, condition: 4, line: 1
Processing Cube: 4, condition: 4, line: 2
Processing Cube: 4, condition: 4, line: 3
Processing Cube: 4, condition: 4, line: 4
Processing Cube: 4, condition: 4, line: 5
Processing Cube: 4, condition: 5, line: 1
Processing Cube: 4, condition: 5, line: 2
Processing Cube: 4, condition: 5, line: 3
Processing Cube: 4, condition: 5, line: 4
Processing Cube: 4, condition: 5, line: 5
Processing Cube: 4, condition: 6, line: 1
Processing Cube: 4, condition: 6, line: 2
Processing Cube: 4, condition: 6, line: 3
Processing Cube: 4, condition: 6, line: 4
Processing Cube: 4, condition: 6, line: 5
Processing Cube: 4, condition: 7, line: 1
Processing Cube: 4, condition: 7, line: 2
Processing Cube: 4, condition: 7, line: 3
Processing Cube: 4, condition: 7, line: 4
Processing Cube: 4, condition: 7, line: 5
Processing Cube: 4, condition: 8, line: 1
Processing Cube: 4, condition: 8, line: 2
Processing Cube: 4, condition: 8, line: 3
Processing Cube: 4, condition: 8, 

In [4]:
# Persist the raw per-video metrics so the expensive collection step does not
# have to be repeated. `pickle` is imported in the notebook's top import cell.
with open('full_metrics.pickle', 'wb') as f:
    pickle.dump(metrics, f)
    

In [5]:
# Reduce each video's per-frame metrics to one row of per-threshold statistics.
summary_df = collector.calculate_summary_statistics(metrics)

# Lift pandas' row-display cap so the full table is rendered as the cell output.
pd.set_option('display.max_rows', None)
summary_df

Unnamed: 0,cube,condition,line,frame_count,area_mean_th1,area_std_th1,area_min_th1,area_max_th1,spatter_mean_th1,spatter_std_th1,...,spatter_min_th100,spatter_max_th100,area_mean_th150,area_std_th150,area_min_th150,area_max_th150,spatter_mean_th150,spatter_std_th150,spatter_min_th150,spatter_max_th150
0,1,1,1,275,9443.945455,4450.815371,443.0,22978.0,411.647757,61.546923,...,354.445665,984.937143,93.345455,23.419975,22.0,184.0,689.876818,118.880619,369.421978,1014.358095
1,1,1,2,275,7926.654545,3063.324225,1106.0,16642.0,405.19242,58.360599,...,358.370208,1487.82378,85.6,23.219741,27.0,186.0,648.793471,91.914193,372.447863,1525.129268
2,1,1,3,275,8479.709091,2861.609882,513.0,20396.0,415.607537,56.940541,...,373.063719,922.892016,83.021818,20.428311,24.0,162.0,663.024609,80.230064,385.790072,952.367474
3,1,1,4,274,9476.423358,3201.95729,1427.0,19854.0,424.328855,58.887751,...,380.349206,1001.511596,87.474453,23.281546,50.0,205.0,679.278855,93.389675,393.232967,1031.087574
4,1,1,5,275,10503.949091,3627.269991,998.0,33289.0,440.650692,72.485994,...,412.092552,2350.686092,85.967273,22.790644,51.0,181.0,731.711424,139.920748,434.459341,2416.466977
5,1,2,1,216,8571.319444,3213.373898,1674.0,20457.0,283.890133,56.345777,...,247.432967,772.432187,89.550926,19.751634,45.0,151.0,546.419853,89.110538,264.647375,803.873187
6,1,2,2,216,8099.199074,2604.568979,700.0,17289.0,284.970653,53.656481,...,269.358317,796.882477,86.560185,22.562384,37.0,163.0,530.553092,73.393323,292.781276,840.744313
7,1,2,3,216,8419.837963,2598.095376,1230.0,17235.0,287.171515,48.613252,...,281.404396,724.56596,79.634259,15.362609,47.0,135.0,532.913454,77.449896,300.821001,758.620643
8,1,2,4,217,8028.778802,2443.255941,912.0,15675.0,303.023987,64.228528,...,283.244444,995.671362,81.599078,15.987916,48.0,155.0,534.027853,78.822416,303.46569,1034.798122
9,1,2,5,216,9185.893519,2935.638888,2878.0,17537.0,309.128751,57.743922,...,302.059341,829.464483,82.074074,14.463366,51.0,137.0,576.546298,95.820994,325.173138,865.845699


In [6]:
# Save the per-video summary table to CSV; index=False leaves the DataFrame's
# row index out of the file.
summary_df.to_csv('video_metrics_summary.csv', index=False)