In [21]:
import os
import pandas as pd

# Ground-truth distance assigned to each recording; entry i belongs to file_names[i].
# Values from an earlier dataset, kept for reference:
# ground_truth_values = ['3.5', '4.0', '4.5', '5.0', '5.5', '6.0', '6.5', '7.0', '7.5']
#
# This list used to hold nine values for the eight files below (a leftover '4.5'
# with no matching recording), so every file from '5_meter' onwards was tagged
# with the value of the previous distance (e.g. '5_meter' -> 4.5).
# NOTE(review): '4_2_meter' keeps the 4.0 it was already paired with - confirm.
ground_truth_values = ['4.0', '4.0', '5.0', '5.5', '6.0', '6.5', '7.0', '7.5']

# Input directory and file names (without the .csv extension)
input_dir = './output'
# file_names = [
#     'realsense_detections3500', 'realsense_detections4000', 'realsense_detections4500',
#     'realsense_detections5000', 'realsense_detections5500', 'realsense_detections6000',
#     'realsense_detections6500', 'realsense_detections7000', 'realsense_detections7500'
# ]

file_names = ['4_meter', '4_2_meter', '5_meter',
              '5_5_meter', '6_meter', '6_5_meter',
              '7_meter', '7_5_meter']

# Fail fast instead of silently mislabelling files if the two lists drift apart again
if len(file_names) != len(ground_truth_values):
    raise ValueError("Mismatch between number of filenames and ground truth values.")

# Output file for the merged data
output_file = os.path.join(input_dir, 'merged_PLC_tracking.csv')

# Step 1: Tag each file with its ground truth value and collect the dataframes
all_dataframes = []

for base_name, ground_truth in zip(file_names, ground_truth_values):
    csv_file_path = os.path.join(input_dir, base_name + '.csv')

    if not os.path.exists(csv_file_path):
        print(f"File not found: {csv_file_path}")
        continue

    try:
        # Read the CSV file
        df = pd.read_csv(csv_file_path)

        # Add ground truth column if it doesn't exist; an existing column is left untouched
        if 'ground_truth' not in df.columns:
            df['ground_truth'] = ground_truth

        # Add source filename column for tracking
        df['source_file'] = base_name

        # Append to our list of dataframes
        all_dataframes.append(df)

        print(f"Processed {csv_file_path}")

    except Exception as e:
        # Best effort: report the problem and carry on with the remaining files
        print(f"Error processing {csv_file_path}: {e}")

# Step 2: Concatenate all dataframes
if all_dataframes:
    merged_df = pd.concat(all_dataframes, ignore_index=True)

    # Step 3: Save the merged dataframe to a new CSV file
    merged_df.to_csv(output_file, index=False)

    print(f"\nSuccessfully merged {len(all_dataframes)} files into {output_file}")
    print(f"Total rows in merged file: {len(merged_df)}")
else:
    # pd.concat raises on an empty list, so report the situation instead
    print("No data to merge. Please check the input files.")

Processed ./output/4_meter.csv
Processed ./output/4_2_meter.csv
Processed ./output/5_meter.csv
Processed ./output/5_5_meter.csv
Processed ./output/6_meter.csv
Processed ./output/6_5_meter.csv
Processed ./output/7_meter.csv
Processed ./output/7_5_meter.csv

Successfully merged 8 files into ./output/merged_PLC_tracking.csv
Total rows in merged file: 22974


In [None]:
%pwd

In [None]:
%pwd

In [None]:
import os
import csv
import pandas as pd

# This cell does not assign `input_dir`, `file_names` or `ground_truth_values`; it uses
# whatever an earlier cell left in the kernel. An alternative dataset configuration is
# kept (commented out) for reference:

# input_dir = '../Callback'

# ground_truth_values = ['9','8.75','8.5','8','7.75', '7.5','7.25', '7','6.75','6.5', '6.25', '6','5.75', '5.5', '5.25','5', '4.75', '4.5', '4.25', '4']

# file_names = ['Day2_9_depth_measurements','Day2_8_7_5_depth_measurements','Day2_8_5_depth_measurements' , 'Day2_8_depth_measurements', 'Day2_7_7_5_depth_measurements', 'Day2_7_5_depth_measurements', 'Day2_7_2_5_depth_measurements','Day2_7_depth_measurements', 'Day2_6_7_5_depth_measurements', 'Day2_6_5_depth_measurements', 'Day2_6_2_5_depth_measurements', 'Day2_6_depth_measurements'
#               , 'Day2_5_7_5_depth_measurements', 'Day2_5_5_depth_measurements', 'Day2_5_2_5_depth_measurements', 'Day2_5_depth_measurements', 'Day2_4_7_5_depth_measurements', 'Day2_4_5_depth_measurements', 'Day2_4_2_5_depth_measurements', 'Day2_4_depth_measurements']

# Ensure the number of filenames matches the number of ground truth values
if len(file_names) != len(ground_truth_values):
    raise ValueError("Mismatch between number of filenames and ground truth values.")

# Column names written as the first row of every generated CSV file. This list used to
# be commented out, which left `headers` undefined at the writer.writerow(headers) call.
# NOTE(review): names restored from the commented-out list - confirm they match the
# column order of the .txt files.
headers = [
    'timestamp', 'x', 'y', 'w', 'h', 'depth',
    'gyro_data.x', 'gyro_data.y', 'gyro_data.z',
    'accel_data.x', 'accel_data.y', 'accel_data.z',
    'ground_truth'
]

# Each input line must supply every column except 'ground_truth', which is appended below
expected_field_count = len(headers) - 1

# List to store all dataframes for merging
all_dataframes = []

# Process each file
for base_name, ground_truth in zip(file_names, ground_truth_values):
    txt_file_path = os.path.join(input_dir, base_name + '.txt')

    if not os.path.exists(txt_file_path):
        print(f"File not found: {txt_file_path}")
        continue

    # Read the .txt file and prepare data for the .csv
    csv_data = []
    with open(txt_file_path, 'r') as txt_file:
        for line in txt_file:
            # Each line is expected to hold `expected_field_count` comma-separated values
            data = line.strip().split(',')
            if len(data) == expected_field_count:  # Ensure correct data format
                data.append(ground_truth)  # Add the ground truth value
                csv_data.append(data)
            else:
                print(f"Skipping line in {txt_file_path} due to unexpected format: {line.strip()}")

    # Define the output .csv file path
    csv_file_path = os.path.join(input_dir, base_name + '.csv')

    # Write data to the .csv file
    with open(csv_file_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(headers)  # Write headers
        writer.writerows(csv_data)  # Write data rows

    print(f"Converted: {txt_file_path} → {csv_file_path} with ground_truth = {ground_truth}")

    # Read the CSV file back so pandas infers the column dtypes
    df = pd.read_csv(csv_file_path)
    all_dataframes.append(df)

# Define the output merged CSV file path
merged_csv_path = os.path.join(input_dir, 'merged_output.csv')

if all_dataframes:
    # Merge all CSVs into one dataframe
    merged_df = pd.concat(all_dataframes, ignore_index=True)

    # Save the merged dataframe to a CSV file
    merged_df.to_csv(merged_csv_path, index=False)

    print(f"All files successfully merged into {merged_csv_path}!")
else:
    # pd.concat raises on an empty list (e.g. when every input file is missing)
    print("No data to merge. Please check the input files.")


In [None]:
# NOTE(review): orphaned copy of the field-count check from the .txt -> .csv loop.
# `data`, `ground_truth`, `csv_data`, `txt_file_path` and `line` are not defined in this
# cell, so it only runs against values left in the kernel by another cell. It also
# expects 10 fields, whereas the conversion cell accepts lines with 12 fields.
# Candidate for deletion.
if len(data) == 10:  # Ensure correct data format
    data.append(ground_truth)  # Add the ground truth value
    csv_data.append(data)
else:
    print(f"Skipping line in {txt_file_path} due to unexpected format: {line.strip()}")


In [None]:
# import os
# import csv
# import pandas as pd

# # Define ground truth values corresponding to each file
# ground_truth_values = ['8', '7.5', '7', '6.5', '6', '5.5', '5', '4.5', '4', '3.5', '3']

# # List of filenames without extensions
# file_names = [
#     'm8_1_depth_measurements', '7.5_depth_measurements', '7_depth_measurements', 
#     'm_65_depth_measurements', 'm6_depth_measurements', 'm_5_5depth_measurements', 
#     'm_5_depth_measurements', 'm_4_5_depth_measurements', 'm_4_depth_measurements', 
#     'm3_5_depth_measurements', 'm_3_depth_measurements'
# ]

# # Define the input directory
# input_dir = '../Callback'

# # Ensure the number of filenames matches the number of ground truth values
# if len(file_names) != len(ground_truth_values):
#     raise ValueError("Mismatch between number of filenames and ground truth values.")

# # Define the headers
# headers = [
#     'timestamp', 'x', 'y', 'depth',
#     'gyro_data.x', 'gyro_data.y', 'gyro_data.z',
#     'accel_data.x', 'accel_data.y', 'accel_data.z',
#     'ground_truth', 'error'
# ]

# # List to store all dataframes for merging
# all_dataframes = []

# # Process each file
# for idx, base_name in enumerate(file_names):
#     txt_file_path = os.path.join(input_dir, base_name + '.txt')
#     ground_truth = float(ground_truth_values[idx])  # Assign corresponding ground truth value

#     if not os.path.exists(txt_file_path):
#         print(f"File not found: {txt_file_path}")
#         continue

#     # Read the .txt file and prepare data for the .csv
#     csv_data = []
#     with open(txt_file_path, 'r') as txt_file:
#         for line in txt_file:
#             # Assuming each line contains 10 comma-separated values
#             data = line.strip().split(',')
#             if len(data) == 10:  # Ensure correct data format
#                 depth_value = float(data[3])  # Convert depth to float
#                 error_value = depth_value - ground_truth  # Calculate error
#                 data.append(str(ground_truth))  # Add the ground truth value
#                 data.append(str(error_value))  # Add the error value
#                 csv_data.append(data)
#             else:
#                 print(f"Skipping line in {txt_file_path} due to unexpected format: {line.strip()}")

#     # Define the output .csv file path
#     csv_file_path = os.path.join(input_dir, base_name + '.csv')

#     # Write data to the .csv file
#     with open(csv_file_path, 'w', newline='') as csv_file:
#         writer = csv.writer(csv_file)
#         writer.writerow(headers)  # Write headers
#         writer.writerows(csv_data)  # Write data rows

#     print(f"Converted: {txt_file_path} → {csv_file_path} with ground_truth = {ground_truth}")

#     # Read the CSV file into a dataframe
#     df = pd.read_csv(csv_file_path)
#     all_dataframes.append(df)

# # Merge all CSVs into one dataframe
# merged_df = pd.concat(all_dataframes, ignore_index=True)

# # Define the output merged CSV file path
# merged_csv_path = os.path.join(input_dir, 'merged_output.csv')

# # Save the merged dataframe to a CSV file
# merged_df.to_csv(merged_csv_path, index=False)

# print(f"All files successfully merged into {merged_csv_path}!")


In [None]:
##JSON

In [None]:
import json
import pandas as pd
import argparse
from pathlib import Path
import glob
from tqdm import tqdm
import logging

# Configure logging: INFO and above, formatted as "<time> - <level> - <message>"
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Logger used by the JSON helper functions in this cell
logger = logging.getLogger(__name__)

def load_json_data(file_path):
    """Read and parse a UTF-8 JSON file.

    Returns the parsed object, or None when reading or parsing raises; the
    failure is logged through the module logger rather than propagated.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.loads(handle.read())
    except Exception as exc:
        logger.error(f"Error loading {file_path}: {exc}")
        return None
    return parsed

def convert_detections_to_rows(data, source_file):
    """Flatten each detection of one parsed JSON sample into a flat dict.

    Args:
        data: Parsed JSON sample; rows are produced only when it is truthy and
            contains a 'detections' key.
        source_file: Value stored in every row's 'source_file' field.

    Returns:
        A list with one dict per detection. Missing keys (at any nesting
        level) yield None for the corresponding field.
    """
    if not data or 'detections' not in data:
        return []

    def flatten(detection):
        # Resolve the nested sections once, then read the individual fields
        camera = data.get('camera_info', {})
        box = detection.get('bounding_box', {})
        relative = detection.get('relative_coordinates', {})
        depth_info = detection.get('depth_information', {})
        stats = depth_info.get('statistics', {})
        center = depth_info.get('realsense_depth_data', {}).get('center_point', {})

        return {
            # Source file info
            'source_file': source_file,
            'sample_id': data.get('sample_id'),
            'timestamp': data.get('timestamp'),
            'session_start': data.get('session_start'),
            'sample_count': data.get('sample_count'),

            # Camera info
            'camera_width': camera.get('width'),
            'camera_height': camera.get('height'),
            'depth_scale': camera.get('depth_scale'),
            'origin_x': camera.get('origin_x'),
            'origin_y': camera.get('origin_y'),

            # Detection info
            'detection_id': detection.get('detection_id'),
            'class_id': detection.get('class_id'),
            'confidence': detection.get('confidence'),

            # Bounding box
            'bbox_x1': box.get('x1'),
            'bbox_y1': box.get('y1'),
            'bbox_x2': box.get('x2'),
            'bbox_y2': box.get('y2'),
            'bbox_center_x': box.get('center_x'),
            'bbox_center_y': box.get('center_y'),
            'bbox_width': box.get('width'),
            'bbox_height': box.get('height'),

            # Relative coordinates
            'rel_x': relative.get('rel_x'),
            'rel_y': relative.get('rel_y'),

            # Depth statistics
            'mean_depth': stats.get('mean_depth'),
            'median_depth': stats.get('median_depth'),
            'min_depth': stats.get('min_depth'),
            'max_depth': stats.get('max_depth'),
            'std_depth': stats.get('std_depth'),
            'valid_pixels': stats.get('valid_pixels'),
            'total_pixels': stats.get('total_pixels'),
            'fill_ratio': stats.get('fill_ratio'),

            # Center point depth
            'center_depth_raw': center.get('depth_raw'),
            'center_depth_meters': center.get('depth_meters'),
        }

    return [flatten(detection) for detection in data.get('detections', [])]

def convert_depth_points_to_rows(data, source_file):
    """Flatten the depth sample points of every detection into flat dicts.

    Points are read from
    detection['depth_information']['realsense_depth_data']['sample_points'];
    detections without that path contribute no rows.

    Args:
        data: Parsed JSON sample; rows are produced only when it is truthy and
            contains a 'detections' key.
        source_file: Value stored in every row's 'source_file' field.

    Returns:
        A list with one dict per sample point, in detection order.
    """
    if not data or 'detections' not in data:
        return []

    return [
        {
            'source_file': source_file,
            'sample_id': data.get('sample_id'),
            'timestamp': data.get('timestamp'),
            'detection_id': detection.get('detection_id'),
            'class_id': detection.get('class_id'),
            'pixel_x': point.get('pixel_x'),
            'pixel_y': point.get('pixel_y'),
            'depth_raw': point.get('depth_raw'),
            'depth_meters': point.get('depth_meters')
        }
        for detection in data.get('detections', [])
        for point in (
            detection.get('depth_information', {})
            .get('realsense_depth_data', {})
            .get('sample_points', [])
        )
    ]

def convert_summary_to_rows(data, source_file):
    """Summarise one parsed JSON sample as a single-row list.

    The row carries the sample and camera metadata, the number of detections,
    the counts for class ids 0 and 1, and the average / max / min confidence
    (a detection without a 'confidence' key counts as 0; all three are 0 when
    there are no detections).

    Args:
        data: Parsed JSON sample.
        source_file: Value stored in the row's 'source_file' field.

    Returns:
        [] when `data` is falsy, otherwise a list containing one dict.
    """
    if not data:
        return []

    detections = data.get('detections', [])
    camera = data.get('camera_info', {})

    # Tally detections per class id
    per_class = {}
    for entry in detections:
        label = entry.get('class_id')
        per_class[label] = per_class.get(label, 0) + 1

    scores = [entry.get('confidence', 0) for entry in detections]
    if detections:
        mean_score = sum(scores) / len(detections)
        best_score = max(scores)
        worst_score = min(scores)
    else:
        mean_score = 0
        best_score = 0
        worst_score = 0

    summary = {
        'source_file': source_file,
        'sample_id': data.get('sample_id'),
        'timestamp': data.get('timestamp'),
        'session_start': data.get('session_start'),
        'sample_count': data.get('sample_count'),
        'camera_width': camera.get('width'),
        'camera_height': camera.get('height'),
        'depth_scale': camera.get('depth_scale'),
        'origin_x': camera.get('origin_x'),
        'origin_y': camera.get('origin_y'),
        'total_detections': len(detections),
        'class_0_count': per_class.get(0, 0),
        'class_1_count': per_class.get(1, 0),
        'avg_confidence': mean_score,

        # Additional stats
        'max_confidence': best_score,
        'min_confidence': worst_score,
    }

    return [summary]

def find_json_files(folder_path, pattern="*.json", recursive=True):
    """Return the sorted, de-duplicated paths under `folder_path` that match `pattern`.

    Args:
        folder_path: Folder to search (str or Path).
        pattern: Glob pattern for the file names.
        recursive: When True (the default, matching the previous behaviour) the
            folder itself and all of its subfolders are searched; when False
            only `pattern` relative to the folder itself is matched.

    Returns:
        A sorted list of Path objects (empty when nothing matches).
    """
    root = Path(folder_path)

    # "**/" matches the folder itself as well as every subfolder, so one glob
    # covers what previously took two overlapping globs.
    glob_pattern = f"**/{pattern}" if recursive else pattern

    return sorted(set(root.glob(glob_pattern)))

def merge_json_folder_to_csv(folder_path, output_dir=".", pattern="*.json", formats=None, recursive=True):
    """Merge the JSON files of a folder into timestamped CSV files.

    Args:
        folder_path: Folder containing the JSON files.
        output_dir: Folder for the CSV files; created (with parents) if missing.
        pattern: Glob pattern selecting the JSON files.
        formats: Iterable of 'detections', 'depth_points', 'summary' and/or
            'all'. None (the default) means ['all']; a single string is
            treated as a one-element list.
        recursive: Whether subfolders are searched too (default True).

    Returns:
        A report dict with the keys 'total_files_found', 'files_processed',
        'files_failed', 'total_detections', 'total_depth_points' and
        'output_files' (output kind -> written Path). The same keys are
        returned, zeroed, when no file matches, so callers can index the
        report unconditionally.
    """
    # Avoid a mutable default argument and accept a bare string
    if formats is None:
        formats = ['all']
    elif isinstance(formats, str):
        formats = [formats]

    def wanted(kind):
        return kind in formats or 'all' in formats

    # Find all JSON files
    json_files = find_json_files(folder_path, pattern, recursive=recursive)

    if not json_files:
        logger.warning(f"No JSON files found in {folder_path}")
        return {
            'total_files_found': 0,
            'files_processed': 0,
            'files_failed': 0,
            'total_detections': 0,
            'total_depth_points': 0,
            'output_files': {}
        }

    logger.info(f"Found {len(json_files)} JSON files")

    # Initialize data containers
    all_detections = []
    all_depth_points = []
    all_summaries = []

    # Process each file
    processed_files = 0
    failed_files = 0

    for json_file in tqdm(json_files, desc="Processing JSON files"):
        try:
            data = load_json_data(json_file)
            if data is None:
                failed_files += 1
                continue

            source_file = json_file.name

            # Convert data based on requested formats
            if wanted('detections'):
                all_detections.extend(convert_detections_to_rows(data, source_file))

            if wanted('depth_points'):
                all_depth_points.extend(convert_depth_points_to_rows(data, source_file))

            if wanted('summary'):
                all_summaries.extend(convert_summary_to_rows(data, source_file))

            processed_files += 1

        except Exception as e:
            # Best effort: a malformed file is counted and logged, not fatal
            logger.error(f"Error processing {json_file}: {e}")
            failed_files += 1

    logger.info(f"Processed: {processed_files}, Failed: {failed_files}")

    # Create output directory (parents=True so nested output paths work)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Generate timestamp for output files
    from datetime import datetime
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    results = {}

    # Save each non-empty, requested collection to its own CSV
    outputs = (
        ('detections', 'detections', all_detections),
        ('depth_points', 'depth points', all_depth_points),
        ('summary', 'summary', all_summaries),
    )
    for kind, label, rows in outputs:
        if not rows or not wanted(kind):
            continue
        frame = pd.DataFrame(rows)
        output_file = output_dir / f"merged_{kind}_{timestamp}.csv"
        frame.to_csv(output_file, index=False)
        results[kind] = output_file
        logger.info(f"Saved {label} CSV: {output_file} ({len(frame)} rows)")

    # Generate processing report
    report = {
        'total_files_found': len(json_files),
        'files_processed': processed_files,
        'files_failed': failed_files,
        'total_detections': len(all_detections),
        'total_depth_points': len(all_depth_points),
        'output_files': results
    }

    return report

def main(argv=None):
    """Command-line entry point: parse the arguments, run the merge, print a report.

    Args:
        argv: Optional list of argument strings. None (the default) makes
            argparse read sys.argv, as before; passing a list allows calling
            main() from a notebook, where sys.argv holds the kernel's arguments.

    Without --recursive only the top level of the folder is searched; the flag
    previously had no effect because subfolders were always searched.
    """
    parser = argparse.ArgumentParser(description='Merge all JSON detection files in a folder to CSV')
    parser.add_argument('folder_path', help='Path to folder containing JSON files')
    parser.add_argument('--output_dir', '-o', default='.', help='Output directory (default: current directory)')
    parser.add_argument('--pattern', '-p', default='*.json', help='File pattern to match (default: *.json)')
    parser.add_argument('--format', '-f', nargs='+',
                       choices=['detections', 'depth_points', 'summary', 'all'],
                       default=['all'], help='Output formats (default: all)')
    parser.add_argument('--recursive', '-r', action='store_true',
                       help='Search subfolders recursively')

    args = parser.parse_args(argv)

    print(f"Merging JSON files from: {args.folder_path}")
    print(f"Pattern: {args.pattern}")
    print(f"Recursive: {args.recursive}")
    print(f"Output directory: {args.output_dir}")
    print(f"Formats: {args.format}")
    print("-" * 50)

    report = merge_json_folder_to_csv(
        folder_path=args.folder_path,
        output_dir=args.output_dir,
        pattern=args.pattern,
        formats=args.format,
        recursive=args.recursive
    )

    print("\n" + "="*50)
    print("PROCESSING REPORT")
    print("="*50)
    print(f"Files found: {report['total_files_found']}")
    print(f"Files processed: {report['files_processed']}")
    print(f"Files failed: {report['files_failed']}")
    print(f"Total detections: {report['total_detections']}")
    print(f"Total depth points: {report['total_depth_points']}")
    print(f"Output files: {list(report['output_files'].values())}")

# Example usage functions
def quick_merge(folder_path, output_dir="."):
    """Merge a folder of JSON files requesting the 'all' format; returns the merge report."""
    every_format = ['all']
    return merge_json_folder_to_csv(folder_path, output_dir, formats=every_format)

def merge_detections_only(folder_path, output_dir="."):
    """Merge a folder of JSON files requesting only the 'detections' format; returns the merge report."""
    detections_format = ['detections']
    return merge_json_folder_to_csv(folder_path, output_dir, formats=detections_format)



In [20]:
# Merge the JSON files under ../training_data/8000 and write the CSV files to ./output
report = quick_merge('../training_data/8000', './output')
# print(f"Processed {report['files_processed']} files")



In [1]:
%pwd


'/home/lworakan/Documents/GitHub/FIBOXVISION2025/main'