In [2]:
import os
import cv2
import pandas as pd

def get_video_frame_count(video_path):
    """Get the total number of frames OpenCV reports for a video.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.

    Returns:
        The ``cv2.CAP_PROP_FRAME_COUNT`` property converted to ``int``, or
        ``None`` (after printing a message) if the capture could not be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Failed to open video file: {video_path}")
            return None
        # NOTE(review): this is the count reported by the capture property,
        # not the result of decoding every frame — confirm it is exact for
        # the codecs used in these recordings.
        return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture handle on every exit path, including the
        # failed-open return and any exception raised by cap.get().
        cap.release()

# Suffix appended to a video's base name to form the expected DLC filename
_DEFAULT_DLC_SUFFIX = "DLC_DlcrnetStride32Ms5_CR_implant_DLCnetNov30shuffle1_snapshot_350_sk_filtered.h5"


def _result_row(video_file, dlc_file, frame_count=None, coords_length=None,
                match=None, error=None):
    """Build one row of the results table; fields that are not given stay None."""
    return {
        "Video File": video_file,
        "DLC File": dlc_file,
        "Frame Count": frame_count,
        "Coords Length": coords_length,
        "Match": match,
        "Error": error,
    }


def check_video_and_dlc(folder_path, output_filename="dlc_check_results.xlsx",
                        dlc_suffix=_DEFAULT_DLC_SUFFIX, individual="individual1",
                        bodypart="snout"):
    """Check all .avi videos and their corresponding DLC files in the folder.

    For every ``.avi`` file in ``folder_path`` the expected DLC file name is
    built as ``<video name without extension><dlc_suffix>``. When that file
    exists, the video's frame count is compared with the number of rows of the
    ``(bodypart, 'x')`` column of ``individual`` in the DLC table. One result
    row is recorded per video, and the table is written to an Excel file
    inside ``folder_path``.

    Args:
        folder_path: Folder containing the videos and the DLC ``.h5`` files;
            the Excel report is written here as well.
        output_filename: File name of the Excel report.
        dlc_suffix: Suffix appended to the video's base name to obtain the DLC
            file name.
        individual: Second-level column key selected from the DLC table.
        bodypart: Body part whose ``'x'`` column is measured.

    Returns:
        pandas.DataFrame with the columns ``Video File``, ``DLC File``,
        ``Frame Count``, ``Coords Length``, ``Match`` and ``Error``.

    Errors raised while processing a single video are caught, printed and
    stored in that video's ``Error`` field so the remaining videos are still
    checked. Errors from listing the folder or writing the report propagate.
    """
    # Store results
    results = []

    # Get all .avi files; sorted because os.listdir returns entries in
    # arbitrary order and the report should be reproducible.
    avi_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.avi'))

    for avi_file in avi_files:
        print(f"\nProcessing video: {avi_file}")

        # Construct the corresponding DLC filename. splitext strips only the
        # trailing extension; str.replace('.avi', '') would also remove any
        # ".avi" that occurs earlier in the name.
        base_name = os.path.splitext(avi_file)[0]
        dlc_file = f"{base_name}{dlc_suffix}"
        dlc_path = os.path.join(folder_path, dlc_file)

        # Check if DLC file exists
        if not os.path.exists(dlc_path):
            print(f"Corresponding DLC file not found: {dlc_file}")
            results.append(_result_row(avi_file, "Not found", error="DLC file missing"))
            continue

        try:
            # Get video frame count
            video_path = os.path.join(folder_path, avi_file)
            frame_count = get_video_frame_count(video_path)
            if frame_count is None:
                results.append(_result_row(avi_file, dlc_file, error="Failed to read video"))
                continue

            # Read DLC file; the scorer is taken from the first entry of the
            # outermost column level.
            df = pd.read_hdf(dlc_path)
            scorer = df.columns.get_level_values(0)[0]

            # Get coordinate data: the x-coordinate column of one body part is
            # used as a representative column whose length is the row count.
            coords = df[scorer, individual][[(bodypart, 'x')]]
            coords_length = len(coords)

            # Compare frame count and coords length
            match = frame_count == coords_length
            print(f"Frame count: {frame_count}")
            print(f"Coords length: {coords_length}")
            print(f"Match: {match}")

            # Store result
            results.append(_result_row(
                avi_file,
                dlc_file,
                frame_count=frame_count,
                coords_length=coords_length,
                match=match,
            ))

        except Exception as e:
            # Deliberate best-effort: record the failure and keep going so one
            # bad file does not abort the whole folder check.
            print(f"Error processing file: {dlc_file}, Error: {str(e)}")
            results.append(_result_row(avi_file, dlc_file, error=str(e)))

    # Define the full output path for the Excel file
    output_excel = os.path.join(folder_path, output_filename)

    # Convert results to DataFrame and save to Excel
    results_df = pd.DataFrame(results)
    results_df.to_excel(output_excel, index=False)
    print(f"\nResults saved to {output_excel}")

    return results_df

# Example usage: run the check on one folder. The call is the last expression
# of the cell, so the returned DataFrame is shown as the cell output.
folder_path = r"S:/Sachuriga/Ephys_Vedio/CR_CA1/pytorch_model/"  # Replace with your actual folder path
check_video_and_dlc(
    folder_path=folder_path,
    output_filename="dlc_check_results.xlsx",
)


Processing video: 63383_Open_Field_50Hz_A2024-07-13T14_10_11.avi
Frame count: 76908
Coords length: 76908
Match: True

Processing video: 63383_Open_Field_50Hz_A2024-07-15T14_51_23.avi
Frame count: 60307
Coords length: 60307
Match: True

Processing video: 63383_Open_Field_50Hz_A2024-07-18T14_13_16.avi
Frame count: 59875
Coords length: 59875
Match: True

Processing video: 63383_Open_Field_50Hz_A2024-07-20T14_22_03.avi
Frame count: 61160
Coords length: 61160
Match: True

Processing video: 63383_Open_Field_50Hz_A2024-07-22T15_12_15.avi
Frame count: 62363
Coords length: 62363
Match: True

Processing video: 63383_Open_Field_50Hz_A2024-07-25T12_57_53.avi
Frame count: 64462
Coords length: 64462
Match: True

Processing video: 63383_Open_Field_50Hz_A2024-07-28T12_22_35.avi
Frame count: 63316
Coords length: 63316
Match: True

Processing video: 63383_Open_Field_50Hz_A2024-07-30T12_34_07.avi
Frame count: 61606
Coords length: 61606
Match: True

Processing video: 63383_Open_Field_50Hz_B2024-07-13T14_

Unnamed: 0,Video File,DLC File,Frame Count,Coords Length,Match,Error
0,63383_Open_Field_50Hz_A2024-07-13T14_10_11.avi,63383_Open_Field_50Hz_A2024-07-13T14_10_11DLC_...,76908,76908,True,
1,63383_Open_Field_50Hz_A2024-07-15T14_51_23.avi,63383_Open_Field_50Hz_A2024-07-15T14_51_23DLC_...,60307,60307,True,
2,63383_Open_Field_50Hz_A2024-07-18T14_13_16.avi,63383_Open_Field_50Hz_A2024-07-18T14_13_16DLC_...,59875,59875,True,
3,63383_Open_Field_50Hz_A2024-07-20T14_22_03.avi,63383_Open_Field_50Hz_A2024-07-20T14_22_03DLC_...,61160,61160,True,
4,63383_Open_Field_50Hz_A2024-07-22T15_12_15.avi,63383_Open_Field_50Hz_A2024-07-22T15_12_15DLC_...,62363,62363,True,
...,...,...,...,...,...,...
316,66537_Open_Field_50Hz_C2024-12-19T17_30_32.avi,66537_Open_Field_50Hz_C2024-12-19T17_30_32DLC_...,63185,63185,True,
317,66537_Open_Field_50Hz_C2024-12-21T16_10_44.avi,66537_Open_Field_50Hz_C2024-12-21T16_10_44DLC_...,73698,73698,True,
318,66537_Open_Field_50Hz_C2024-12-22T16_19_49.avi,66537_Open_Field_50Hz_C2024-12-22T16_19_49DLC_...,60770,60770,True,
319,66538_Open_Field_50Hz_A2024-12-21T14_48_49.avi,66538_Open_Field_50Hz_A2024-12-21T14_48_49DLC_...,60507,60507,True,
