In [None]:
import logging
import os
import subprocess
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import pynapple as nap
from caiman.source_extraction.cnmf.cnmf import load_CNMF
from scipy.stats import zscore

In [None]:
def find_file(directory, substrings=None, extension=".csv", recursive=False):
    """
    Find exactly one file in a directory by name substrings and extension.

    Matching is case-insensitive for both the substrings and the extension.

    Parameters
    ----------
    directory : str or Path
        Directory to search.
    substrings : str or iterable of str, optional
        Every substring must occur in the file name. A single string is
        treated as a one-element list. Defaults to ["timestamps"] when None
        or empty.
    extension : str
        Required file suffix, with or without the leading dot (".csv" and
        "csv" are equivalent). An empty string matches files without a suffix.
    recursive : bool
        Search subdirectories as well when True.

    Returns
    -------
    Path
        The single matching file.

    Raises
    ------
    FileNotFoundError
        If the directory does not exist or no file matches.
    ValueError
        If more than one file matches.
    """
    substrings = substrings or ["timestamps"]
    if isinstance(substrings, str):
        # A bare string would otherwise be iterated character by character.
        substrings = [substrings]
    if extension and not extension.startswith("."):
        # Path.suffix always includes the dot, so "csv" could never match.
        extension = f".{extension}"
    directory = Path(directory)

    if not directory.exists():
        raise FileNotFoundError(f"Directory {directory} does not exist.")

    wanted_parts = [sub.lower() for sub in substrings]
    wanted_suffix = extension.lower()
    candidates = directory.rglob("*") if recursive else directory.iterdir()
    # Sorted so the "multiple files" message lists names in a stable order.
    matched_files = sorted(
        file for file in candidates
        if file.is_file()
        and file.suffix.lower() == wanted_suffix
        and all(part in file.name.lower() for part in wanted_parts)
    )

    if not matched_files:
        raise FileNotFoundError(
            f"No file found in {directory} matching substrings {substrings} and extension '{extension}'."
        )
    if len(matched_files) > 1:
        raise ValueError(
            f"Multiple files found in {directory} matching substrings {substrings} and extension '{extension}': "
            f"{', '.join(f.name for f in matched_files)}"
        )
    return matched_files[0]


def load_dlc_data(dlc_dir):
    """
    Locate and read the filtered DLC results file inside ``dlc_dir``.

    The file must be an ``.h5`` file whose name contains both
    "linearMay29shuffle1" and "filtered" (case-insensitive, via ``find_file``).

    Returns
    -------
    tuple
        ``(table, filename)`` as read by ``pd.read_hdf``, or ``(None, None)``
        when the lookup or the read raises FileNotFoundError or ValueError
        (the failure is logged).
    """
    name_parts = ["linearMay29shuffle1", "filtered"]
    try:
        dlc_path = find_file(dlc_dir, name_parts, ".h5")
        tracking = pd.read_hdf(dlc_path)
    except (FileNotFoundError, ValueError) as err:
        logging.error(f"DLC data loading failed: {err}")
        return None, None
    return tracking, dlc_path.name


def load_caiman_data(caiman_dir):
    """
    Load Caiman results from ``<caiman_dir>/caiman_results.hdf5``.

    Parameters
    ----------
    caiman_dir : str or Path
        Directory expected to contain ``caiman_results.hdf5``.

    Returns
    -------
    tuple
        ``(cnmf, filename)`` where ``cnmf`` is whatever ``load_CNMF`` returns,
        or ``(None, None)`` if the file is missing or loading fails (the
        failure is logged, not raised).
    """
    # Accept plain strings as well as Path objects, like the other loaders do.
    caiman_file = Path(caiman_dir) / "caiman_results.hdf5"

    if not caiman_file.exists():
        logging.error(f"Caiman results file not found at {caiman_file}")
        return None, None

    try:
        return load_CNMF(str(caiman_file)), caiman_file.name
    except Exception as e:
        # Deliberately broad: a bad results file must not abort session loading.
        logging.error(f"Error loading Caiman data: {e}")
        return None, None


def load_session_data_old(base_dir, date_number):
    """
    Gather DLC results, Caiman results and timestamps for one session.

    The session directory is ``<base_dir>/<date_number>``. DLC data is read
    from its ``dlc`` subfolder, Caiman data from its ``caiman`` subfolder, and
    the timestamps CSV from the session directory itself.

    Returns
    -------
    dict
        Keys "dlc", "dlc_filename", "caiman", "caiman_filename", "timestamps"
        and "timestamps_filename". Any pair is ``None`` when that piece could
        not be loaded; failures are logged rather than raised.
    """
    session_dir = Path(base_dir) / str(date_number)

    dlc_table, dlc_name = load_dlc_data(session_dir / "dlc")
    cnmf, cnmf_name = load_caiman_data(session_dir / "caiman")

    try:
        ts_path = find_file(session_dir, ["timestamps"], ".csv")
        ts_table, ts_name = pd.read_csv(ts_path), ts_path.name
    except (FileNotFoundError, ValueError) as err:
        logging.error(f"Timestamps loading failed: {err}")
        ts_table, ts_name = None, None

    return {
        "dlc": dlc_table,
        "dlc_filename": dlc_name,
        "caiman": cnmf,
        "caiman_filename": cnmf_name,
        "timestamps": ts_table,
        "timestamps_filename": ts_name,
    }

def load_session_data(base_dir):
    """
    Load the DLC results for a session.

    Here ``base_dir`` is the session directory itself; DLC data is read from
    its ``dlc`` subfolder. Only DLC data is loaded by this function.

    Returns
    -------
    dict
        Keys "dlc" and "dlc_filename"; both are ``None`` if loading failed.
    """
    dlc_table, dlc_name = load_dlc_data(Path(base_dir) / "dlc")
    return {"dlc": dlc_table, "dlc_filename": dlc_name}


In [None]:
# Define base directory and date number.
# load_session_data_old reads from <base_dir>/<date_number>/, so together these
# two values select a single recording session.
base_dir = "/media/toor/T7Shield/AgingMiceNWB/sub-Mouse1637"
date_number = 20240628
# NOTE(review): Mouse_id is not referenced by any cell visible here — confirm it is still needed.
Mouse_id = 'Mouse1637'

# Load session data: a dict with "dlc", "caiman" and "timestamps" entries plus
# matching "*_filename" entries; an entry is None when that piece failed to load.
session_data = load_session_data_old(base_dir, date_number)

In [None]:
# Unpack the session dict into notebook-level names.
# .get() yields None for a missing key, so this cell itself never raises KeyError.
dlc_data, dlc_filename = (
    session_data.get(key) for key in ('dlc', 'dlc_filename')
)
caiman_data, caiman_filename = (
    session_data.get(key) for key in ('caiman', 'caiman_filename')
)
timestamps, timestamps_filename = (
    session_data.get(key) for key in ('timestamps', 'timestamps_filename')
)

In [None]:
# Preview the first rows of the DLC table; the cells below address its columns
# by position (.iloc), so this is the place to check the column layout.
# Raises AttributeError if DLC loading failed and dlc_data is None.
display(dlc_data.head())

In [None]:
# Choose the label with the highest confidence, loosely defined as the one whose
# likelihood column has the highest mean. Usually this is bodypart2.
# Likelihood columns are addressed by position in the DLC table.
likelihood_columns = {"bodypart1": 2, "bodypart2": 5, "bodypart3": 8, "objectA": 11}
likelihood_averages = {
    label: dlc_data.iloc[:, column].astype(float).mean()
    for label, column in likelihood_columns.items()
}
highest_confidence = max(likelihood_averages, key=likelihood_averages.get)
print(highest_confidence)

In [None]:
# Pull bodypart2's tracking columns out of the DLC table by position:
# column 3 = x, column 4 = y, column 5 = likelihood. All rows are kept.
bodypart2_x, bodypart2_y, bodypart2_likelihood = (
    dlc_data.iloc[:, column].astype(float) for column in (3, 4, 5)
)
# Row index of the DLC table as floats.
frame_idx = dlc_data.index.astype(float)

In [None]:
# Frames whose likelihood is below this value are treated as unreliable.
likelihood_threshold = 0.9

# True for every frame that falls below the threshold.
low_likelihood_mask = bodypart2_likelihood < likelihood_threshold

# Replace unreliable coordinates with NaN, then fill the gaps with pandas'
# default interpolation so the traces stay continuous.
bodypart2_x = bodypart2_x.mask(low_likelihood_mask).interpolate()
bodypart2_y = bodypart2_y.mask(low_likelihood_mask).interpolate()

# Summary of how many frames were affected.
n_total = len(bodypart2_likelihood)
n_low = low_likelihood_mask.sum()
print(f"Total points: {n_total}")
print(f"Low-likelihood points: {n_low}")

In [None]:
# Interactive line chart of bodypart2's cleaned y coordinate, one point per frame.
# The labels name the series that is actually plotted (bodypart2_y).
fig = go.Figure()

fig.add_trace(go.Scatter(y=bodypart2_y, mode='lines', name='Y Coordinates'))

fig.update_layout(
    title='Bodypart2 Y Coordinate Over Time',
    xaxis_title='Time (frames)',
    yaxis_title='Y Coordinates'
)

fig.show()


#### Linear track start time based on y threshold - I think this is enough

In [None]:
# The run is taken to start at the first frame where bodypart2's y coordinate
# drops below this value.
y_start_threshold = 730
# Frames per second, used to convert a frame index into seconds.
frame_rate = 25

frames_below_threshold = bodypart2_y[bodypart2_y < y_start_threshold]
if frames_below_threshold.empty:
    # Without this guard, .index[0] fails with an uninformative IndexError.
    raise ValueError(
        f"bodypart2_y never drops below {y_start_threshold}; cannot determine the start time."
    )
first_surpass_index = frames_below_threshold.index[0]
start_time_seconds = first_surpass_index / frame_rate

### Simple Video Trimming for a single directory

In [None]:
# Base directory: root of the tree that the trimming loop walks with os.walk.
base_directory = "/media/toor/T7Shield/AgingMiceNWB/sub-Mouse1637/ThirdRound/Linear"

# Frames with a DLC likelihood below this value are discarded before the start
# time is computed; frame_rate converts a frame index into seconds.
likelihood_threshold = 0.9
frame_rate = 25

# Directory to change to before running the bash script
# NOTE(review): no bash script is invoked in the cells visible here, and the
# paths used by the trimming loop are built from absolute directories —
# confirm this chdir is still required.
bash_working_directory = "/home/toor/Desktop/stability-preprocessing/"
os.chdir(bash_working_directory)

In [None]:
# The run is taken to start at the first frame where bodypart2's y coordinate
# drops below this value.
TRACK_ENTRY_Y_THRESHOLD = 715
# Length of every trimmed clip, in seconds (15 minutes).
TRIM_DURATION_SECONDS = 900
# Every trimmed clip is written to this one directory, whichever session folder
# the source video came from.
# NOTE(review): videos with the same basename in different session folders map
# to the same output path; because ffmpeg runs with -n, the later one is
# skipped rather than overwritten.
TRIMMED_OUTPUT_DIR = "/media/toor/T7Shield/Mouse1637/Linear"

# Walk every directory under base_directory. A directory is processed only if
# it contains a filtered DLC .h5 file, a miniscope .avi and a behavior .avi.
for root, _dirs, files in os.walk(base_directory, topdown=False):
    # Reset per-directory state so nothing leaks in from a previous directory.
    dlc_file = None
    miniscope_file = None
    behavior_file = None
    start_time_seconds = None

    for file in files:
        if file.endswith(".h5") and 'filtered' in file:
            print(file)
            dlc_file = os.path.join(root, file)
            dlc_data = pd.read_hdf(dlc_file)
            bodypart2_y = dlc_data.iloc[:, 4].astype(float)  # y coordinates
            bodypart2_likelihood = dlc_data.iloc[:, 5].astype(float)
            # Drop unreliable points and bridge the gaps by interpolation.
            low_likelihood_mask = bodypart2_likelihood < likelihood_threshold
            bodypart2_y[low_likelihood_mask] = None
            bodypart2_y = bodypart2_y.interpolate()
            frames_below_threshold = bodypart2_y[bodypart2_y < TRACK_ENTRY_Y_THRESHOLD]
            if frames_below_threshold.empty:
                # Threshold never crossed: no start time for this directory.
                start_time_seconds = None
            else:
                first_surpass_index = frames_below_threshold.index[0]
                start_time_seconds = first_surpass_index / frame_rate

        # Look for the video files in the current directory.
        if "miniscope" in file and file.endswith(".avi"):
            miniscope_file = os.path.join(root, file)
        elif file.startswith("behavior") and not file.startswith("behaviorLinear") and file.endswith(".avi"):
            behavior_file = os.path.join(root, file)

    # "is not None" rather than truthiness: a start time of 0.0 is valid.
    if miniscope_file and behavior_file and dlc_file and start_time_seconds is not None:
        print(str(start_time_seconds))
        print(str(dlc_file))
        print(str(miniscope_file))
        print(str(behavior_file))

        # Trim each video to TRIM_DURATION_SECONDS starting at the detected time.
        for video_file in [miniscope_file, behavior_file]:  # no need to trim home cage video
            output_file = os.path.join(
                TRIMMED_OUTPUT_DIR,
                f"Linear_{os.path.basename(video_file)}"
            )

            ffmpeg_command = [
                "ffmpeg",
                "-n",  # Never overwrite: ffmpeg exits if the output file already exists
                "-ss", str(start_time_seconds),  # Start time in seconds
                "-i", video_file,                # Input file
                "-t", str(TRIM_DURATION_SECONDS),  # Duration in seconds
                "-c:v", "copy",                  # Copy video codec (no re-encoding)
                "-c:a", "copy",                  # Copy audio codec (no re-encoding)
                output_file                      # Output file
            ]

            print(f"Trimming video: {' '.join(ffmpeg_command)}")
            try:
                subprocess.run(ffmpeg_command, check=True)
            except subprocess.CalledProcessError as e:
                print(f"Error while trimming video {video_file}: {e}")
    else:
        print(f"Missing required files in directory: {root}")
        if not miniscope_file:
            print("  - Miniscope file is missing.")
        if not behavior_file:
            print("  - Behavior file is missing.")
        if not dlc_file:
            print("  - Filtered DLC .h5 file is missing.")
        elif start_time_seconds is None:
            print(
                f"  - y never drops below {TRACK_ENTRY_Y_THRESHOLD} in {dlc_file}; "
                "start time could not be determined."
            )
    