In [None]:
from pathlib import Path
import re
import numpy as np
import numpy.typing as npt
import matplotlib.pyplot as plt
from lumicks import pylake
from enum import Enum

In [None]:
# Directories that are searched for raw .h5 files.
data_dirs = [Path("/Users/sylvi/optical_data/loading_markers/data")]

# Fail fast if an input directory is missing. An explicit raise is used
# instead of `assert` because assertions are stripped when Python runs with
# -O, which would let a bad path slip through unnoticed.
for data_dir in data_dirs:
    if not data_dir.exists():
        raise FileNotFoundError(f"Data directory {data_dir} does not exist.")

# Output location; nothing in the code visible here writes to it.
output_folder = Path("/Users/sylvi/optical_data/loading_markers/processed/")

# Starts empty; not populated by the code visible here.
markers = {}

# Exploratory pass: open the first "Marker" .h5 file found in the data
# directories and plot the first force-distance curve it contains in several
# different ways, then stop.
#
# NOTE(review): the axis labels below say "nm" for distance. Confirm against
# the unit pylake reports for the distance channel before trusting them.
for data_dir in data_dirs:
    # Sorted so that "the first marker file" is the same file on every run;
    # Path.glob() makes no promise about ordering.
    for file in sorted(data_dir.glob("*.h5")):
        # Skip anything that is not a marker file instead of giving up on the
        # whole directory when the first globbed file happens not to match.
        if not (file.is_file() and "Marker" in file.name):
            continue

        print(f"Loading file {file}")
        marker_data = pylake.File(file)
        print(type(marker_data))

        # get metadata from the file name
        tel_reps_match = re.search(r"Tel(\d+)", file.name)
        if tel_reps_match is None:
            raise ValueError(f"Could not find telereps in file name {file.name}")
        tel_reps = int(tel_reps_match.group(1))

        # The word immediately preceding " Marker <digits>" in the file name.
        protein_name_match = re.search(r" (\w+)(?= Marker \d+)", file.name)
        if protein_name_match is None:
            raise ValueError(f"Could not find protein name in file name {file.name}")
        protein_name = protein_name_match.group(1)

        # extract the curves
        for curve_id, curve_data in marker_data.fdcurves.items():
            print(f"Curve ID: {curve_id}")
            force_data = curve_data.f.data
            distance_data = curve_data.d.data

            # Reference plot produced by pylake itself.
            curve_data.plot_scatter()
            plt.title("Force-distance plot from built-in plot_scatter() method")
            plt.show()

            # Raw force samples against sample index.
            print(curve_data.f)
            plt.plot(force_data)
            plt.title(f"Force data retrieved via file.fdcurves[{curve_id}].f.data")
            plt.show()

            # Raw distance samples against sample index.
            plt.plot(distance_data)
            plt.title(f"Distance data retrieved via file.fdcurves[{curve_id}].d.data")
            plt.show()

            # Force against distance, first as a line and then as points.
            plt.plot(distance_data, force_data)
            plt.title(f"Force vs Distance for curve {curve_id}")
            plt.xlabel("Distance (nm)")
            plt.ylabel("Force (pN)")
            plt.show()

            plt.scatter(distance_data, force_data, s=8)
            plt.title(f"Force vs Distance Scatter for curve {curve_id}")
            plt.xlabel("Distance (nm)")
            plt.ylabel("Force (pN)")
            plt.show()

            # Sample-to-sample change in distance (one element shorter than
            # distance_data).
            distance_diffs = np.diff(distance_data)
            plt.plot(distance_diffs)
            plt.title(f"Distance differences for curve {curve_id}")
            plt.xlabel("Index")
            plt.ylabel("Distance difference (nm)")
            plt.show()

            # Classify each step as increasing (+1), decreasing (-1) or
            # stable (0) by comparing the step size with a fixed threshold.
            distance_diff_direction_threshold = 0.02
            distance_increasing = distance_diffs > distance_diff_direction_threshold
            distance_decreasing = distance_diffs < -distance_diff_direction_threshold
            # Not used further in this cell: stable steps simply keep the
            # initial 0 in distance_state.
            distance_stable = np.abs(distance_diffs) <= distance_diff_direction_threshold
            distance_state = np.zeros_like(distance_diffs)
            distance_state[distance_increasing] = 1
            distance_state[distance_decreasing] = -1
            plt.scatter(np.arange(distance_state.size), distance_state, s=8)
            plt.title(f"Distance state for curve {curve_id}")
            plt.xlabel("Index")
            plt.ylabel("State (1: increasing, -1: decreasing, 0: stable)")
            plt.show()
            # Only the first curve is explored.
            break
        # Only the first marker file is explored.
        break
    else:
        # No marker file in this directory; try the next one.
        continue
    # A marker file was explored; stop scanning directories.
    break