In [None]:
from pathlib import Path
import h5py
import matplotlib.pyplot as plt
import numpy as np

from scipy.optimize import curve_fit

from topostats.io import hdf5_to_dict
import h5glance

In [None]:
# Load one processed scan and pull out the spline trace of a single molecule.
# NOTE(review): absolute, user-specific path; on any other machine the asserts
# below stop the cell before anything is read.
data_dir = Path("/Users/sylvi/topo_data/pleng/data/")
assert data_dir.exists()
filename = "20231218_2ngSCcats.0_00003.topostats"
file_path = data_dir / filename
assert file_path.exists()

# The .topostats file is opened read-only through h5py; everything below runs
# while the file handle is still open.
with h5py.File(file_path, "r") as f:
    # Convert the file content, starting at the root group, into a dict.
    image_data = hdf5_to_dict(f, group_path="/")
    print(image_data.keys())
    spline_data = image_data["splining"]["above"]
    # NOTE(review): the scaling is read and printed here but is not applied to
    # spline_coords in this cell; confirm which units spline_coords are in
    # before comparing them with lengths expressed in nm.
    p_to_nm = image_data["pixel_to_nm_scaling"]
    print(f"pixel to nm scaling: {p_to_nm}")

    # Show the stored image for visual reference.
    image = image_data["image"]
    plt.imshow(image)
    plt.show()

    # Narrow down to one molecule; the grain/molecule keys are hard-coded.
    spline_data = spline_data["grain_0"]["mol_0"]
    print(spline_data.keys())
    spline_coords = spline_data["spline_coords"]

    # Plot the spline trace with column 0 on the x axis and column 1 on the y
    # axis. NOTE(review): mypleng/betterpleng further down plot column 1 on x
    # and column 0 on y, so this figure is transposed relative to theirs.
    plt.plot(spline_coords[:, 0], spline_coords[:, 1])

In [None]:
from scipy.interpolate import splprep, splev

# Hand-picked control points of a closed loop, used as a synthetic test trace.
control_points = np.array(
    [
        [50, 50],
        [50, 60],
        [60, 70],
        [70, 60],
        [70, 50],
        [70, 40],
        [70, 30],
        [60, 20],
        [50, 30],
        [50, 40],
    ]
)
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(control_points[:, 0], control_points[:, 1], "o", color="red")
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
plt.show()

# Fit a periodic interpolating spline through the control points.
# With per=True, splprep does not use the last data point (it is treated as a
# repeat of the first one), so the loop is closed explicitly by appending the
# first point. Passing the open list of points would silently drop [50, 40].
closed_points = np.vstack([control_points, control_points[:1]])
tck, u = splprep(closed_points.T, s=0, per=True)

# Evaluate the spline at 100 evenly spaced parameter values around the loop.
spline_x, spline_y = splev(np.linspace(0, 1, 100), tck)
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(spline_x, spline_y, "-", color="red")
ax.set_xlim(0, 100)
ax.set_ylim(0, 100)
plt.show()

# Final result used by later cells: one row per sample, two columns.
custom_coords = np.column_stack([spline_x, spline_y])
print(custom_coords.shape)

In [None]:
def mypleng(points: np.ndarray, max_length: float):
    """Estimate a persistence length from a 2-D trace and plot every intermediate step.

    The trace is walked point by point. The direction of the first step is the
    initial reference. For each later step the difference between its direction
    angle and the reference angle is recorded together with the path length
    accumulated since the reference. As soon as the accumulated length exceeds
    ``max_length`` the recorded samples are stored as one group, the current
    step becomes the new reference and accumulation restarts. Samples recorded
    after the last reset are discarded because that group is incomplete.

    For every group ``-log(cos(angle difference))`` is interpolated onto a
    common distance grid (0 to ``max_length`` in steps of 0.1), the groups are
    averaged, and a straight line is fitted. The slope is interpreted as
    ``1 / (2 p)`` and ``p`` is printed.

    Parameters
    ----------
    points : np.ndarray
        Ordered trace coordinates, one row per point and two columns.
    max_length : float
        Path length after which a new reference direction is chosen. Must be
        expressed in the same units as ``points``.

    Returns
    -------
    None
        Results are printed and plotted only.

    Raises
    ------
    ValueError
        If the trace never accumulates more than ``max_length`` with at least
        one recorded sample, or if the averaged curve is not finite (for
        example when an angle difference exceeds 90 degrees, which makes the
        logarithm undefined).
    """
    fig, ax = plt.subplots(figsize=(5, 5))

    # Column 1 is drawn on the x axis and column 0 on the y axis.
    ax.plot(points[:, 1], points[:, 0], c="red")
    ax.scatter(points[0, 1], points[0, 0], c="blue")

    current_length = 0
    distances = []
    angle_diffs = []
    distance_angle_diff_pairs = {}

    # Reference direction: the step from the first to the second point.
    first_vector = points[1] - points[0]
    first_angle = np.arctan2(first_vector[1], first_vector[0])

    # Start at the step that follows the reference and leave out the final point.
    for coord_index in range(2, len(points) - 1):
        coord = points[coord_index]
        vector = coord - points[coord_index - 1]
        angle = np.arctan2(vector[1], vector[0])

        current_length += np.linalg.norm(vector)
        if current_length > max_length:
            # Close the current group and restart from this step.
            distance_angle_diff_pairs[coord_index] = {"distances": distances, "angle_diffs": angle_diffs}
            first_angle = angle
            current_length = 0
            distances = []
            angle_diffs = []
            # Mark the reset position on the trace.
            ax.scatter(coord[1], coord[0], c="green")
        else:
            distances.append(current_length)
            angle_diffs.append(angle - first_angle)

    ax.set_aspect("equal")
    plt.show()

    print(distance_angle_diff_pairs.keys())

    # Convert every group to -log(cos(angle difference)) once and reuse it for
    # both the per-group plot and the average.
    group_curves = {
        coord_index: (group["distances"], -np.log(np.cos(group["angle_diffs"])))
        for coord_index, group in distance_angle_diff_pairs.items()
    }

    # Per-group curves as a function of distance.
    fig, ax = plt.subplots()
    for coord_index, (group_distances, neg_log_cos) in group_curves.items():
        ax.plot(group_distances, neg_log_cos, label=f"point {coord_index}")
    ax.legend()
    plt.show()

    # Groups have different sample positions, so bring them onto a common grid
    # before averaging. A group without samples cannot be interpolated.
    sample_distances = np.arange(0, max_length, 0.1)
    interpolated_curves = [
        np.interp(sample_distances, group_distances, neg_log_cos)
        for group_distances, neg_log_cos in group_curves.values()
        if len(group_distances) > 0
    ]
    if not interpolated_curves:
        raise ValueError(
            f"no complete group of samples was collected for max_length={max_length}; "
            "the trace is too short or its steps are longer than max_length"
        )
    average_angle_diffs = np.mean(interpolated_curves, axis=0)

    fig, ax = plt.subplots()
    ax.plot(sample_distances, average_angle_diffs)
    plt.show()

    # Fit a straight line to the averaged curve.
    def linear(x, a, b):
        return a * x + b

    popt, pcov = curve_fit(linear, sample_distances, average_angle_diffs)
    print(f"popt: {popt} = 1/2p. p = {1/(2*popt[0])}")
    fig, ax = plt.subplots()
    ax.plot(sample_distances, linear(sample_distances, *popt))
    ax.plot(sample_distances, average_angle_diffs)
    plt.show()


# Run the estimator on the synthetic closed spline built in the previous cell.
mypleng(custom_coords, 10)

In [None]:
# Illustration: for unit vectors the dot product equals cos(angle between them).
vector1 = np.array([1.0, 0.0])
vector2 = np.array([-1.0, 1.0])

# Scale both vectors to unit length.
vector1 = vector1 / np.linalg.norm(vector1)
vector2 = vector2 / np.linalg.norm(vector2)

dotproduct = vector1.dot(vector2)

# Draw both vectors from the origin; component 1 goes on the x axis and
# component 0 on the y axis.
fig, ax = plt.subplots(figsize=(5, 5))
ax.plot([0, vector1[1]], [0, vector1[0]], c="red")
ax.plot([0, vector2[1]], [0, vector2[0]], c="blue")
ax.set(title=f"dot product: {dotproduct}", xlim=(-1, 1), ylim=(-1, 1))
plt.show()

In [None]:
def betterpleng(points: np.ndarray, maximum_length_nm: float, plot=False) -> tuple[float, float]:
    """Estimate a persistence length from the decay of tangent correlation along a trace.

    The trace is walked point by point. The unit direction of the first step is
    the initial reference. For each later step the cosine of the angle to the
    reference (dot product of unit vectors) is recorded together with the path
    length accumulated since the reference. When the accumulated length exceeds
    ``maximum_length_nm`` the samples are stored as one group, the current step
    becomes the new reference and accumulation restarts. The last point of the
    trace and the unfinished final group are not used.

    The groups are interpolated onto a common distance grid (0 to
    ``maximum_length_nm`` in steps of 0.1) and averaged. A straight line is
    fitted to ``-log(mean cos)`` and the slope is interpreted as ``1 / (2 p)``.

    Parameters
    ----------
    points : np.ndarray
        Ordered trace coordinates, one row per point. Converted to float, so
        integer coordinates are accepted.
    maximum_length_nm : float
        Path length after which a new reference direction is chosen. Must be
        expressed in the same units as ``points``.
    plot : bool, optional
        If True, show the trace, the per-group curves, the averaged curve, the
        fit and the residuals.

    Returns
    -------
    tuple[float, float]
        ``(pleng, rmse)``: the fitted persistence length and the root mean
        squared residual of the linear fit.

    Raises
    ------
    ValueError
        If the first two points coincide, if no complete group with at least
        one sample was collected, or (raised by ``curve_fit``) if the averaged
        cosine is not positive everywhere so that its logarithm is not finite.
    """
    # Work on a float array: the direction vectors are normalised by division.
    points = np.asarray(points, dtype=float)

    if plot:
        fig, ax = plt.subplots(figsize=(5, 5))
        # Column 1 is drawn on the x axis and column 0 on the y axis.
        ax.plot(points[:, 1], points[:, 0], c="red")
        ax.scatter(points[0, 1], points[0, 0], c="blue")

    # Reference direction: the unit vector of the first step.
    first_vector = points[1] - points[0]
    first_norm = np.linalg.norm(first_vector)
    if first_norm == 0:
        raise ValueError("the first two points coincide, so the trace has no initial direction")
    first_vector = first_vector / first_norm

    current_distance = 0
    distance_cos_angle_pairs = {}
    distances = []
    cos_angles = []

    # Start at the step directly after the reference (index 2) so that the
    # first group is measured exactly like every later group. The last point is
    # left out, and the final unfinished group is never stored.
    for point_index in range(2, len(points) - 1):
        point = points[point_index]
        vector = point - points[point_index - 1]
        vector_distance = np.linalg.norm(vector)
        if vector_distance == 0:
            # A repeated point has no direction and adds no path length.
            continue
        vector = vector / vector_distance
        cos_angle = np.dot(first_vector, vector)
        if cos_angle == 0:
            print("[warning] cos(angle) is orthogonal to first vector, can't be fitted to log plot")
        elif cos_angle < 0:
            print("[warning] cos(angle) is negative, can't be fitted to log plot")
        current_distance += vector_distance
        if current_distance > maximum_length_nm:
            # Close the current group and restart from this step.
            distance_cos_angle_pairs[point_index] = {"distances": distances, "cos_angles": cos_angles}
            first_vector = vector
            current_distance = 0
            distances = []
            cos_angles = []
            if plot:
                ax.scatter(point[1], point[0], c="green")
        else:
            if plot:
                ax.scatter(point[1], point[0], c="blue")
            distances.append(current_distance)
            cos_angles.append(cos_angle)

    if plot:
        ax.set_aspect("equal")
        plt.show()

        # Per-group cosine as a function of distance.
        fig, ax = plt.subplots()
        for point_index, distance_cos_angle_pair in distance_cos_angle_pairs.items():
            ax.plot(
                distance_cos_angle_pair["distances"],
                distance_cos_angle_pair["cos_angles"],
                label=f"point {point_index}",
            )
        ax.legend()
        plt.show()

    # Common distance grid shared by the averaging, the fit and the plots.
    sample_distances = np.arange(0, maximum_length_nm, 0.1)

    # Average the cosine over all groups. A group without samples (a single
    # step longer than the maximum length) cannot be interpolated and is skipped.
    interpolated_cos_angles = [
        np.interp(sample_distances, distance_cos_angle_pair["distances"], distance_cos_angle_pair["cos_angles"])
        for distance_cos_angle_pair in distance_cos_angle_pairs.values()
        if distance_cos_angle_pair["distances"]
    ]
    if not interpolated_cos_angles:
        raise ValueError(
            f"no complete group of samples was collected for maximum_length_nm={maximum_length_nm}; "
            "the trace is too short or its steps are longer than the maximum length"
        )
    average_cos_angles = np.mean(interpolated_cos_angles, axis=0)

    inverse_log_cos_angles = -np.log(average_cos_angles)

    if plot:
        fig, ax = plt.subplots()
        ax.plot(sample_distances, inverse_log_cos_angles)
        plt.show()

    def linear(x, a, b):
        return a * x + b

    popt, pcov = curve_fit(linear, sample_distances, inverse_log_cos_angles)
    fitted_line = linear(sample_distances, *popt)

    if plot:
        fig, ax = plt.subplots()
        ax.plot(sample_distances, fitted_line)
        ax.plot(sample_distances, inverse_log_cos_angles)
        plt.show()

    # the slope is 1/(2p) where p is the pleng
    pleng = 1 / (2 * popt[0])
    print(f"pleng: {pleng}")

    # Residuals of the fit, used as a simple quality measure.
    residuals = inverse_log_cos_angles - fitted_line
    if plot:
        fig, ax = plt.subplots()
        ax.plot(sample_distances, residuals)
        plt.show()

    # Covariance of the fitted parameters; the square root of its diagonal is
    # the standard deviation of each parameter.
    print(pcov)
    print(np.sqrt(np.diag(pcov)))

    # calculate root mean squared error
    rmse = np.sqrt(np.mean(residuals**2))
    print(f"rmse: {rmse}")

    return pleng, rmse


# betterpleng(custom_coords, 10)

In [None]:
# Try several segment lengths on the loaded spline and keep the one whose
# linear fit has the smallest root mean squared error.
best_pleng = best_rmse = best_max_length = None
for maximum_length_nm in (10, 20, 30, 40, 50):
    pleng, rmse = betterpleng(spline_coords, maximum_length_nm, plot=False)
    # Skip candidates that do not beat the best fit seen so far.
    if best_rmse is not None and not (rmse < best_rmse):
        continue
    best_pleng, best_rmse, best_max_length = pleng, rmse, maximum_length_nm

# Re-run the winning setting with plots enabled to inspect the fit.
pleng, rmse = betterpleng(spline_coords, plot=True, maximum_length_nm=best_max_length)

print(f"best pleng: {pleng} with rmse: {rmse} at maximum length: {best_max_length}")