In [None]:
import sys
sys.path.append("../..")

import pickle
import numpy as np
import matplotlib.pyplot as plt

def dictionary_to_matrix(dictionary: dict, ordered_keys: list) -> tuple:
    """Build a symmetric square matrix from a dictionary of pairwise values.

    Parameters
    ----------
    dictionary : dict
        Maps ``(key1, key2)`` pairs to a non-negative value. Each value is
        written to both ``[i, j]`` and ``[j, i]``, so an unordered pair only
        has to appear once.
    ordered_keys : list
        Keys in the order that defines the row/column index of the matrix.

    Returns
    -------
    tuple
        ``(matrix, key_to_index)``: the ``(n, n)`` float array and the
        mapping from key to its row/column index.

    Raises
    ------
    KeyError
        If a pair in ``dictionary`` uses a key that is not in ``ordered_keys``.
    AssertionError
        If any cell (the diagonal included) was never filled or holds a
        negative value.
    """
    # Create a map from key to index
    key_to_index = {key: index for index, key in enumerate(ordered_keys)}

    # Initialize the matrix; -1 is the "not populated" sentinel checked below
    size = len(ordered_keys)
    matrix = np.full((size, size), -1.0)

    # Populate the matrix symmetrically
    for (key1, key2), value in dictionary.items():
        i = key_to_index[key1]
        j = key_to_index[key2]
        matrix[i, j] = value
        matrix[j, i] = value

    # Raised explicitly (not via `assert`) so the check is not stripped by -O
    if not np.all(matrix >= 0):
        raise AssertionError("Matrix was not populated correctly")

    return matrix, key_to_index


def load_data(file_path: str) -> dict:
    """Return the object stored in the pickle file at ``file_path``.

    NOTE: unpickling can execute arbitrary code, so only point this at
    files produced by this project.
    """
    with open(file_path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload


from sklearn.metrics import silhouette_score

def calculate_silhouette_score(distance_matrix: np.ndarray, true_labels: list) -> float:
    """Silhouette score of the grouping ``true_labels`` on a precomputed distance matrix."""
    score = silhouette_score(X=distance_matrix, labels=true_labels, metric="precomputed")
    return score

# Motion name of every recording: five groups of 9, 9, 10, 9 and 7 entries.
motions = np.repeat(
    ['Forward Jump', 'Run/Jog', 'Walk', 'Boxing', 'Climb Stairs'],
    [9, 9, 10, 9, 7],
)

In [None]:
# Every clip below spans the same number of samples; only the start offset
# differs per file (presumably frame indices into the .amc recording - confirm).
_WINDOW_LENGTH = 130


def _windows(start_by_file: dict) -> dict:
    """Expand ``{file: start}`` into ``{file: {"start": start, "end": start + _WINDOW_LENGTH}}``."""
    return {
        name: {"start": first, "end": first + _WINDOW_LENGTH}
        for name, first in start_by_file.items()
    }


# 9 movements
FORWARD_JUMP = _windows({
    "16_05.amc": 90,
    "16_06.amc": 200,
    "16_07.amc": 200,
    "16_09.amc": 240,
    "16_10.amc": 260,
    "13_11.amc": 190,
    "13_13.amc": 160,
    "13_19.amc": 205,
    "13_32.amc": 125,
})

# 9 movements
RUN_JOG = _windows({
    "16_45.amc": 0,
    "16_46.amc": 0,
    "35_26.amc": 0,
    "35_22.amc": 0,
    "16_35.amc": 0,
    "16_36.amc": 0,
    "35_18.amc": 0,
    "02_03.amc": 0,
    "16_56.amc": 0,
})

# 10 movements
WALK = _windows({
    "16_16.amc": 0,
    "35_12.amc": 0,
    "16_58.amc": 0,
    "35_32.amc": 0,
    "35_11.amc": 0,
    "16_21.amc": 0,
    "16_22.amc": 0,
    "16_15.amc": 40,
    "16_31.amc": 40,
    "16_47.amc": 40,
})

# 9 movements
BOXING = _windows({
    "13_17.amc": 30,
    "13_18.amc": 30,
    "14_01.amc": 40,
    "14_02.amc": 40,
    "14_03.amc": 80,
    "15_13.amc": 80,
    "17_10.amc": 80,
    "15_04.amc": 22200,
    "15_05.amc": 22400,
})

# 7 movements
CLIMB_STAIRS = _windows({
    "13_35.amc": 200,
    "13_36.amc": 230,
    "13_37.amc": 220,
    "13_38.amc": 220,
    "14_21.amc": 220,
    "14_22.amc": 220,
    "14_23.amc": 220,
})

# File names of all five groups, concatenated group by group; this fixes the
# row/column order of every distance matrix built from ordered_keys.
ordered_keys = [
    name
    for group in (FORWARD_JUMP, RUN_JOG, WALK, BOXING, CLIMB_STAIRS)
    for name in group
]


In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Toy "ideal" matrix: entries inside a group are 0, entries between two
# different groups hold the product of the two group ids.
my_list = np.repeat(np.arange(1, 6), [9, 9, 10, 9, 7])

matrix = np.where(
    my_list[:, None] == my_list[None, :],
    0.0,
    np.outer(my_list, my_list),
)

# Re-code every distinct product (2, 3, ..., 20) to a consecutive value
# 11..20 so the off-diagonal blocks get evenly spaced colours.
mapping = {0.: 0}
mapping.update(
    (float(product), 11 + rank)
    for rank, product in enumerate((2, 3, 4, 5, 6, 8, 10, 12, 15, 20))
)


def map_values(x):
    """Return the colour code for ``x``; values without a code pass through unchanged."""
    if x in mapping:
        return mapping[x]
    return x


# Element-wise version of map_values, applied to the whole matrix
vfunc = np.vectorize(map_values)
mapped_matrix = vfunc(matrix)

print(np.unique(matrix))


# Heat-map of the colour-coded example matrix (writing it to the report
# folder is currently disabled).
fig, ax = plt.subplots(figsize=(8, 5))
image = ax.imshow(mapped_matrix, cmap='hot', interpolation='nearest')
fig.colorbar(image, ax=ax)
# plt.show()
# path = "../../Report/figures/motion-capture-data/"
# plt.savefig(f"{path}/example_heatmap.png", bbox_inches='tight', pad_inches=0)

In [None]:
# Choose which precomputed pairwise-distance pickle to visualise. `depth` is
# only referenced by the commented-out reparameterized path / savefig lines.
depth = 10
level = 3
# path = f'pickle_data/reparameterized_distances/distances_{depth}.pkl'
path = f'pickle_data/logsig_distances/distances_{level}.pkl'
distances = load_data(path)

print(distances.keys())

# Arrange the pairwise values into a square matrix whose rows/columns follow
# ordered_keys; `matrix` is read again by the cells that follow.
matrix, key_to_index = dictionary_to_matrix(distances, ordered_keys)

print(key_to_index)

# Plot the distance matrix
plt.figure(figsize=(8, 5))
plt.imshow(matrix, cmap='hot', interpolation='nearest')
plt.colorbar()

# NOTE: `path` is rebound here from the pickle file to the figure output
# directory used by the (disabled) savefig calls.
path = "../../Report/figures/motion-capture-data/heatmaps"
# plt.savefig(f"{path}/logsig_{level}.png", bbox_inches='tight', pad_inches=0)
# plt.savefig(f"{path}/dynprog_{depth}.png", bbox_inches='tight', pad_inches=0)
plt.show()

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import pdist, squareform


# Standardize the data
# (each column of the distance matrix is treated as one feature)
scaler = StandardScaler()
X_standardized = scaler.fit_transform(matrix)

# Apply PCA
num_components = 4
pca = PCA(n_components=num_components)
X_reduced = pca.fit_transform(X_standardized)

print(X_reduced.shape)

# Compute the pairwise distance matrix
# (cosine distance between the PCA-reduced rows; `distance_matrix` is reused
# by the clustering / embedding cell further down)
distance_matrix = squareform(pdist(X_reduced, metric='cosine'))

# Silhouette of the ground-truth motion grouping, first on the original
# distances and then on the PCA + cosine distances.
print(calculate_silhouette_score(matrix, motions))
print(calculate_silhouette_score(distance_matrix, motions))

# Visualize the distance matrix
plt.figure(figsize=(8, 5))
plt.imshow(distance_matrix, cmap='hot', interpolation='nearest')
plt.colorbar()
plt.show()
# NOTE(review): if re-enabled, this savefig runs after plt.show() and uses
# whatever `path` an earlier cell left behind - confirm both are intended.
# plt.savefig(f"{path}/pca_{level}.png", bbox_inches='tight', pad_inches=0)

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.spatial.distance import pdist, squareform


# Project the standardized rows of `matrix` onto their first two principal
# components and show the result as a scatter plot.
num_components = 2
scaler = StandardScaler()
pca = PCA(n_components=num_components)

X_standardized = scaler.fit_transform(matrix)
X_reduced = pca.fit_transform(X_standardized)

fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(X_reduced[:, 0], X_reduced[:, 1])
plt.show()


In [None]:
def cMDS(matrix: np.ndarray) -> np.ndarray:
    """Classical multidimensional scaling of a pairwise distance matrix.

    Parameters
    ----------
    matrix : np.ndarray
        Square ``(n, n)`` matrix of pairwise distances; expected to be
        symmetric.

    Returns
    -------
    np.ndarray
        Real ``(n, n)`` array of coordinates, one row per point. Columns are
        ordered by decreasing eigenvalue, so the first ``k`` columns are the
        ``k``-dimensional embedding.

    Notes
    -----
    Negative eigenvalues are clamped to zero. Values more negative than
    ``-1e-6`` are reported with a printed message first.
    """
    distances = np.asarray(matrix, dtype=float)

    # Double centering: B = -1/2 * J * D^2 * J with J = I - 11^T / n
    n = len(distances)
    J = np.eye(n) - np.ones((n, n)) / n
    B = -J.dot(distances ** 2).dot(J) / 2

    # B is symmetric in exact arithmetic; remove round-off asymmetry so the
    # symmetric solver can be used. Unlike np.linalg.eig, eigh always returns
    # real eigenvalues and orthonormal real eigenvectors.
    B = (B + B.T) / 2
    eigenvalues, eigenvectors = np.linalg.eigh(B)

    # Treat tiny negative eigenvalues as numerical noise
    eigenvalues = np.where((eigenvalues < 0) & (np.abs(eigenvalues) < 1e-6), 0, eigenvalues)

    if not np.all(eigenvalues >= 0):
        print(f"Eigenvalues were not all positive {eigenvalues}")

    # Make sure all eigenvalues are non-negative before taking the square root
    eigenvalues = np.maximum(eigenvalues, 0)

    # Largest eigenvalue first
    idx = eigenvalues.argsort()[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    # Scale each eigenvector (column) by the square root of its eigenvalue
    coordinates = eigenvectors * np.sqrt(eigenvalues)
    return coordinates

from sklearn.manifold import MDS

def perform_cMDS(matrix: np.ndarray, n_components: int = 2, random_state=0) -> np.ndarray:
    """Embed a precomputed dissimilarity matrix with scikit-learn's metric MDS.

    Despite the name, this does not perform the eigendecomposition done by
    ``cMDS``; it fits ``sklearn.manifold.MDS``, whose result depends on a
    random initialisation.

    Parameters
    ----------
    matrix : np.ndarray
        Square matrix of pairwise dissimilarities.
    n_components : int
        Number of embedding dimensions.
    random_state : int or None
        Seed forwarded to ``MDS`` so repeated runs give the same embedding,
        in line with the other seeded estimators in this notebook. Pass
        ``None`` for an unseeded fit.

    Returns
    -------
    np.ndarray
        Array of shape ``(n_samples, n_components)`` with the coordinates.
    """
    mds = MDS(
        n_components=n_components,
        dissimilarity='precomputed',
        metric=True,
        random_state=random_state,
    )
    coordinates = mds.fit_transform(matrix)
    return coordinates

from sklearn.cluster import KMeans 

def cluster_distances(matrix: np.ndarray, number_of_clusters: int) -> np.ndarray:
    """Cluster the rows of ``matrix`` with k-means.

    Each row is passed to ``KMeans`` as a feature vector (the matrix is not
    treated as precomputed distances). The fit uses ``n_init=10`` and a fixed
    ``random_state=0``.

    Returns
    -------
    np.ndarray
        The fitted ``labels_`` array: one cluster id per row of ``matrix``.
    """
    kmeans = KMeans(n_clusters=number_of_clusters, n_init=10, random_state=0)
    return kmeans.fit(matrix).labels_

In [None]:
from sklearn_extra.cluster import KMedoids

# 2-D embedding of the PCA/cosine distance matrix
# (alternative: data_cmds = cMDS(matrix))
data_cmds = perform_cMDS(distance_matrix, n_components=2)

# Ground-truth group id (0..4) of every recording; drives marker and colour
colors = np.repeat(np.arange(5), [9, 9, 10, 9, 7])

from matplotlib.lines import Line2D

# K-medoids on the same precomputed distances; its labels annotate the points
kmedoids = KMedoids(
    n_clusters=5,
    metric='precomputed',
    random_state=0,
    init='random',
    max_iter=1000,
).fit(distance_matrix)
labels = kmedoids.labels_

markers = {0:'p', 1:'s', 2:'*', 3:'h', 4:'D'}
color_map = {0:'red', 1:'yellow', 2:'blue', 3:'orange', 4:'black'}
label_names = {0:'Forward Jump', 1:'Run/Jog', 2:'Walk', 3:'Boxing', 4:'Climbing Stairs'}

plt.figure(figsize=(8, 8))

# One scatter call per ground-truth group so each gets its own marker/colour
for group in np.unique(colors):
    members = colors == group
    plt.scatter(
        data_cmds[members, 0],
        data_cmds[members, 1],
        c=color_map[group],
        marker=markers[group],
    )

# Proxy artists for the legend, one per group
legend_elements = [
    Line2D([0], [0], marker=markers[group], color='w', markerfacecolor=color_map[group], markersize=10)
    for group in range(5)
]
plt.legend(
    legend_elements,
    [label_names[group] for group in range(5)],
    loc='upper center',
    bbox_to_anchor=(0.5, -0.005),
    fancybox=True,
    shadow=True,
    ncol=5,
)

# Write the k-medoids cluster id next to every point
for (x_pos, y_pos), cluster_id in zip(data_cmds, labels):
    plt.annotate(cluster_id, (x_pos, y_pos), xytext=(2, 2), textcoords='offset points')

# Remove ticks
plt.xticks([])
plt.yticks([])

plt.show()
path = "../../Report/figures/motion-capture-data/2d_plots"
# plt.savefig(f"{path}/logsig_level3_red.png", bbox_inches='tight', pad_inches=0.2)


In [None]:
import pandas as pd

# --- Reparameterized distances -------------------------------------------
# Load the pairwise distances and cluster them directly with k-medoids.
depth = 10
path = f'pickle_data/reparameterized_distances/distances_{depth}.pkl'
distances = load_data(path)
matrix, key_to_index = dictionary_to_matrix(distances, ordered_keys)

kmedoids = KMedoids(n_clusters=5, metric='precomputed', random_state=0, init = 'random', max_iter=1000)
kmedoids.fit(matrix)
rep_labels = kmedoids.labels_

# Silhouette of the k-medoids labels (not of the ground truth) on the raw distances
sil_score_rep = silhouette_score(matrix, rep_labels, metric='precomputed')

# Reduce the dimensionality of the data
# (standardize -> 4-component PCA -> pairwise cosine distances)
scaler = StandardScaler()
X_standardized = scaler.fit_transform(matrix)
num_components = 4
pca = PCA(n_components=num_components)
X_reduced = pca.fit_transform(X_standardized)
distance_matrix = squareform(pdist(X_reduced, metric='cosine'))

# Same k-medoids configuration, now on the reduced ("Red") distances
kmedoids = KMedoids(n_clusters=5, metric='precomputed', random_state=0, init = 'random', max_iter=1000)
kmedoids.fit(distance_matrix)
rep_labels_red = kmedoids.labels_

sil_score_rep_red = silhouette_score(distance_matrix, rep_labels_red, metric='precomputed')

# --- Log-signature distances ----------------------------------------------
# Same pipeline as for the reparameterized distances; `distances`, `matrix`,
# `kmedoids`, `scaler`, `pca`, `X_reduced` and `distance_matrix` are
# overwritten, so after this block they refer to the log-signature data.
level = 3
path = f'pickle_data/logsig_distances/distances_{level}.pkl'
distances = load_data(path)
matrix, key_to_index = dictionary_to_matrix(distances, ordered_keys)

kmedoids = KMedoids(n_clusters=5, metric='precomputed', random_state=0, init = 'random', max_iter=1000)
kmedoids.fit(matrix)

logsig_labels = kmedoids.labels_
# Silhouette of the k-medoids labels on the raw distances
sil_score_logsig = silhouette_score(matrix, logsig_labels, metric='precomputed')

# Reduce the dimensionality of the data
# (standardize -> 4-component PCA -> pairwise cosine distances)
scaler = StandardScaler()
X_standardized = scaler.fit_transform(matrix)
num_components = 4
pca = PCA(n_components=num_components)
X_reduced = pca.fit_transform(X_standardized)
distance_matrix = squareform(pdist(X_reduced, metric='cosine'))

# Same k-medoids configuration on the reduced ("Red") distances
kmedoids = KMedoids(n_clusters=5, metric='precomputed', random_state=0, init = 'random', max_iter=1000)
kmedoids.fit(distance_matrix)
logsig_labels_red = kmedoids.labels_

sil_score_logsig_red = silhouette_score(distance_matrix, logsig_labels_red, metric='precomputed')

# Ground truth per recording, as integer ids and as motion names (same
# 9/9/10/9/7 grouping). `true_labels` is read again by a later cell.
true_labels = np.array([0] * 9 + [1] * 9 + [2] * 10 + [3] * 9 + [4] * 7)
motions = np.array(['Forward Jump']*9 + ['Run/Jog']*9 + ['Walk']*10 + ['Boxing']*9 + ['Climb Stairs']*7)
                    

# I want to map the labels to the true labels, s.t. it is easier to compare
# NOTE(review): these permutations are hard-coded (all four are identical);
# presumably they were read off one particular clustering run and need to be
# re-checked whenever the data or the KMedoids settings change - verify.
mapping_rep = {3:0, 4:1, 2:2, 0:3, 1:4}
mapping_logsig = {3: 0, 4:1, 2:2, 0:3,1:4}
mapping_rep_red = {3:0, 4:1, 2:2,0:3,1:4}
mapping_logsig_red = {3:0,4:1,2:2,0:3,1:4}


# Relabel in place: re-running only this part applies the permutation a
# second time to already-relabelled arrays.
rep_labels = np.array([mapping_rep[label] for label in rep_labels])
logsig_labels = np.array([mapping_logsig[label] for label in logsig_labels])
rep_labels_red = np.array([mapping_rep_red[label] for label in rep_labels_red])
logsig_labels_red = np.array([mapping_logsig_red[label] for label in logsig_labels_red])


# One row per recording: true motion next to the four relabelled clusterings
df = pd.DataFrame({
    "Motion": motions, 
    "Reparam": rep_labels, 
    "LogSig": logsig_labels,
    "Reparam (Red)": rep_labels_red,
    "LogSig (Red)": logsig_labels_red,
})
# NOTE(review): this bare expression is not the last statement of the cell,
# so it renders nothing in the notebook.
df

# Silhouette score of each clustering, one row per method ...
df_sil = pd.DataFrame({
    "Method": ["Reparam", "LogSig", "Reparam (Red)", "LogSig (Red)"],
    "Silhouette Score": [sil_score_rep, sil_score_logsig, sil_score_rep_red, sil_score_logsig_red]
})

# ... then transposed so that each method becomes a column
df_sil = df_sil.T

# latex = df_sil.to_latex(index=False, header=False)

# # Write the LaTeX table to a file
# with open('../../Report/figures/motion-capture-data/cluster-silhouette.tex', 'w') as f:
#     f.write(latex)


# with open('../../Report/figures/motion-capture-data/cluster.tex', 'w') as f:
#     f.write(df.to_latex(index=False))


# # Split the DataFrame into two parts
# df1 = df.iloc[:28]  # First 28 rows
# df2 = df.iloc[28:]  # Remaining rows

# # Save the first part
# with open('../../Report/figures/motion-capture-data/cluster_part1.tex', 'w') as f:
#     f.write(df1.to_latex(index=False))

# # Save the second part
# with open('../../Report/figures/motion-capture-data/cluster_part2.tex', 'w') as f:
#     f.write(df2.to_latex(index=False))

In [None]:
def inner_cluster_distance(distance_matrix: np.ndarray, true_labels: list) -> dict:
    """Collect the pairwise distances inside every cluster and their mean.

    Parameters
    ----------
    distance_matrix : np.ndarray
        Square matrix; entry ``[i][j]`` is the distance between items ``i``
        and ``j``. Only the upper triangle (``j > i``) is read.
    true_labels : list
        Cluster label of every item, aligned with the matrix rows.

    Returns
    -------
    dict
        ``{cluster: {"distances": [...], "mean": float}}`` with clusters in
        order of first appearance in ``true_labels``.

    Raises
    ------
    AssertionError
        If a cluster has fewer than two members and therefore no
        within-cluster pair.
    """
    n = len(true_labels)

    # Register every cluster up front, in order of first appearance, so that
    # a cluster without any pair is detected below instead of being dropped.
    distances = {}
    for i in range(n):
        distances.setdefault(true_labels[i], {"distances": []})

    # Visit each unordered pair once and keep the within-cluster ones
    for i in range(n):
        for j in range(i + 1, n):
            if true_labels[i] == true_labels[j]:
                distances[true_labels[i]]["distances"].append(distance_matrix[i][j])

    for cluster, stats in distances.items():
        if len(stats["distances"]) == 0:
            raise AssertionError(f"No distances were added to the {cluster} cluster")
        stats["mean"] = np.mean(stats["distances"])

    return distances

# Print the mean within-cluster distance of every motion for the distance
# matrix currently held in `matrix`.
result = inner_cluster_distance(matrix, motions)
for cluster in result:
    data = result[cluster]
    print(f"Cluster {cluster}: {data['mean']:.2f}")

from sklearn.metrics import silhouette_score

def calculate_silhouette_score(distance_matrix: np.ndarray, true_labels: list) -> float:
    """Silhouette score of the grouping ``true_labels`` on a precomputed distance matrix.

    NOTE: this re-defines the identically named helper from the first cell
    with the same behaviour.
    """
    score = silhouette_score(X=distance_matrix, labels=true_labels, metric="precomputed")
    return score

from sklearn.manifold import MDS
from scipy.spatial.distance import cdist

# Step 1: Convert the distance matrix back to a feature space using MDS
def convert_to_feature_space(distance_matrix: np.ndarray) -> np.ndarray:
    """Embed a precomputed distance matrix into coordinates with MDS.

    The embedding uses as many dimensions as there are rows in
    ``distance_matrix`` and a fixed ``random_state=42``.
    """
    point_count = distance_matrix.shape[0]
    embedder = MDS(n_components=point_count, dissimilarity='precomputed', random_state=42)
    return embedder.fit_transform(distance_matrix)

# Step 2: Calculate centroids of clusters
def calculate_centroids(data: np.ndarray, labels: np.ndarray) -> np.ndarray:
    """Mean of the rows of ``data`` for every distinct label.

    Centroids are returned in the sorted order of ``np.unique(labels)``,
    one row per label.
    """
    centroid_rows = []
    for label in np.unique(labels):
        members = data[labels == label]
        centroid_rows.append(members.mean(axis=0))
    return np.array(centroid_rows)

# Step 3: Calculate pairwise distances between centroids
def centroid_distances(centroids: np.ndarray) -> np.ndarray:
    """Square matrix of Euclidean distances between all pairs of centroids."""
    pairwise = cdist(centroids, centroids, metric='euclidean')
    return pairwise

# def outer_cluster_distance(dict_of_movements: dict, dict_of_distances: dict) -> float:
#     # Initialize a list to hold distances between movements of different types
#     distances = []

#     # Loop through the dictionary of distances
#     for key, distance in dict_of_distances.items():
#         # Check if the movements are of different types
#         if dict_of_movements[key[0]]["description"] != dict_of_movements[key[1]]["description"]:
#             # If they are, add the distance to the list of distances
#             distances.append(distance)

#     assert len(distances) > 0, f"No distances were added to the outer cluster"

#     # Calculate the average distance and return it
#     return np.mean(distances)

In [None]:
import pandas as pd

# --- Reparameterized distances -------------------------------------------
depth = 10
path = f'pickle_data/reparameterized_distances/distances_{depth}.pkl'
distances = load_data(path)
matrix, key_to_index = dictionary_to_matrix(distances, ordered_keys)

# k-means labels are computed here but not used by the statistics below;
# the silhouette score is taken for the ground-truth grouping `motions`.
rep_labels = cluster_distances(matrix, number_of_clusters=5)
rep_silhouette_score_rep = calculate_silhouette_score(matrix, motions)

# Embed the distances as coordinates, average them per true cluster and
# measure the centroid-to-centroid distances. `true_labels` is defined in
# an earlier cell.
feature_space = convert_to_feature_space(matrix)
centroids = calculate_centroids(feature_space, true_labels)
centroid_dist_matrix = centroid_distances(centroids)
# Column mean over all centroids; this includes each centroid's zero
# distance to itself.
center_distance_rep = centroid_dist_matrix.mean(axis=0)

# Mean within-cluster distance per motion, in the key order of `result`
result = inner_cluster_distance(matrix, motions)
inner_mean_rep = [data['mean'] for _, data in result.items()]


# --- Log-signature distances ----------------------------------------------
# Same steps as above; `distances`, `matrix`, `feature_space`, `centroids`,
# `centroid_dist_matrix` and `result` are overwritten.
level = 3
path = f'pickle_data/logsig_distances/distances_{level}.pkl'
distances = load_data(path)
matrix, key_to_index = dictionary_to_matrix(distances, ordered_keys)

logsig_labels = cluster_distances(matrix, number_of_clusters=5)
logsig_silhouette_score_sig = calculate_silhouette_score(matrix, motions)

feature_space = convert_to_feature_space(matrix)
centroids = calculate_centroids(feature_space, true_labels)
centroid_dist_matrix = centroid_distances(centroids)
# Column mean including the zero self-distance, as for the block above
center_distance_sig = centroid_dist_matrix.mean(axis=0)

result = inner_cluster_distance(matrix, motions)
inner_mean_sig = [data['mean'] for _, data in result.items()]


In [None]:
# Per-motion summary table.
# The four value columns list the clusters in order of first appearance in
# `motions` (inner_cluster_distance) or in sorted `true_labels` order
# (calculate_centroids), which is the same sequence. The name column must use
# that order too: np.unique(motions) would sort the names alphabetically and
# attach every statistic to the wrong motion.
df = pd.DataFrame({'Movements': list(dict.fromkeys(str(name) for name in motions)),
                   'Mean Inner Distance Reparameterized': inner_mean_rep,
                   'Mean Inner Distance Logsig': inner_mean_sig,
                   'Mean Center Distance Reparameterized': center_distance_rep,
                   'Mean Center Distance Logsig': center_distance_sig,
})

# with open('../../Report/figures/motion-capture-data/cluster-distances.tex', 'w') as f:
#     f.write(df.to_latex(index=False))

In [None]:
# Single-row table with the ground-truth silhouette score of each distance
df = pd.DataFrame(
    [[rep_silhouette_score_rep, logsig_silhouette_score_sig]],
    columns=['Reparameterized', 'Logsig'],
)

# with open('../../Report/figures/motion-capture-data/silhouette-scores.tex', 'w') as f:
#     f.write(df.to_latex(index=False))

In [None]:
# Worked example of the cosine distance (1 - cosine similarity) between two
# 2-D vectors after scaling both to unit length.
v1 = np.array([1.2, 0.5])
v2 = np.array([-1, 2.2])

v1 /= np.linalg.norm(v1)
v2 /= np.linalg.norm(v2)

print(1 - v1.dot(v2))
