In [1]:
import sys
import os
import glob
import pandas as pd
import gensim.downloader as api
import numpy as np
import importlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import CCA
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import caption_to_embedding
from caption_to_embedding import transform_captions
from load_hdf5 import load_keypoints_from_hdf5


# Print the module search path (handy when diagnosing import problems).
print(sys.path)
# Root directory whose entries are listed below.
input_map = "/tank/tgn252/metadata_annotations/result"
# Bare entry names (not full paths) found directly under input_map.
video_names = os.listdir(input_map)

['/tank/tgn252/metadata_annotations/scripts', '/tank/tgn252/anaconda3/envs/metadata_stats/lib/python39.zip', '/tank/tgn252/anaconda3/envs/metadata_stats/lib/python3.9', '/tank/tgn252/anaconda3/envs/metadata_stats/lib/python3.9/lib-dynload', '', '/tank/tgn252/.local/lib/python3.9/site-packages', '/tank/tgn252/anaconda3/envs/metadata_stats/lib/python3.9/site-packages', '/tank/tgn252/metadata_annotations/src']


In [2]:
def get_dirs_with_subdir(dirs, subdir_name):
    """
    Report, for each given path, whether it contains a subdirectory with the given name.

    An entry only counts when ``subdir_name`` exists inside the path *and* is
    itself a directory. Paths in ``dirs`` that are not directories (regular
    files, missing paths) map to False instead of raising.

    Args:
        dirs (iterable of str): Directory paths to inspect.
        subdir_name (str): Name of the subdirectory to check for.

    Returns:
        dict: Maps every path from ``dirs`` to True if it contains a subdirectory
        named ``subdir_name``, and False otherwise.
    """
    # os.path.isdir is False for missing paths and for regular files, so stray
    # files in the listing are handled without a separate existence check.
    return {d: os.path.isdir(os.path.join(d, subdir_name)) for d in dirs}

In [8]:
# Full path of every entry found under the results root.
full_paths = [os.path.join(input_map, name) for name in video_names]
# Path -> flag telling whether it holds an 'action_classification' entry.
maps = get_dirs_with_subdir(full_paths, 'action_classification')
# Keep only the paths whose flag is set.
true_dirs = [path for path, has_subdir in maps.items() if has_subdir]
print(len(true_dirs))
print(len(maps))
print(true_dirs)

970
1137
['/tank/tgn252/metadata_annotations/result/R_S_yt--jzScB82vKI_212.mp4', '/tank/tgn252/metadata_annotations/result/R_S_wc-hmW1OnCPF_xn_37.mp4', '/tank/tgn252/metadata_annotations/result/R_S_yt-r5kRl-2lVxc_2.mp4', '/tank/tgn252/metadata_annotations/result/S_yt-3V8US8ZD3NU_229.mp4', '/tank/tgn252/metadata_annotations/result/S_yt-tMP5WhHMtWE_55.mp4', '/tank/tgn252/metadata_annotations/result/R_S_yt-y3tyx6cZjmA_412.mp4', '/tank/tgn252/metadata_annotations/result/S_production_ID_5102663.mp4', '/tank/tgn252/metadata_annotations/result/S_yt-oPeZEUtqKNM_27.mp4', '/tank/tgn252/metadata_annotations/result/R_S_yt-STL3ejY9u_Q_73.mp4', '/tank/tgn252/metadata_annotations/result/R_S_wc-3IayTPy_A3nF_17.mp4', '/tank/tgn252/metadata_annotations/result/R_S_3dzza09VbXY_235.mp4', '/tank/tgn252/metadata_annotations/result/S_ILSVRC2015_train_00042002.mp4', '/tank/tgn252/metadata_annotations/result/R_S_yt-lCb_efvJcdE_5.mp4', '/tank/tgn252/metadata_annotations/result/S_yt-un00Oks5uVc_7.mp4', '/tank/tgn

In [5]:
def find_files(video_name, dir_name, file_type, base_path="/tank/tgn252/metadata_annotations/result"):
    """
    Find all files of a given type in a specified directory (and its subdirectories) for a given video.

    The directory part of the search is matched literally, so glob
    metacharacters ('[', ']', '*', '?') in ``video_name`` or ``dir_name`` do not
    turn into a pattern. Results are sorted so that callers taking ``files[0]``
    get the same file on every run.

    Args:
        video_name (str): The name of the video. If this is an absolute path,
            ``os.path.join`` discards ``base_path`` and the path is used as given.
        dir_name (str): The name of the directory to search.
        file_type (str): The type of the files to find (e.g., "csv" or "hdf5").
        base_path (str): The base path where the video directories are located.

    Returns:
        files (list): A sorted list of paths to the files (empty if none match).
    """
    search_path = os.path.join(base_path, video_name, dir_name)
    # Only the directory is escaped; the trailing "**/*.<type>" stays a real pattern.
    pattern = os.path.join(glob.escape(search_path), f"**/*.{file_type}")
    # glob yields matches in arbitrary (filesystem) order; sort for determinism.
    return sorted(glob.glob(pattern, recursive=True))

In [6]:
# Names of the 17 body keypoints; the list position is used as the keypoint index.
# NOTE(review): the ordering looks like the COCO 17-keypoint layout - confirm against the pose model.
# This list is defined again further down the notebook.
keypoints_map = [
        "nose", "left_eye", "right_eye", "left_ear", "right_ear",
        "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
        "left_wrist", "right_wrist", "left_hip", "right_hip",
        "left_knee", "right_knee", "left_ankle", "right_ankle",
    ]

In [9]:
# data:         video path -> {"keypoints", "pred_classes", "pred_values"} for videos that have all of them
# no_hdf5_data: video path -> {"pred_classes", "pred_values"} for videos with a CSV but no keypoints HDF5
data = {}
no_hdf5_data = {}

for video_name in true_dirs:
    video_data = {}
    # Load the keypoint detections from the first HDF5 file found under "keypoints", if any
    hdf5_files = find_files(video_name, 'keypoints', 'hdf5')
    if hdf5_files:
        # Stored directly in video_data so the notebook-level `keypoints_map`
        # (the list of keypoint names) is not overwritten by per-video data.
        video_data["keypoints"] = load_keypoints_from_hdf5(hdf5_files[0])

    # Find the CSV file in the "action_classification" subdirectory
    action_csv = find_files(video_name, 'action_classification', 'csv')
    if not action_csv:
        print(f"No CSV file found in action_class directory for video {video_name}")
        continue  # Skip to the next video if no action predictions are found

    actions_df = pd.read_csv(action_csv[0])
    # Keep only the first row's predicted classes and their corresponding values
    video_data["pred_classes"] = actions_df["pred_class"].iloc[0]
    video_data["pred_values"] = actions_df["pred_value"].iloc[0]

    # Predictions are always present at this point, so only the keypoints decide
    # which dictionary the video goes into.
    if "keypoints" in video_data:
        data[video_name] = video_data
    else:
        no_hdf5_data[video_name] = video_data

print(len(data.keys()))
print(len(no_hdf5_data))

In [None]:
# Load the pre-trained "glove-wiki-gigaword-100" vectors through gensim's downloader API.
model = api.load("glove-wiki-gigaword-100")
# Presumably the dimensionality of the vectors above.
# NOTE(review): not referenced again in the visible cells, which hard-code 100 instead.
embedding_vector_size = 100


In [None]:
# video -> list of 17 flags (1/0), one per keypoint index
keypoints_present = {}

for video, video_entry in data.items():
    # Per keypoint: how many (frame, person) detections scored at least 5.
    # NOTE(review): this counts detections, not frames - a frame with several
    # people can contribute more than once to the same keypoint.
    detection_counts = [0] * 17
    for frame in video_entry['keypoints']:
        frame_scores = frame['keypoints_scores']
        if frame_scores.size == 0:
            continue  # nothing detected in this frame
        for person_scores in frame_scores:
            for keypoint_index, score in enumerate(person_scores):
                if score >= 5:
                    detection_counts[keypoint_index] += 1
    # A keypoint is flagged as present once it has more than 50 such detections
    keypoints_present[video] = [int(count > 50) for count in detection_counts]

print(keypoints_present)

In [None]:
# Row order of both matrices: the iteration order of `data`.
video_names = list(data)

# NOTE(review): no visible cell stores an "embeddings" entry in `data`; this lookup
# depends on a cell that is not part of the notebook as shown.
embeddings_list = [data[video]["embeddings"] for video in video_names]
keypoints_list = [keypoints_present[video] for video in video_names]

# One row per video in each matrix
embeddings_matrix = np.array(embeddings_list)
keypoints_matrix = np.array(keypoints_list)

In [None]:
# Mean and standard deviation over every entry of the keypoints matrix (a single scalar each)
mean_kp = keypoints_matrix.mean()
std_kp = keypoints_matrix.std()

print(f"Mean of keypoints: {mean_kp}")
print(f"Standard deviation of keypoints: {std_kp}")

In [None]:
# Mean and standard deviation over every entry of the embeddings matrix (a single scalar each)
mean_embeddings = embeddings_matrix.mean()
std_embeddings = embeddings_matrix.std()

print(f"Mean of embeddings: {mean_embeddings}")
print(f"Standard deviation of embeddings: {std_embeddings}")

In [None]:
# Sanity check: both matrices are built from the same loop, so their row counts should match.
print(embeddings_matrix.shape, keypoints_matrix.shape)

In [None]:
import numpy as np
from sklearn.cross_decomposition import CCA

# Use as many canonical components as the narrower of the two matrices has columns
n_components = min(embeddings_matrix.shape[1], keypoints_matrix.shape[1])
cca = CCA(n_components=n_components)
cca.fit(embeddings_matrix, keypoints_matrix)

# Project both matrices onto the fitted canonical variables
X_c, Y_c = cca.transform(embeddings_matrix, keypoints_matrix)


In [None]:
# Short aliases used by the CCA cell below.
X = embeddings_matrix
Y = keypoints_matrix

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cross_decomposition import CCA

# Fit CCA between X (embeddings) and Y (keypoint presence) with as many
# components as the narrower of the two matrices has columns.
cca = CCA(n_components=min(X.shape[1], Y.shape[1]))
cca.fit(X, Y)

# Project both datasets onto their canonical variables
X_c, Y_c = cca.transform(X, Y)

# Canonical correlations: np.corrcoef stacks the columns of X_c and Y_c into one
# correlation matrix, so the diagonal offset by the number of components pairs
# column i of X_c with column i of Y_c.
canonical_correlations = np.corrcoef(X_c.T, Y_c.T).diagonal(offset=X_c.shape[1])
print("Canonical correlations:", canonical_correlations)

# Loadings: correlation of each original variable with each canonical variable
# (the top-right block of the stacked correlation matrix).
loadings_X = np.corrcoef(X.T, X_c.T)[:X.shape[1], X.shape[1]:]
loadings_Y = np.corrcoef(Y.T, Y_c.T)[:Y.shape[1], Y.shape[1]:]

# Visualize the first pair of canonical variables
plt.scatter(X_c[:, 0], Y_c[:, 0])
plt.xlabel('First Canonical Variable from Embeddings')
plt.ylabel('First Canonical Variable from Keypoints')
plt.title('Canonical Correlation Analysis - First Pair of Canonical Variables')
plt.show()

fig, axs = plt.subplots(2, 1, figsize=(8, 10))

# Plot heatmap of loadings for embeddings
axs[0].imshow(loadings_X, cmap='coolwarm', aspect='auto')
axs[0].set_xticks(range(loadings_X.shape[1]))
axs[0].set_yticks(range(loadings_X.shape[0]))
axs[0].set_xticklabels(range(1, loadings_X.shape[1]+1))
axs[0].set_yticklabels(range(1, loadings_X.shape[0]+1))
axs[0].set_xlabel('Canonical Variables')
axs[0].set_ylabel('Embeddings Variables')
axs[0].set_title('Loadings for Embeddings')

# Plot heatmap of loadings for keypoints
axs[1].imshow(loadings_Y, cmap='coolwarm', aspect='auto')
axs[1].set_xticks(range(loadings_Y.shape[1]))
axs[1].set_yticks(range(loadings_Y.shape[0]))
axs[1].set_xticklabels(range(1, loadings_Y.shape[1]+1))
axs[1].set_yticklabels(range(1, loadings_Y.shape[0]+1))
axs[1].set_xlabel('Canonical Variables')
axs[1].set_ylabel('Keypoints Variables')
axs[1].set_title('Loadings for Keypoints')

plt.tight_layout()
plt.show()

# Scatter plots for the leading canonical pairs: at most three, fewer when the
# fit produced fewer components. squeeze=False keeps `axs` two-dimensional even
# for a single row, so the indexing below works for any count.
n_pairs = min(3, X_c.shape[1])
fig, axs = plt.subplots(n_pairs, 1, figsize=(8, 4 * n_pairs), squeeze=False)

for i in range(n_pairs):
    ax = axs[i, 0]
    ax.scatter(X_c[:, i], Y_c[:, i])
    ax.set_xlabel(f'Canonical Variable {i+1} from Embeddings')
    ax.set_ylabel(f'Canonical Variable {i+1} from Keypoints')
    ax.set_title(f'Canonical Correlation Analysis - Pair {i+1} of Canonical Variables')

plt.tight_layout()
plt.show()





In [None]:
from sklearn.cluster import KMeans

# Cluster the videos in the joint canonical space: X_c and Y_c columns side by side
combined_canonical_vars = np.hstack((X_c, Y_c))
k = 5  # or another number based on your analysis
kmeans = KMeans(n_clusters=k, random_state=0)
kmeans.fit(combined_canonical_vars)
# Cluster index assigned to each row (video)
labels = kmeans.labels_

In [None]:
import matplotlib.pyplot as plt

# First two canonical variables of X_c, coloured by cluster label
fig, ax = plt.subplots()
points = ax.scatter(X_c[:, 0], X_c[:, 1], c=labels, cmap='viridis')
ax.set_xlabel('First Canonical Variable of Actions')
ax.set_ylabel('Second Canonical Variable of Actions')
ax.set_title('Cluster Diagram of Actions and Keypoints')
fig.colorbar(points, ax=ax, label='Cluster Label')
plt.show()

In [None]:
# Pair every video name with its cluster label.
# Both sequences must have the same length and ordering for the rows to line up.
clustered_data = {
    'Video Name': video_names,
    'Cluster Label': labels
}

# Convert to DataFrame (one row per video)
clustered_df = pd.DataFrame(clustered_data)


In [None]:
# One column per embedding dimension / keypoint index, one row per video
embedding_columns = [f'Embedding_{i}' for i in range(embeddings_matrix.shape[1])]
keypoint_columns = [f'Keypoint_{i}' for i in range(keypoints_matrix.shape[1])]
embeddings_df = pd.DataFrame(embeddings_matrix, columns=embedding_columns)
keypoints_df = pd.DataFrame(keypoints_matrix, columns=keypoint_columns)

# 'Video Name' is the join key shared by all three frames
embeddings_df['Video Name'] = video_names
keypoints_df['Video Name'] = video_names

# Cluster labels, then embeddings, then keypoint flags - all in a single frame
full_data_df = clustered_df.merge(embeddings_df, on='Video Name')
full_data_df = full_data_df.merge(keypoints_df, on='Video Name')


In [None]:
# Per-cluster column means of the embedding and keypoint-presence columns
for i in range(k):
    in_cluster = full_data_df['Cluster Label'] == i
    cluster_i_data = full_data_df[in_cluster]

    # Label-based slices: every column from the first to the last name, inclusive
    mean_embeddings = cluster_i_data.loc[:, 'Embedding_0':'Embedding_99'].mean()
    mean_keypoints = cluster_i_data.loc[:, 'Keypoint_0':'Keypoint_16'].mean()

    print(f"Cluster {i} average embeddings:\n{mean_embeddings}")
    print(f"Cluster {i} average keypoints presence:\n{mean_keypoints}")


In [None]:
import seaborn as sns

# Two bar charts per cluster: mean embedding values, then mean keypoint presence
for i in range(k):
    cluster_rows = full_data_df[full_data_df['Cluster Label'] == i]

    plt.figure(figsize=(10, 6))
    mean_embeddings = cluster_rows.loc[:, 'Embedding_0':'Embedding_99'].mean()
    sns.barplot(x=mean_embeddings.index, y=mean_embeddings.values)
    plt.title(f'Cluster {i} Average Embeddings')
    plt.xticks(rotation=45)
    plt.show()

    plt.figure(figsize=(10, 6))
    mean_keypoints = cluster_rows.loc[:, 'Keypoint_0':'Keypoint_16'].mean()
    sns.barplot(x=mean_keypoints.index, y=mean_keypoints.values)
    plt.title(f'Cluster {i} Average Keypoints Presence')
    plt.xticks(rotation=45)
    plt.show()


In [None]:
from sklearn.cluster import KMeans

# Fit K-Means on the combined canonical variables (X_c and Y_c side by side).
# NOTE(review): `labels` is not refreshed here, so it still comes from whichever cell last set it.
k = 5  # The number of clusters
kmeans = KMeans(n_clusters=k, random_state=0).fit(combined_canonical_vars)


In [None]:
# Separate K-Means run on the raw embeddings, so the centroids live in the word-vector space.
# NOTE(review): these clusters are independent of the canonical-space `labels`;
# index i here need not correspond to 'Cluster Label' i in full_data_df.
kmeans_embeddings = KMeans(n_clusters=k, random_state=0)
kmeans_embeddings.fit(embeddings_matrix)
centroids_embeddings = kmeans_embeddings.cluster_centers_

# Centroid index -> the 5 vocabulary words nearest to that centroid
closest_words_per_cluster = {}
for i, centroid in enumerate(centroids_embeddings):
    if centroid.shape[0] != 100:
        # Only 100-dimensional centroids are looked up in the word-vector model
        print(f"Centroid {i} is not in the correct dimensional space.")
        continue
    nearest = model.similar_by_vector(centroid, topn=5)
    closest_words_per_cluster[i] = [word for word, _ in nearest]


In [None]:
# Centroid index -> up to five vocabulary words closest to that centroid.
print(closest_words_per_cluster)

In [None]:
# Cluster index -> Series holding the 8 keypoint columns with the highest mean presence
top_keypoints_per_cluster = {}

for i in range(k):  # 'k' is the number of clusters
    in_cluster = full_data_df['Cluster Label'] == i
    mean_keypoints = full_data_df.loc[in_cluster, 'Keypoint_0':'Keypoint_16'].mean()
    # Highest means first, then keep the first 8 entries
    top_keypoints_per_cluster[i] = mean_keypoints.sort_values(ascending=False).head(8)


In [None]:
# Keypoint names; the list position is the keypoint index used in the 'Keypoint_<i>' column names
keypoints_map = [
    "nose", "left_eye", "right_eye", "left_ear", "right_ear",
    "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
    "left_wrist", "right_wrist", "left_hip", "right_hip",
    "left_knee", "right_knee", "left_ankle", "right_ankle"]


def _keypoint_name(column_label):
    """Translate a 'Keypoint_<i>' column label into the name stored at index i."""
    return keypoints_map[int(column_label.split('_')[1])]


for cluster, keypoints in top_keypoints_per_cluster.items():
    # Only cluster 0 is printed; every other cluster is skipped
    if cluster != 0:
        continue
    print(f"Cluster {cluster} top keypoints:")
    mapped_keypoints = keypoints.rename(index=_keypoint_name)
    print(mapped_keypoints)
    print()

In [None]:
# Number of rows (videos) assigned to each cluster label, most frequent first.
cluster_counts = full_data_df['Cluster Label'].value_counts()
print(cluster_counts)


In [None]:
# Cluster index -> list of the video names assigned to that cluster
videos_in_clusters = {
    cluster: full_data_df.loc[full_data_df['Cluster Label'] == cluster, 'Video Name'].tolist()
    for cluster in range(k)  # 'k' is the number of clusters
}

In [None]:
# One header line, the list of videos, then a blank separator line per cluster
for cluster, videos in videos_in_clusters.items():
    print(f"Cluster {cluster} videos:", videos, "", sep="\n")

In [None]:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Feature matrix: embedding columns followed by keypoint-presence columns, one row per video
features = np.concatenate((embeddings_matrix, keypoints_matrix), axis=1)

# Elbow method: record the K-Means inertia for every candidate cluster count.
# The loop variable is deliberately not called `k`, so the notebook-level `k`
# (the cluster count other cells iterate over) is not overwritten, and the seed
# is fixed like in the other K-Means cells so the curve is reproducible.
distortions = []
K = range(1,20)
for n_clusters in K:
    kmeanModel = KMeans(n_clusters=n_clusters, random_state=0)
    kmeanModel.fit(features)
    distortions.append(kmeanModel.inertia_)

plt.figure(figsize=(20,8))
plt.plot(K, distortions, 'bx-')
plt.xlabel('k')
plt.ylabel('Distortion')
plt.title('The Elbow Method showing the optimal k')
plt.show()

In [None]:
from sklearn.metrics import silhouette_score

# Silhouette score is not defined for a single cluster, so start at 2
candidate_ks = range(2, 20)
silhouette_scores = []
for i in candidate_ks:
    # NOTE(review): rebinds the notebook-level `kmeans`
    kmeans = KMeans(n_clusters=i, random_state=0)
    kmeans.fit(features)
    silhouette_scores.append(silhouette_score(features, kmeans.labels_))

plt.plot(candidate_ks, silhouette_scores)
plt.title('Silhouette Score Method')
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette Score')
plt.show()


In [None]:
from sklearn.metrics import davies_bouldin_score

candidate_ks = range(2, 20)
db_scores = []
for i in candidate_ks:
    # NOTE(review): rebinds the notebook-level `kmeans`
    kmeans = KMeans(n_clusters=i, random_state=0)
    kmeans.fit(features)
    db_scores.append(davies_bouldin_score(features, kmeans.labels_))

# Plot Davies-Bouldin scores; a lower score is the target
plt.plot(candidate_ks, db_scores)
plt.title('Davies-Bouldin Index')
plt.xlabel('Number of clusters')
plt.ylabel('DB Index')
plt.show()


In [None]:
from sklearn.cluster import DBSCAN

# `features` is the column-wise concatenation of the embeddings and keypoints matrices;
# presumably (n_videos, 117) - the printed shape confirms it.
print(features.shape)
# Apply DBSCAN with a neighbourhood radius of 5 and at least 3 samples per core point
dbscan = DBSCAN(eps=5, min_samples=3)
clusters = dbscan.fit_predict(features)

# clusters is an array where each element is the cluster number of the corresponding video.
# -1 indicates that the video is an outlier and not part of any cluster.
print(clusters)

In [None]:
# Full clustering pipeline in one cell, this time with k = 14:
# K-Means on the canonical variables -> scatter plot -> merged DataFrame ->
# per-cluster means (printed and plotted) -> nearest words per embedding centroid.
from sklearn.cluster import KMeans
# Joint canonical space: X_c and Y_c columns side by side
combined_canonical_vars = np.concatenate((X_c, Y_c), axis=1)
k = 14  # or another number based on your analysis
kmeans = KMeans(n_clusters=k, random_state=0).fit(combined_canonical_vars)
# Cluster index assigned to each row (video)
labels = kmeans.labels_

import matplotlib.pyplot as plt

plt.scatter(X_c[:, 0], X_c[:, 1], c=labels, cmap='viridis')  # Using X_c for plotting
plt.xlabel('First Canonical Variable of Actions')
plt.ylabel('Second Canonical Variable of Actions')
plt.title('Cluster Diagram of Actions and Keypoints')
plt.colorbar(label='Cluster Label')
plt.show()
# Pair every video name with its cluster label (both sequences must share the same ordering)
clustered_data = {
    'Video Name': video_names,
    'Cluster Label': labels
}

# Convert to DataFrame
clustered_df = pd.DataFrame(clustered_data)

# Convert the original embeddings and keypoints data to DataFrames
embeddings_df = pd.DataFrame(embeddings_matrix, columns=[f'Embedding_{i}' for i in range(embeddings_matrix.shape[1])])
keypoints_df = pd.DataFrame(keypoints_matrix, columns=[f'Keypoint_{i}' for i in range(keypoints_matrix.shape[1])])

# Add video names to these DataFrames for merging
embeddings_df['Video Name'] = video_names
keypoints_df['Video Name'] = video_names

# Merge all data into a single DataFrame
full_data_df = clustered_df.merge(embeddings_df, on='Video Name').merge(keypoints_df, on='Video Name')

# Print the per-cluster column means
for i in range(k):  # Assuming 'k' is the number of clusters
    cluster_i_data = full_data_df[full_data_df['Cluster Label'] == i]
    
    # Calculate the mean of embeddings for the cluster
    mean_embeddings = cluster_i_data.loc[:, 'Embedding_0':'Embedding_99'].mean()
    
    # Calculate the mean of keypoints presence for the cluster
    mean_keypoints = cluster_i_data.loc[:, 'Keypoint_0':'Keypoint_16'].mean()
    
    print(f"Cluster {i} average embeddings:\n{mean_embeddings}")
    print(f"Cluster {i} average keypoints presence:\n{mean_keypoints}")


import seaborn as sns

# Visualize the mean embeddings for each cluster
for i in range(k):
    plt.figure(figsize=(10, 6))
    mean_embeddings = full_data_df[full_data_df['Cluster Label'] == i].loc[:, 'Embedding_0':'Embedding_99'].mean()
    sns.barplot(x=mean_embeddings.index, y=mean_embeddings.values)
    plt.title(f'Cluster {i} Average Embeddings')
    plt.xticks(rotation=45)
    plt.show()

    # Visualize the mean keypoints presence for each cluster
    plt.figure(figsize=(10, 6))
    mean_keypoints = full_data_df[full_data_df['Cluster Label'] == i].loc[:, 'Keypoint_0':'Keypoint_16'].mean()
    sns.barplot(x=mean_keypoints.index, y=mean_keypoints.values)
    plt.title(f'Cluster {i} Average Keypoints Presence')
    plt.xticks(rotation=45)
    plt.show()
# Separate K-Means run on the raw embeddings, so the centroids live in the word-vector space.
# NOTE(review): these clusters are independent of `labels` above; centroid index i
# need not correspond to 'Cluster Label' i.
kmeans_embeddings = KMeans(n_clusters=k, random_state=0).fit(embeddings_matrix)
centroids_embeddings = kmeans_embeddings.cluster_centers_

# Find the closest words for each centroid
closest_words_per_cluster = {}
for i, centroid in enumerate(centroids_embeddings):
    if centroid.shape[0] == 100:  # Ensure centroid is 100-dimensional
        closest_words = model.similar_by_vector(centroid, topn=5)
        closest_words_per_cluster[i] = [word for word, _ in closest_words]
    else:
        print(f"Centroid {i} is not in the correct dimensional space.")

print(closest_words_per_cluster)

In [None]:
# Keypoint names; the list position is the keypoint (column) index
keypoints_map = ['nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder', 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist', 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle']

# frame number -> list of ('person <index>', [names of keypoints scored below 0])
negative_scores = {}

# NOTE(review): `single_sample_low_keypoints` is not defined in any visible cell.
for frame_number, frame in enumerate(single_sample_low_keypoints):
    # Row (person) and column (keypoint) positions of every score below 0
    person_indices, keypoint_indices = np.where(frame['keypoints_scores'] < 0)
    for person_index in set(person_indices):
        # Keypoint names for the columns that belong to this person
        person_keypoints = keypoint_indices[person_indices == person_index]
        negative_keypoints = [keypoints_map[index] for index in person_keypoints]
        negative_scores.setdefault(frame_number, []).append(
            ('person ' + str(person_index), negative_keypoints)
        )

print(negative_scores)

In [None]:
import numpy as np

# video name -> {person index: set of keypoint indices whose score is never
# non-negative in any frame where that person index appears}
results = {}

# NOTE(review): `videos_with_consistent_negative_values` is not defined in any visible cell.
for video_name in videos_with_consistent_negative_values:
    video_data = data[video_name]
    # Accumulated across ALL frames of this video and reset for every video,
    # so one frame cannot overwrite what earlier frames established and
    # nothing leaks from the previous video.
    negative_keypoints = {}
    for frame in video_data['keypoints']:
        frame_scores = np.asarray(frame['keypoints_scores'])
        if frame_scores.ndim != 2:
            continue  # no per-person rows in this frame (e.g. nothing detected)
        for person_index, person_scores in enumerate(frame_scores):
            # "not >= 0" rather than "< 0", so NaN scores count as negative too
            negative_now = set(np.flatnonzero(~(person_scores >= 0)).tolist())
            if person_index in negative_keypoints:
                # Keep only the keypoints that were negative in every frame so far
                negative_keypoints[person_index] &= negative_now
            else:
                negative_keypoints[person_index] = negative_now
    # No early exit: a person index that first appears in a later frame can still qualify.
    consistently_negative = {person: keypoints for person, keypoints in negative_keypoints.items() if keypoints}
    if consistently_negative:
        results[video_name] = consistently_negative

print(results)

In [None]:
# Videos that contain a run of at least three back-to-back frames without any detected box
videos_with_three_consecutive_empty_frames = []

for video_name, values in data.items():
    consecutive_empty = 0
    for frame in values['keypoints']:
        if len(frame['boxes']) != 0:
            # A frame with detections interrupts the run
            consecutive_empty = 0
            continue
        consecutive_empty += 1
        if consecutive_empty == 3:
            # Run found; no need to look at the rest of this video
            videos_with_three_consecutive_empty_frames.append(video_name)
            break

print(videos_with_three_consecutive_empty_frames)
len(videos_with_three_consecutive_empty_frames)

In [None]:
# Two averages per video: one that penalises frames without predictions and one
# that only looks at the scores that are actually present.
for video in videos_with_three_consecutive_empty_frames:
    all_scores = []
    present_scores = []
    empty_frames = 0
    for frame in data[video]['keypoints']:
        frame_scores = np.array(frame['keypoints_scores'])
        if frame_scores.size == 0:
            # Penalty of -5 for each of the 17 keypoints when the frame has no scores
            all_scores.extend([-5]*17)
            empty_frames += 1
            continue
        flattened_scores = frame_scores.flatten()
        all_scores.extend(flattened_scores)
        present_scores.extend(flattened_scores)
    avg_all_scores = np.mean(all_scores)
    if present_scores:
        avg_present_scores = np.mean(present_scores)
    else:
        avg_present_scores = "No keypoints_scores present"
    print(f"Video {video} - Average keypoints_scores (penalizing): {avg_all_scores}, Average keypoints_scores (present): {avg_present_scores}, Frames without predictions: {empty_frames}")

In [None]:
# Average over the scores that exist; frames without scores are ignored
for video in videos_with_three_consecutive_empty_frames:
    all_scores = []
    for frame in data[video]['keypoints']:
        frame_scores = np.array(frame['keypoints_scores'])
        if frame_scores.size > 0:
            all_scores.extend(frame_scores.flatten())
    if not all_scores:
        print(f"No keypoints_scores present for video {video}")
    else:
        print(f"Average keypoints_scores for video {video}: {np.mean(all_scores)}")

In [None]:
# All video names (the keys of `data`) as a set
all_videos = set(data.keys())

# Convert videos_with_three_consecutive_empty_frames to a set for faster lookup
videos_with_three_consecutive_empty_frames_set = set(videos_with_three_consecutive_empty_frames)

# Set difference: the videos that are NOT in videos_with_three_consecutive_empty_frames.
# The list order follows set iteration order, so it is not the order of `data`.
videos_not_in_list = list(all_videos - videos_with_three_consecutive_empty_frames_set)

print(videos_not_in_list)

In [None]:
# Spot check on one video: print the keypoint scores of every frame whose 'scores' array has more than 5 entries
test_vid = data["/tank/tgn252/metadata_annotations/result/R_S_yt-ZtS7ZrJ_rtk_525.mp4"]["keypoints"]
for frame in test_vid:
    if frame['scores'].size > 5:
        print(frame['keypoints_scores'])

In [None]:
print(len(videos_not_in_list))
for video in videos_not_in_list:
    # Collect the per-person score rows of every frame; the mean is taken over all of them
    all_scores = [
        person_scores
        for frame in data[video]['keypoints']
        for person_scores in frame['keypoints_scores']
    ]
    print(f"Average keypoints_scores for video {video}: {np.mean(all_scores)}")

In [None]:
# Average score per video; a frame without scores contributes a single -2 entry
for video in videos_with_three_consecutive_empty_frames:
    all_scores = []
    for frame in data[video]['keypoints']:
        frame_scores = np.array(frame['keypoints_scores'])
        if frame_scores.size > 0:
            all_scores.extend(frame_scores.flatten())
        else:
            all_scores.append(-2)
    print(f"Average keypoints_scores for video {video}: {np.mean(all_scores)}")

In [None]:
# Keypoint names; the list position is the keypoint (column) index
keypoints_map = ['nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder', 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist', 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle']

# frame number -> {keypoint name: highest score above 1 among the people in that frame}
positive_scores = {}

# NOTE(review): `single_sample_low_keypoints` is not defined in any visible cell.
for frame_number, frame in enumerate(single_sample_low_keypoints):
    frame_scores = frame['keypoints_scores']
    # Row (person) and column (keypoint) positions of every score greater than 1
    person_indices, keypoint_indices = np.where(frame_scores > 1)
    positive_scores[frame_number] = {
        # Highest qualifying score for this keypoint across the people who have one
        keypoints_map[index]: np.max(frame_scores[person_indices[keypoint_indices == index], index])
        for index in set(keypoint_indices)
    }

print(positive_scores)