In [1]:
import os
import pandas as pd
import numpy as np
import torch
import ast
from sklearn.preprocessing import StandardScaler, LabelEncoder
from scipy.spatial.distance import cityblock, chebyshev, minkowski, cosine, mahalanobis, euclidean
from tqdm import tqdm
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from local_utils import init_embeddings
from scipy.spatial.distance import euclidean
from scipy.spatial.distance import directed_hausdorff
from scipy.special import betainc
from scipy.integrate import quad
from scipy.optimize import minimize_scalar
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
from scipy.spatial.distance import euclidean, directed_hausdorff, minkowski

# Ensure the base directory exists
def create_directory(base_path):
    """Create ``base_path`` along with any missing parent directories.

    Args:
        base_path: Directory path to create.

    An already existing directory is left untouched (``exist_ok=True``).
    """
    os.makedirs(name=base_path, exist_ok=True)

# Read midpoints and max distances from CSV
def read_csv(midpoints_csv_path):
    """Load the CSV at ``midpoints_csv_path`` into a pandas DataFrame.

    Args:
        midpoints_csv_path: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    midpoints_frame = pd.read_csv(filepath_or_buffer=midpoints_csv_path)
    return midpoints_frame

# Function to calculate Euclidean distance-based overlap
def euclidean_overlap(midpoint1, radius1, midpoint2, radius2):
    """Score how deeply two spheres overlap, as a percentage.

    Args:
        midpoint1: Centre of the first sphere (1-D sequence of coordinates).
        radius1: Radius of the first sphere.
        midpoint2: Centre of the second sphere.
        radius2: Radius of the second sphere.

    Returns:
        ``(1 - d / (radius1 + radius2)) * 100`` where ``d`` is the Euclidean
        distance between the centres, or ``0`` when ``d`` is not strictly
        smaller than the sum of the radii.
    """
    combined_reach = radius1 + radius2
    centre_gap = euclidean(midpoint1, midpoint2)
    # Written as "not <" so that NaN comparisons fall through to 0.
    if not centre_gap < combined_reach:
        return 0
    return (1 - centre_gap / combined_reach) * 100

# Function to calculate volume overlap
def volume_overlap(midpoint1, radius1, midpoint2, radius2):
    """Return the volume shared by two 3-D balls (the sphere-sphere "lens").

    The distance between the centres is the Euclidean distance of the given
    coordinate vectors (any dimensionality), but the volume itself is always
    computed with the 3-D formulas, consistent with the ``4/3 * pi * r**3``
    sphere volume used by the ratio helpers in this file.

    Args:
        midpoint1: Centre of the first sphere (1-D sequence of coordinates).
        radius1: Radius of the first sphere.
        midpoint2: Centre of the second sphere.
        radius2: Radius of the second sphere.

    Returns:
        ``0.0`` when the spheres are disjoint or only touch, the volume of the
        smaller sphere when it lies entirely inside the larger one, and
        otherwise the lens volume
        ``pi * (r1 + r2 - d)**2 * (d**2 + 2*d*(r1 + r2) - 3*(r1 - r2)**2) / (12*d)``.
    """
    distance = euclidean(midpoint1, midpoint2)
    radius_sum = radius1 + radius2

    # Disjoint or externally tangent: nothing is shared.
    if distance >= radius_sum:
        return 0.0

    # One sphere contains the other. This branch also absorbs distance == 0,
    # so the division by `distance` below can never divide by zero.
    if distance <= abs(radius1 - radius2):
        return 4/3 * np.pi * min(radius1, radius2)**3

    # Partial overlap: closed-form sum of the two spherical caps. It is
    # continuous with both branches above (0 at d = r1 + r2, and the smaller
    # sphere's full volume at d = |r1 - r2|).
    return (
        np.pi
        * (radius_sum - distance) ** 2
        * (distance**2 + 2 * distance * radius_sum - 3 * (radius1 - radius2) ** 2)
        / (12 * distance)
    )

# Function to calculate overlap ratio
def overlap_ratio(midpoint1, radius1, midpoint2, radius2):
    """Return the shared volume of two spheres divided by their union volume.

    Args:
        midpoint1: Centre of the first sphere (1-D sequence of coordinates).
        radius1: Radius of the first sphere.
        midpoint2: Centre of the second sphere.
        radius2: Radius of the second sphere.

    Returns:
        ``intersection / union`` where the intersection comes from
        ``volume_overlap`` and the union is the sum of both 3-D sphere volumes
        minus the intersection. Returns ``0.0`` when the union volume is zero
        (both radii are zero), where the ratio is otherwise undefined.
    """
    intersection = volume_overlap(midpoint1, radius1, midpoint2, radius2)
    union = (4/3 * np.pi * radius1**3) + (4/3 * np.pi * radius2**3) - intersection
    # Two zero-radius spheres have no volume at all; avoid the 0/0 division.
    if union == 0:
        return 0.0
    return intersection / union

# Function to calculate Hausdorff distance
def hausdorff_distance(midpoint1, radius1, midpoint2, radius2):
    """Return the symmetric Hausdorff distance between the two midpoints.

    Each midpoint is treated as a point set containing a single point, so the
    result is the larger of the two directed Hausdorff distances between them.

    Args:
        midpoint1: Centre of the first sphere (1-D sequence of coordinates).
        radius1: Accepted for a uniform metric signature; not used here.
        midpoint2: Centre of the second sphere.
        radius2: Accepted for a uniform metric signature; not used here.

    Returns:
        ``max(d(m1 -> m2), d(m2 -> m1))`` as computed by
        ``scipy.spatial.distance.directed_hausdorff``.
    """
    # directed_hausdorff expects 2-D arrays of shape (n_points, n_dims).
    first_set = np.reshape(midpoint1, (1, -1))
    second_set = np.reshape(midpoint2, (1, -1))
    forward = directed_hausdorff(first_set, second_set)[0]
    backward = directed_hausdorff(second_set, first_set)[0]
    return max(forward, backward)

# Function to calculate Intersection-over-Union (IoU)
def intersection_over_union(midpoint1, radius1, midpoint2, radius2):
    """Return the Intersection-over-Union (IoU) of two spheres.

    Uses the same intersection / union formula as ``overlap_ratio``.

    Args:
        midpoint1: Centre of the first sphere (1-D sequence of coordinates).
        radius1: Radius of the first sphere.
        midpoint2: Centre of the second sphere.
        radius2: Radius of the second sphere.

    Returns:
        The intersection volume from ``volume_overlap`` divided by the union
        volume (sum of both 3-D sphere volumes minus the intersection).
        Returns ``0.0`` when the union volume is zero (both radii are zero),
        where the ratio is otherwise undefined.
    """
    intersection = volume_overlap(midpoint1, radius1, midpoint2, radius2)
    union = (4/3 * np.pi * radius1**3) + (4/3 * np.pi * radius2**3) - intersection
    # Two zero-radius spheres have no volume at all; avoid the 0/0 division.
    if union == 0:
        return 0.0
    return intersection / union



In [2]:
# Helper: turn the serialized midpoint column into a numeric vector
def _parse_midpoint(midpoint_str):
    """Parse a comma-separated coordinate string into a 1-D float array.

    Surrounding whitespace and an optional pair of enclosing brackets or
    parentheses (e.g. ``"[0.1, 0.2]"``) are ignored.
    """
    tokens = midpoint_str.strip().strip('[]()').split(',')
    return np.array([float(token.strip()) for token in tokens])


# Function to calculate all metrics and save to CSV
def calculate_all_metrics(midpoints_csv_path, overlap_output_base_path):
    """Compute every pairwise overlap metric between hyperspheres and save them.

    Reads one hypersphere per row from ``midpoints_csv_path`` (columns
    ``person_index``, ``class_num``, ``class``, ``midpoint`` and
    ``max_distance``, the latter used as the radius), evaluates all metrics
    for each unordered pair of rows, and writes the result to
    ``Hypersphere_Overlap_All_Metrics.csv`` inside ``overlap_output_base_path``.

    Args:
        midpoints_csv_path: Path of the input CSV.
        overlap_output_base_path: Directory for the output CSV; created if missing.

    Returns:
        A DataFrame with one row per pair. The columns are always present,
        even when the input holds fewer than two rows, so the written CSV
        keeps its header.
    """
    result_columns = [
        'person_index_1', 'class_1', 'person_index_2', 'class_2',
        'euclidean_overlap', 'volume_overlap', 'overlap_ratio',
        'hausdorff_distance', 'intersection_over_union',
    ]

    create_directory(overlap_output_base_path)

    midpoints_df = read_csv(midpoints_csv_path)
    overlap_filename = os.path.join(overlap_output_base_path, "Hypersphere_Overlap_All_Metrics.csv")

    # Step 1: materialise one hypersphere record per input row.
    hyperspheres = []
    for _, row in tqdm(midpoints_df.iterrows(), total=midpoints_df.shape[0], desc="Calculating Hyperspheres for Each Person"):
        hyperspheres.append({
            'person_index': row['person_index'],
            'class_num': row['class_num'],
            'class': row['class'],
            'midpoint': _parse_midpoint(row['midpoint']),
            'radius': row['max_distance']
        })

    # Step 2: evaluate every unordered pair (i < j) exactly once.
    overlap_results = []
    sphere_count = len(hyperspheres)
    for i in range(sphere_count):
        hypersphere_1 = hyperspheres[i]
        midpoint1 = hypersphere_1['midpoint']
        radius1 = hypersphere_1['radius']

        for j in range(i + 1, sphere_count):
            hypersphere_2 = hyperspheres[j]
            midpoint2 = hypersphere_2['midpoint']
            radius2 = hypersphere_2['radius']

            overlap_results.append({
                'person_index_1': hypersphere_1['person_index'],
                'class_1': hypersphere_1['class'],
                'person_index_2': hypersphere_2['person_index'],
                'class_2': hypersphere_2['class'],
                'euclidean_overlap': euclidean_overlap(midpoint1, radius1, midpoint2, radius2),
                'volume_overlap': volume_overlap(midpoint1, radius1, midpoint2, radius2),
                'overlap_ratio': overlap_ratio(midpoint1, radius1, midpoint2, radius2),
                'hausdorff_distance': hausdorff_distance(midpoint1, radius1, midpoint2, radius2),
                'intersection_over_union': intersection_over_union(midpoint1, radius1, midpoint2, radius2)
            })

    # Explicit columns keep the CSV header intact when there are no pairs.
    overlap_df = pd.DataFrame(overlap_results, columns=result_columns)
    overlap_df.to_csv(overlap_filename, index=False)
    return overlap_df

In [3]:
# Example usage: compute all pairwise overlap metrics for the midpoints CSV below.
# calculate_all_metrics writes "Hypersphere_Overlap_All_Metrics.csv" into
# overlap_output_base_path and returns the same table as a DataFrame.
# NOTE(review): both paths are absolute and machine-specific; adjust them before
# running this cell on another machine.
midpoints_csv_path = "/home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Cosine_Midpoints_Per_Person_With_Class_Names.csv"
overlap_output_base_path = "/home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine"
overlap_df = calculate_all_metrics(midpoints_csv_path, overlap_output_base_path)



Calculating Hyperspheres for Each Person:   0%|          | 0/390 [00:00<?, ?it/s]

Calculating Hyperspheres for Each Person: 100%|██████████| 390/390 [00:00<00:00, 6612.15it/s]


In [1]:
import pandas as pd
import os

# Function to create ordered CSVs based on each overlap metric
def create_ordered_csvs(input_csv_path, output_dir):
    """Write one CSV per overlap metric, each sorted ascending by that metric.

    Args:
        input_csv_path: CSV containing the pair identifier columns and all
            five metric columns.
        output_dir: Directory receiving the ``ordered_by_<metric>.csv`` files;
            created if it does not exist.

    Each output file holds the four pair identifier columns plus the single
    metric it is ordered by. A confirmation line is printed per file.
    """
    pair_columns = ['person_index_1', 'class_1', 'person_index_2', 'class_2']
    metric_names = (
        'euclidean_overlap',
        'volume_overlap',
        'overlap_ratio',
        'hausdorff_distance',
        'intersection_over_union',
    )

    # Prepare the destination first, then load the combined metrics table.
    os.makedirs(output_dir, exist_ok=True)
    all_metrics = pd.read_csv(input_csv_path)

    for metric in metric_names:
        selected = all_metrics[pair_columns + [metric]]
        ranked = selected.sort_values(by=metric, ascending=True)
        output_csv_path = os.path.join(output_dir, f"ordered_by_{metric}.csv")
        ranked.to_csv(output_csv_path, index=False)
        print(f"Saved ordered CSV by {metric} to {output_csv_path}")

# Example usage: read the combined metrics CSV and write one
# "ordered_by_<metric>.csv" file per metric into output_dir.
# NOTE(review): both paths are absolute and machine-specific; adjust them before
# running this cell on another machine.
input_csv_path = "/home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Hypersphere_Overlap_All_Metrics.csv"
output_dir = "/home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Ordered_Metrics"
create_ordered_csvs(input_csv_path, output_dir)


Saved ordered CSV by euclidean_overlap to /home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Ordered_Metrics/ordered_by_euclidean_overlap.csv
Saved ordered CSV by volume_overlap to /home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Ordered_Metrics/ordered_by_volume_overlap.csv
Saved ordered CSV by overlap_ratio to /home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Ordered_Metrics/ordered_by_overlap_ratio.csv
Saved ordered CSV by hausdorff_distance to /home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Ordered_Metrics/ordered_by_hausdorff_distance.csv
Saved ordered CSV by intersection_over_union to /home/nmichelotti/Desktop/Embeddings/embeddings_for_n8/00hypersphere_Comparison/results/Cosine/Ordered_Metrics/ordered_by_intersection_over_union.csv
