# In [None]:
import os
import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean, cosine
import matplotlib.pyplot as plt

# Configuration for the similarity search that is run at the bottom of this file.
feature_folder = 'Target_video/Sword_output'  # Folder where the per-frame histogram CSVs are stored
# Target video; only its file name (without extension) is used to locate its histogram CSVs.
video_file = 'Target_video/Sword/AHF_longsword_against_Rapier_and_Dagger_Fight_sword_f_cm_np2_ri_bad_1.avi'
m = 10  # Number of similar videos to return

def main(video_file, m, feature_folder, similarity_metric='euclidean'):
    """Find, print and plot the ``m`` videos closest to a target video.

    The target is identified by the file name of ``video_file`` without its
    extension. Its histograms are read from ``feature_folder`` out of the
    files ``<name>_output_histogram_frame_<i>.csv`` for ``i`` in 1..3; a
    missing file is reported and skipped.

    Args:
        video_file: Path of the target video. Only its base name is used.
        m: Number of most similar videos to report.
        feature_folder: Folder containing the histogram CSV files of all
            videos, including the target's.
        similarity_metric: Distance used for the comparison, forwarded to
            ``compute_similarity`` ('euclidean' or 'cosine').

    Raises:
        FileNotFoundError: If none of the target's histogram files exist.
        ValueError: If ``similarity_metric`` is not supported.
    """
    histograms = load_histograms(feature_folder)
    target_video_name = os.path.splitext(os.path.basename(video_file))[0]
    print(f"Target video: {target_video_name}")

    # Three histogram files (frames 1-3) are expected per video.
    # NOTE(review): only the first row returned by pd.read_csv is used, although
    # the files were described as holding 16x12 histograms - confirm intended.
    target_histograms = {}
    for i in range(1, 4):
        target_histogram_csv = os.path.join(feature_folder, f'{target_video_name}_output_histogram_frame_{i}.csv')
        if os.path.exists(target_histogram_csv):
            print(f"Loading: {target_histogram_csv}")
            target_histograms[i] = pd.read_csv(target_histogram_csv).iloc[0].values
        else:
            print(f"File not found: {target_histogram_csv}")

    if not target_histograms:
        raise FileNotFoundError("No histogram files found for target video")

    # Forward the requested metric; it used to be accepted but silently ignored.
    scores = compute_similarity(histograms, target_histograms, target_video_name, similarity_metric)
    # Sanity check: the target must never be compared against itself.
    assert target_video_name not in scores, "Target video should not be included in the results."
    similar_videos = find_most_similar_videos(scores, m)

    print(f"\nTop {m} most similar videos (Name: Score):")
    for i, (video_name, score) in enumerate(similar_videos, 1):
        print(f"{i}. {video_name}: {score:.4f}")

    if not similar_videos:
        # Nothing to plot (no other video shares a frame with the target, or
        # m selected no entries); plotting an empty result would fail.
        print("No comparable videos found.")
        return
    visualize_results(similar_videos)  # Plot the same ranking as a bar chart


def load_histograms(folder_path):
    """Load the histogram vectors of every video found in ``folder_path``.

    Only files named ``<video>_output_histogram_frame_<n>.csv`` are read;
    any other file (including unrelated CSVs) is ignored. The video name is
    everything before the last ``_output_histogram_frame_`` marker, which is
    how ``main`` builds the target's file names.

    Args:
        folder_path: Folder containing the histogram CSV files.

    Returns:
        dict mapping video name -> {frame number (int) -> first row of the
        CSV as a NumPy array}.
    """
    marker = '_output_histogram_frame_'
    histograms = {}
    # Sorted so the resulting dict order (and thus tie-breaking when ranking)
    # does not depend on the file system's listing order.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith(".csv"):
            continue
        stem = os.path.splitext(file)[0]
        video_name, found, frame_part = stem.rpartition(marker)
        if not found:
            continue  # A CSV that is not a histogram file
        try:
            frame_number = int(frame_part)
        except ValueError:
            continue  # Marker present but no usable frame number
        df = pd.read_csv(os.path.join(folder_path, file))
        # NOTE(review): only the first row is kept - confirm this matches the
        # layout the histogram files are written in.
        histograms.setdefault(video_name, {})[frame_number] = df.iloc[0].values
    return histograms

def compute_similarity(histograms, target_histograms, target_video_name, similarity_metric='euclidean'):
    """Score every video against the target by average per-frame distance.

    For each video other than the target, the distance between the target's
    histogram and the video's histogram is computed for every frame number
    both have, and the distances are averaged. Lower scores mean more
    similar. Videos sharing no frame number with the target are left out.

    Args:
        histograms: dict of video name -> {frame number -> histogram vector}.
        target_histograms: dict of frame number -> target histogram vector.
        target_video_name: Name of the target; its entry in ``histograms``
            is skipped so it is never compared against itself.
        similarity_metric: 'euclidean' (default) or 'cosine', selecting the
            scipy.spatial.distance function of that name. Cosine distance is
            mathematically undefined for an all-zero histogram.

    Returns:
        dict mapping video name -> average distance to the target.

    Raises:
        ValueError: If ``similarity_metric`` is not a supported name.
    """
    metrics = {'euclidean': euclidean, 'cosine': cosine}
    try:
        distance = metrics[similarity_metric]
    except KeyError:
        raise ValueError(
            f"Unsupported similarity metric: {similarity_metric!r}; "
            f"expected one of {sorted(metrics)}"
        ) from None

    scores = {}
    for video_name, frames in histograms.items():
        if video_name == target_video_name:
            continue  # Never compare the target with its own histograms
        distances = [
            distance(target_histogram, frames[frame_number])
            for frame_number, target_histogram in target_histograms.items()
            if frame_number in frames
        ]
        if distances:
            scores[video_name] = sum(distances) / len(distances)
    return scores

def find_most_similar_videos(scores, m):
    """Return the ``m`` entries of ``scores`` with the smallest score.

    Args:
        scores: dict mapping video name -> score (lower sorts first).
        m: Number of entries to keep; applied as a slice bound.

    Returns:
        List of ``(video_name, score)`` tuples in ascending score order.
        Entries with equal scores keep the order they have in ``scores``.
    """
    ranked = list(scores.items())
    ranked.sort(key=lambda pair: pair[1])  # Stable sort, ascending by score
    return ranked[:m]

def visualize_results(similar_videos):
    """Show a horizontal bar chart of the given videos and their scores.

    Args:
        similar_videos: Sequence of ``(video_name, score)`` pairs, drawn in
            the given order from top to bottom. An empty sequence is
            reported with a message and nothing is plotted.
    """
    if not similar_videos:
        # zip(*[]) yields nothing to unpack, so an empty result would raise.
        print("No similar videos to visualize.")
        return

    names, scores = zip(*similar_videos)
    plt.figure(figsize=(12, 6))
    bars = plt.barh(names, scores, color='skyblue')
    plt.xlabel('Distance/Similarity Score')
    plt.title('Top Similar Videos')
    # barh draws the first entry at the bottom; flip the axis so the first
    # entry of similar_videos ends up at the top.
    plt.gca().invert_yaxis()

    # Label each bar with its score, placed at the end of the bar
    for bar, score in zip(bars, scores):
        plt.text(bar.get_width(), bar.get_y() + bar.get_height()/2,
                 f'{score:.4f}', va='center', ha='left', fontweight='bold')

    plt.tight_layout()
    plt.show()

# Run the search with the configuration defined at the top of the file,
# leaving similarity_metric at main's default ('euclidean').
main(video_file, m, feature_folder)