In [2]:
import os
import glob
import random
import numpy as np

# Input and output locations used by the similarity comparison below.
# NOTE(review): these are absolute, machine-specific Windows paths; adjust them
# before running the notebook on a different machine.
data_dir = r'C:\Users\11581\Desktop\test_dataset\cjh'  # the source sample pair is drawn from here
target_dir = r'C:\Users\11581\Desktop\test_dataset_fakeMismatch\cjh'  # pairs here are compared against the source
# target_dir = data_dir  # alternative: compare the source against its own directory
output_dir = r'D:\CodeSpace\PROJECT\UltraPrint\data\show_results'  # result text files are written here

# Create the output directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)

# Helper function to get pairs
def get_file_pairs(directory):
    """Collect matching ``*_v.npy`` / ``*_fusion.npy`` file pairs in a directory.

    Only the immediate entries of ``directory`` are inspected (no recursion).
    A pair is reported when both ``<base>_v.npy`` and ``<base>_fusion.npy``
    are present.

    Args:
        directory: Path of the directory to scan.

    Returns:
        dict mapping ``<base>`` to a ``(v_path, fusion_path)`` tuple, where
        both paths are joined with ``directory``. Keys are inserted in sorted
        filename order, so the result does not depend on the OS listing order.

    Raises:
        OSError: Propagated from ``os.listdir`` when the directory cannot be
            listed.
    """
    v_suffix = '_v.npy'
    fusion_suffix = '_fusion.npy'

    # Sort for a deterministic key order; keep a set for O(1) membership tests
    # instead of scanning the list once per candidate file.
    names = sorted(os.listdir(directory))
    present = set(names)

    pairs = {}
    for fname in names:
        if not fname.endswith(v_suffix):
            continue
        # Strip only the trailing suffix. str.replace() would also remove any
        # earlier occurrence of '_v.npy' inside the name and corrupt the base.
        base_name = fname[:-len(v_suffix)]
        fusion_name = base_name + fusion_suffix
        if fusion_name in present:
            pairs[base_name] = (
                os.path.join(directory, fname),
                os.path.join(directory, fusion_name),
            )
    return pairs

# 1. Select a random sample pair from data_dir
source_pairs = get_file_pairs(data_dir)
if not source_pairs:
    raise ValueError("No valid file pairs found in data_dir")

# NOTE(review): no random seed is set in the visible cells, so each run may
# pick a different source sample.
random_key = random.choice(list(source_pairs.keys()))
src_v_path, src_fusion_path = source_pairs[random_key]

# The source vector is the sum of the pair's two loaded arrays.
src_vec = np.load(src_v_path) + np.load(src_fusion_path)
# The source norm is the same for every target, so compute it once up front
# rather than on every loop iteration.
norm_src = np.linalg.norm(src_vec)

# 2. Compare against all pairs in target_dir
target_pairs = get_file_pairs(target_dir)
results = []

for name, (tgt_v_path, tgt_fusion_path) in target_pairs.items():
    tgt_vec = np.load(tgt_v_path) + np.load(tgt_fusion_path)
    norm_tgt = np.linalg.norm(tgt_vec)

    # Cosine similarity; reported as 0.0 when either vector has zero norm so
    # the division below can never divide by zero.
    if norm_src == 0 or norm_tgt == 0:
        similarity = 0.0
    else:
        # assumes both arrays are 1-D and of equal length — TODO confirm
        similarity = np.dot(src_vec, tgt_vec) / (norm_src * norm_tgt)

    results.append(f"{name}: {similarity}\n")

# 3. Save results (one "<name>: <similarity>" line per target pair)
output_file = os.path.join(output_dir, f'similarity_results_{random_key}.txt')
with open(output_file, 'w') as f:
    f.writelines(results)

print(f"Comparison completed. Source: {random_key}. Results saved to {output_file}")

# Get all files in the directory

Comparison completed. Source: cjh_seg_0159_video. Results saved to D:\CodeSpace\PROJECT\UltraPrint\data\show_results\similarity_results_cjh_seg_0159_video.txt


In [5]:
# 1. Define the source directory, the fake-mismatch root, and the output directory
data_dir = r'C:\Users\11581\Desktop\test_dataset\cjh'
fake_mismatch_root = r'C:\Users\11581\Desktop\test_dataset_fakeMismatch'
output_dir = r'D:\CodeSpace\PROJECT\UltraPrint\data\show_results'
# This cell writes into output_dir, so make sure it exists before the final write.
os.makedirs(output_dir, exist_ok=True)

# Single switch for how a (v, fusion) pair becomes a vector. It is applied to
# BOTH the source and every target so the two sides can never get out of sync.
#   False -> use the fusion array only (current behaviour)
#   True  -> use v + fusion
use_v_plus_fusion = False


def _load_pair_vector(v_path, fusion_path, sum_with_v):
    """Load the comparison vector for one pair.

    Args:
        v_path: Path to the pair's ``*_v.npy`` file.
        fusion_path: Path to the pair's ``*_fusion.npy`` file.
        sum_with_v: When true, return the sum of both loaded arrays;
            otherwise return the fusion array alone.

    Returns:
        The array loaded by ``np.load`` (or the sum of the two arrays).
    """
    if sum_with_v:
        return np.load(v_path) + np.load(fusion_path)
    return np.load(fusion_path)


# 2. Pick a fresh random source pair from data_dir
source_pairs = get_file_pairs(data_dir)
if not source_pairs:
    raise ValueError("No valid file pairs found in data_dir")

random_key_mismatch = random.choice(list(source_pairs.keys()))
src_v_path_mismatch, src_fusion_path_mismatch = source_pairs[random_key_mismatch]

# Load the source vector and compute its norm once; it is reused for every target.
src_vec_mismatch = _load_pair_vector(
    src_v_path_mismatch, src_fusion_path_mismatch, use_v_plus_fusion
)
norm_src_mismatch = np.linalg.norm(src_vec_mismatch)

# 3. Traverse all subdirectories in the fake mismatch root
mismatch_results = []

for root, dirs, files in os.walk(fake_mismatch_root):
    # Pairs located directly in the directory currently being visited
    current_pairs = get_file_pairs(root)

    for name, (tgt_v_path, tgt_fusion_path) in current_pairs.items():
        tgt_vec = _load_pair_vector(tgt_v_path, tgt_fusion_path, use_v_plus_fusion)
        norm_tgt = np.linalg.norm(tgt_vec)

        # Cosine similarity; reported as 0.0 when either vector has zero norm
        if norm_src_mismatch == 0 or norm_tgt == 0:
            similarity = 0.0
        else:
            # assumes both arrays are 1-D and of equal length — TODO confirm
            similarity = np.dot(src_vec_mismatch, tgt_vec) / (norm_src_mismatch * norm_tgt)

        # Prefix the name with its directory relative to the root so pairs
        # with the same base name in different subfolders stay distinguishable.
        rel_path = os.path.relpath(os.path.dirname(tgt_v_path), fake_mismatch_root)
        full_name = os.path.join(rel_path, name)

        # Keep the numeric score next to the formatted line so it can be sorted on.
        mismatch_results.append((similarity, f"{full_name}: {similarity}\n"))

# 4. Sort results by similarity in descending order
mismatch_results.sort(key=lambda x: x[0], reverse=True)

# Extract just the string part for writing
sorted_output_lines = [item[1] for item in mismatch_results]

# 5. Save sorted results
output_file_mismatch = os.path.join(output_dir, f'similarity_results_fakeMismatch_sorted_{random_key_mismatch}.txt')
with open(output_file_mismatch, 'w') as f:
    f.writelines(sorted_output_lines)

print(f"Fake mismatch comparison completed. Source: {random_key_mismatch}. Sorted results saved to {output_file_mismatch}")

Fake mismatch comparison completed. Source: cjh_seg_0227_video. Sorted results saved to D:\CodeSpace\PROJECT\UltraPrint\data\show_results\similarity_results_fakeMismatch_sorted_cjh_seg_0227_video.txt
