# 1. Extract Relevant Data 

* From the JSON file, image-name needs to be extarcted and asssocicated classes 
* From the PyTorch similarity results, extract the image names and their highest similarity score along with the associated text prompt 

# 2. Create Mapping 

* Map the image_name from both datasets
* Pair the classes (from teh JSON) with the most rleveant text prompt and ist similarity score (from PyTorch)

# 3. Compare Results 

* For each image_name, check if the classes from the JSON file align with the highest similarity text prompt from PyTorch
* Record matches and mismatches for further analysis


In [3]:
text_prompts = [
    "a photo of a gun", "a photo of a shooting", "a photo of a weapon",
    "a photo of a killing", "a photo of a bomb", "a photo of violence",
    
    # Focus auf waffen 
    "a photo of other", "a photo of an animal", "a photo of a family",
    "a photo of a meeting", "a photo of a tool", "a photo of a gathering"
]

In [4]:
import json
import torch

# Step 1: Load JSON and PyTorch results
# Replace with your file paths
json_file_path = "weapon_classification_results.json"
torch_file_path = "sim_violence_test.torch"

# Load JSON results
with open(json_file_path, "r") as f:
    weapon_results = json.load(f)

# Load PyTorch similarity results
torch_results = torch.load(torch_file_path, map_location="cpu")

# Step 2: Prepare a mapping from JSON
json_mapping = {
    entry["image_name"]: entry["classes"]
    for entry in weapon_results["results"]
}

# Step 3: Compare with PyTorch results
comparison_results = []

for image_path, similarity_scores in torch_results.items():
    # Extract image name from the path (assuming it's the filename)
    image_name = image_path.split("/")[-1]
    
    # Get the highest similarity score and corresponding text prompt
    max_score_idx = similarity_scores.argmax()
    max_score = similarity_scores[max_score_idx]
    related_text = text_prompts[max_score_idx]

    # Check if the image is in the JSON results
    if image_name in json_mapping:
        detected_classes = json_mapping[image_name]
        comparison_results.append({
            "image_name": image_name,
            "detected_classes": detected_classes,
            "torch_related_text": related_text,
            "torch_similarity_score": max_score,
            "match": any(cls.lower() in related_text.lower() for cls in detected_classes)
        })
    else:
        comparison_results.append({
            "image_name": image_name,
            "detected_classes": None,
            "torch_related_text": related_text,
            "torch_similarity_score": max_score,
            "match": False
        })

# Step 4: Output Results
for result in comparison_results:
    print(f"Image: {result['image_name']}")
    print(f"  Detected Classes (JSON): {result['detected_classes']}")
    print(f"  Torch Related Text: {result['torch_related_text']} (Score: {result['torch_similarity_score']:.3f})")
    print(f"  Match: {'Yes' if result['match'] else 'No'}\n")


IndexError: index 10 is out of bounds for axis 0 with size 1

# Get Top Ten images 

In [5]:
# Extract top 10 images with highest similarity scores
top_images = []

for image_path, similarity_scores in torch_results.items():
    # Get the highest similarity score for this image
    max_score = similarity_scores.max()
    top_images.append((image_path, max_score))

# Sort by similarity score in descending order
top_images = sorted(top_images, key=lambda x: x[1], reverse=True)

# Get the top 10 images
top_10_images = top_images[:10]

# Display the results
print("Top 10 Images with Highest Similarity Scores:")
for rank, (image_path, score) in enumerate(top_10_images, start=1):
    print(f"{rank}. Image: {image_path}, Max Similarity Score: {score:.3f}")



# werte im range 

Top 10 Images with Highest Similarity Scores:
1. Image: /Users/apple/Documents/HIWI_Katharina/Hiwi_Katarina/violence_detection/Katharina_pytorch_Comparison/images_test/id_1080234390649741312_2019-01-01.jpg, Max Similarity Score: 0.205
2. Image: /Users/apple/Documents/HIWI_Katharina/Hiwi_Katarina/violence_detection/Katharina_pytorch_Comparison/images_test/id_1081146394398863363_2019-01-04.jpg, Max Similarity Score: 0.197
3. Image: /Users/apple/Documents/HIWI_Katharina/Hiwi_Katarina/violence_detection/Katharina_pytorch_Comparison/images_test/id_1080638380466204672_2019-01-03.jpg, Max Similarity Score: 0.195
4. Image: /Users/apple/Documents/HIWI_Katharina/Hiwi_Katarina/violence_detection/Katharina_pytorch_Comparison/images_test/id_1080332859456610304_2019-01-02.jpg, Max Similarity Score: 0.193
5. Image: /Users/apple/Documents/HIWI_Katharina/Hiwi_Katarina/violence_detection/Katharina_pytorch_Comparison/images_test/id_1080660544250470400_2019-01-03.jpg, Max Similarity Score: 0.193
6. Image:

In [6]:
# Extract top 10 images with highest similarity scores and their related prompts
top_images_with_prompts = []

for image_path, similarity_scores in torch_results.items():
    max_score_idx = similarity_scores.argmax()
    max_score = similarity_scores[max_score_idx]
    related_text = text_prompts[max_score_idx]
    top_images_with_prompts.append((image_path, max_score, related_text))

# Sort by similarity score in descending order
top_images_with_prompts = sorted(top_images_with_prompts, key=lambda x: x[1], reverse=True)

# Get the top 10 images
top_10_images_with_prompts = top_images_with_prompts[:10]

# Display the results
print("Top 10 Images with Highest Similarity Scores and Related Prompts:")
for rank, (image_path, score, prompt) in enumerate(top_10_images_with_prompts, start=1):
    print(f"{rank}. Image: {image_path}, Max Similarity Score: {score:.3f}, Related Prompt: {prompt}")


IndexError: index 10 is out of bounds for axis 0 with size 1

In [7]:
# Define category indices based on text prompts
category_indices = {
    "gun": text_prompts.index("a photo of a gun"),
    "grenade": text_prompts.index("a photo of a grenade"),  # Add if this was in your prompts
    "knife": text_prompts.index("a photo of a knife")      # Add if this was in your prompts
}

# Function to get top 10 images for a specific category
def get_top_images_by_category(category, index, torch_results):
    category_scores = []

    for image_path, similarity_scores in torch_results.items():
        # Extract the score for the specified category
        score = similarity_scores[index]
        category_scores.append((image_path, score))
    
    # Sort by similarity score in descending order
    category_scores = sorted(category_scores, key=lambda x: x[1], reverse=True)
    
    # Get the top 10 images
    return category_scores[:10]

# Get top 10 images for each category
top_10_guns = get_top_images_by_category("gun", category_indices["gun"], torch_results)
top_10_grenades = get_top_images_by_category("grenade", category_indices["grenade"], torch_results)
top_10_knives = get_top_images_by_category("knife", category_indices["knife"], torch_results)

# Display results
print("Top 10 Images for Guns:")
for rank, (image_path, score) in enumerate(top_10_guns, start=1):
    print(f"{rank}. Image: {image_path}, Similarity Score: {score:.3f}")

print("\nTop 10 Images for Grenades:")
for rank, (image_path, score) in enumerate(top_10_grenades, start=1):
    print(f"{rank}. Image: {image_path}, Similarity Score: {score:.3f}")

print("\nTop 10 Images for Knives:")
for rank, (image_path, score) in enumerate(top_10_knives, start=1):
    print(f"{rank}. Image: {image_path}, Similarity Score: {score:.3f}")


ValueError: 'a photo of a grenade' is not in list