Import Modules

In [None]:
import os
import csv

Object class

In [None]:
class object:
    """An annotated object: its name, its tags and its per-image descriptions.

    NOTE(review): the class name shadows the builtin ``object``. It is kept
    unchanged because other code in this file constructs instances under
    this name.
    """

    def __init__(self, name, tags):
        """Store *name* and *tags* and load the descriptions for *name*.

        Raises whatever ``open`` raises (e.g. ``FileNotFoundError``) when
        ``Descriptions/<name>.csv`` cannot be read.
        """
        # Object's name
        self.name = name
        # List of object's tags
        self.tags = tags
        # Dictionary of object's descriptions {image_name : [descriptions]}
        self.descriptions = self.__read_descriptions(self.name)

    def __read_descriptions(self, name):
        """Read ``Descriptions/<name>.csv`` into {image_name: [descriptions]}.

        The first row is treated as a header and skipped. In every other
        row, column 0 is the image name and column 1 is a comma-separated
        list of descriptions; surrounding spaces are stripped from each.
        Completely blank lines are ignored and an empty file yields ``{}``.
        A non-blank row without a second column still raises ``IndexError``.
        """
        descriptions = {}

        # newline='' lets the csv module do its own line-ending handling,
        # as the csv documentation requires for file objects.
        path = os.path.join('Descriptions', name + '.csv')
        with open(path, 'r', encoding='utf-8-sig', newline='') as csv_file:
            csv_reader = csv.reader(csv_file)
            # Skip the header row; the default keeps an empty file from
            # raising StopIteration.
            next(csv_reader, None)

            for row in csv_reader:
                # csv.reader yields [] for blank lines; there is nothing to
                # read from them.
                if not row:
                    continue
                # Read the image's name and its descriptions
                image_name = row[0]
                image_descriptions = [label.strip(' ') for label in row[1].split(',')]
                # Add the name : descriptions pair to the dictionary
                descriptions[image_name] = image_descriptions

        return descriptions

Create an Instance of Every Object

In [None]:
# List of objects, one per non-blank data row of ObjectTags.csv
objects = []

# Read object tags from csv file.
# newline='' lets the csv module do its own line-ending handling.
with open("ObjectTags.csv", 'r', encoding='utf-8-sig', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    # Skip the header row; the default keeps an empty file from raising
    # StopIteration.
    next(csv_reader, None)
    # For each row in the csv
    for row in csv_reader:
        # csv.reader yields [] for blank lines; skip them instead of
        # failing on row[0].
        if not row:
            continue
        # Read the object's name (column 0) and its comma-separated tags
        # (column 1), stripping surrounding spaces from each tag
        object_name = row[0]
        object_labels = [label.strip(' ') for label in row[1].split(',')]

        # Add the object to the list; constructing it also loads the
        # object's descriptions from Descriptions/<name>.csv
        objects.append(object(object_name, object_labels))

Calculate the Percentage of Tag Occurrences Per Object

In [None]:
def occurrence_per_tag(obj, first_description_only):
    """Return, for each tag of *obj*, the fraction of images mentioning it.

    Args:
        obj: Anything with a ``tags`` sequence and a ``descriptions``
            mapping of {image_name: [descriptions]}.
        first_description_only: When true, only the first description of
            each image is checked; otherwise an image counts as soon as any
            of its descriptions contains the tag.

    Returns:
        A list of floats in ``[0, 1]``, in the same order as ``obj.tags``.
        These are fractions, not values scaled to 100. When the object has
        no images every entry is ``0.0``.

    Matching uses ``tag in description``, which is a substring test when
    the descriptions are strings (e.g. "cup" matches "cupboard").
    """
    image_count = len(obj.descriptions)
    # No images: report zero occurrences rather than dividing by zero.
    if image_count == 0:
        return [0.0] * len(obj.tags)

    occurrence_percents = []
    for tag in obj.tags:
        if first_description_only:
            # Count images whose first description contains the tag; an
            # image with no descriptions at all cannot match.
            num_occurrences = sum(
                1
                for descriptions in obj.descriptions.values()
                if descriptions and tag in descriptions[0]
            )
        else:
            # Count each image at most once, however many of its
            # descriptions contain the tag.
            num_occurrences = sum(
                1
                for descriptions in obj.descriptions.values()
                if any(tag in description for description in descriptions)
            )

        occurrence_percents.append(num_occurrences / image_count)

    return occurrence_percents

In [None]:
### All Descriptions ###
# Rows of (object name, tag, fraction of images where any description
# contains the tag)
data = []

# For each object
for obj in objects:
    # Add one row per tag + occurrence fraction pair to 'data'
    data.extend(
        (obj.name, tag, percent)
        for tag, percent in zip(obj.tags, occurrence_per_tag(obj, first_description_only=False))
    )

# Save 'data' as csv. The encoding is explicit so the output does not depend
# on the platform default (the input files are read as UTF-8).
with open('NameOccurrence.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Object Name', 'Tag', 'Percent of Images where tag is in Description'])
    writer.writerows(data)

In [None]:
### First Description Only ###
# Rows of (object name, tag, fraction of images whose first description
# contains the tag)
data = []

# For each object
for obj in objects:
    # Add one row per tag + occurrence fraction pair to 'data'
    data.extend(
        (obj.name, tag, percent)
        for tag, percent in zip(obj.tags, occurrence_per_tag(obj, first_description_only=True))
    )

# Save 'data' as csv. The encoding is explicit so the output does not depend
# on the platform default (the input files are read as UTF-8).
with open('NameOccurrenceFirstOnly.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['Object Name', 'Tag', 'Percent of Images where tag is in Description'])
    writer.writerows(data)

Histogram of Description Occurrences

In [None]:
from collections import Counter
import matplotlib.pyplot as plt

In [None]:
# Make sure the output folder exists so savefig does not fail on a fresh run
os.makedirs('Histograms', exist_ok=True)

# For every object, plot its 10 most frequent descriptions across all images
for obj in objects:
    # Store descriptions from all images in a single list
    all_descriptions = [description for descriptions in obj.descriptions.values() for description in descriptions]
    # Count how often each description occurs {description : count}
    description_occurrence_count = Counter(all_descriptions)

    # Isolate the top 10 descriptions, most frequent first
    top_items = description_occurrence_count.most_common(10)

    labels = [item[0] for item in top_items]
    values = [item[1] for item in top_items]

    # Create horizontal bar plot: bar lengths (counts) run along the x axis
    # and the descriptions are the categories on the y axis
    plt.barh(labels, values)
    plt.xlabel('Number of Occurrences')
    plt.ylabel('Description')
    plt.title(f"{obj.name} description counts")
    plt.tight_layout()

    # Save the plot as an image
    plt.savefig(os.path.join('Histograms', f"{obj.name}.png"))

    # Clear the plot for the next object
    plt.clf()


In [None]:
# Make sure the output folder exists so savefig does not fail on a fresh run
os.makedirs('FirstOnlyHistograms', exist_ok=True)

# For every object, plot its 10 most frequent first descriptions
for obj in objects:
    # Collect only the first description of every image; images without any
    # description are skipped rather than raising IndexError
    all_first_descriptions = [descriptions[0] for descriptions in obj.descriptions.values() if descriptions]
    # Count how often each first description occurs {description : count}
    first_description_occurrence_count = Counter(all_first_descriptions)

    # Isolate the top 10 descriptions, most frequent first
    top_items = first_description_occurrence_count.most_common(10)

    labels = [item[0] for item in top_items]
    values = [item[1] for item in top_items]

    # Create horizontal bar plot: bar lengths (counts) run along the x axis
    # and the descriptions are the categories on the y axis
    plt.barh(labels, values)
    plt.xlabel('Number of Occurrences')
    plt.ylabel('Description')
    plt.title(f"{obj.name} description counts")
    plt.tight_layout()

    # Save the plot as an image
    plt.savefig(os.path.join('FirstOnlyHistograms', f"{obj.name}.png"))

    # Clear the plot for the next object
    plt.clf()
