Analysis of image factors on annotation consensus-- Zooniverse
Start date: 09/26/2023

In [41]:
#Imports
import pandas as pd
from PIL import Image
import os
import ast
import numpy as np

In [42]:
# Load the consensus annotation table used for the rest of the analysis into `df`.
# NOTE(review): this is an absolute, machine-specific Windows path -- consider a
# configurable data directory so the notebook can run elsewhere.
path = "E:\\imagefactors\\data\\consensusLabels_agreementIndex.csv"
# The file is opened in text mode with the platform's default encoding and the
# open handle is passed to pandas; the `with` block closes it afterwards.
with open(path) as f:
  df = pd.read_csv(f)

#Fixing how bounding boxes are read for the analysis labels
def eval_bbox_refined(row):
    """Parse the textual 'consensus_bbox' entry of a row into a Python object.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'consensus_bbox' entry holding the literal text of the
        box (e.g. "[x, y, w, h]") or a null value.

    Returns
    -------
    object or None
        The value produced by ast.literal_eval for non-null entries,
        otherwise None.
    """
    raw_bbox = row['consensus_bbox']
    # literal_eval only accepts Python literals, so arbitrary code in the CSV
    # cannot be executed; null entries are passed through as None.
    return ast.literal_eval(raw_bbox) if pd.notnull(raw_bbox) else None
# Parse the 'consensus_bbox' column row by row: each entry is replaced with the
# value returned by eval_bbox_refined (the parsed literal, or None when null).
df['consensus_bbox'] = df.apply(eval_bbox_refined, axis=1)

# Creating a base file column to match tiles to full images later.
# Drops the last 10 characters of each filename, e.g.
# "20211201_Atrisco_0459_01_01.png" -> "20211201_Atrisco_0459".
# NOTE(review): assumes every filename ends in a 10-character tile suffix such
# as "_01_01.png" -- confirm this holds for all rows.
df["basefile"] = [x[:-10] for x in df['filename']]

df.head()

Unnamed: 0,id,filename,consensus_class_ID,consensus_bbox,pielou_index,basefile
0,1,20211201_Atrisco_0459_01_01.png,Goose,"[634.05224609375, 260.4735412597656, 49.0, 80....",0.764205,20211201_Atrisco_0459
1,2,20211201_Atrisco_0459_01_01.png,Goose,"[555.4261474609375, 216.25, 53.0, 69.0]",0.764205,20211201_Atrisco_0459
2,3,20211201_Atrisco_0459_01_01.png,Goose,"[266.75, 120.83124542236328, 60.33087158203125...",0.764205,20211201_Atrisco_0459
3,4,20211201_Atrisco_0459_01_01.png,Goose,"[176.8125, 22.46035385131836, 52.0, 84.0]",0.764205,20211201_Atrisco_0459
4,5,20211201_Atrisco_0459_01_01.png,Goose,"[101.36946105957031, 170.06580352783203, 62.24...",0.721928,20211201_Atrisco_0459


In [43]:
#Calculate area of bboxes
def calc_area(row):
    """Return the area (width * height) of the row's consensus bounding box.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'consensus_bbox' as a 4-item sequence
        [xmin, ymin, width, height], or None / NaN when no box is available.

    Returns
    -------
    float
        width * height of the box, or NaN when the box is missing.
    """
    bbox = row['consensus_bbox']
    # eval_bbox_refined stores None for null boxes; unpacking None would raise
    # TypeError, so missing boxes yield NaN and the column stays numeric.
    if bbox is None or (isinstance(bbox, float) and bbox != bbox):
        return float('nan')
    # Only the width and height contribute to the area.
    _, _, w, h = bbox
    return w * h

# Compute the bounding-box area for every row and store it in a new 'area' column.
df['area'] = df.apply(calc_area, axis=1)

In [44]:
# Define a function to calculate percentage area
def calculate_percentage_area(image_filename, bbox_area,
                              image_dir="E:\\imagefactors\\data\\zooniverse"):
    """Express a bounding-box area as a percentage of its image's pixel area.

    Parameters
    ----------
    image_filename : str
        File name of the image, resolved relative to `image_dir`.
    bbox_area : float
        Area of the bounding box, in the same pixel units as the image size.
    image_dir : str, optional
        Directory containing the images. Defaults to the location originally
        hard-coded in this notebook.

    Returns
    -------
    float or None
        (bbox_area / (image width * image height)) * 100, or None when the
        image file does not exist (a message is printed in that case).
    """
    image_path = os.path.join(image_dir, image_filename)

    try:
        # Image.open keeps the underlying file open; the context manager makes
        # sure the handle is released as soon as the size has been read, which
        # matters because this function is called once per DataFrame row.
        with Image.open(image_path) as image:
            image_width, image_height = image.size
    except FileNotFoundError:
        # Handle the case where the image is not found
        print(f"Image not found: {image_path}")
        return None  # None signals to the caller that the image wasn't found

    image_area = image_width * image_height

    percentage_area = (bbox_area / image_area) * 100
    return percentage_area

# Calculate percentage area and add it as a new column ('bbox_percent_area').
# calculate_percentage_area opens the image file named in 'filename' for every
# row; rows whose image cannot be found receive None.
df['bbox_percent_area'] = df.apply(lambda row: calculate_percentage_area(row['filename'], row['area']), axis=1)

df.head()

Unnamed: 0,id,filename,consensus_class_ID,consensus_bbox,pielou_index,basefile,area,bbox_percent_area
0,1,20211201_Atrisco_0459_01_01.png,Goose,"[634.05224609375, 260.4735412597656, 49.0, 80....",0.764205,20211201_Atrisco_0459,3920.06131,1.100016
1,2,20211201_Atrisco_0459_01_01.png,Goose,"[555.4261474609375, 216.25, 53.0, 69.0]",0.764205,20211201_Atrisco_0459,3657.0,1.026198
2,3,20211201_Atrisco_0459_01_01.png,Goose,"[266.75, 120.83124542236328, 60.33087158203125...",0.764205,20211201_Atrisco_0459,5188.454956,1.455943
3,4,20211201_Atrisco_0459_01_01.png,Goose,"[176.8125, 22.46035385131836, 52.0, 84.0]",0.764205,20211201_Atrisco_0459,4368.0,1.225713
4,5,20211201_Atrisco_0459_01_01.png,Goose,"[101.36946105957031, 170.06580352783203, 62.24...",0.721928,20211201_Atrisco_0459,5135.498171,1.441082


In [45]:
# % of targets of the same class in the same image, as the analysis target

# Define a function to calculate the percentage of same-class neighbors for a given row
def calculate_same_class_percentage(row, df):
    """Percentage of the other boxes in the same image that share the row's class.

    Parameters
    ----------
    row : pandas.Series
        Target annotation; must provide 'filename' and 'consensus_class_ID'.
    df : pandas.DataFrame
        Table of all annotations with the same two columns.

    Returns
    -------
    int or float
        0 when the image holds no other annotation, otherwise
        (same-class neighbors / all neighbors) * 100.
    """
    target_file = row['filename']
    target_class = row['consensus_class_ID']

    in_same_image = df['filename'] == target_file

    # One is subtracted from each count to leave out the target box itself,
    # which is expected to be among the matches.
    other_boxes = int(in_same_image.sum()) - 1
    if other_boxes == 0:
        # A lone box has no neighbors; report 0 rather than divide by zero.
        return 0

    shares_class = df['consensus_class_ID'] == target_class
    same_class_boxes = int((in_same_image & shares_class).sum()) - 1

    return (same_class_boxes / other_boxes) * 100

# Calculate the same-class percentage for each row and add the results as a new column.
# The whole DataFrame is passed in because each row is compared against every
# other row that shares its filename.
df['same_class_percent'] = df.apply(lambda row: calculate_same_class_percentage(row, df), axis=1)

In [47]:
#How many neighbors for each annotation?

# Define a function to calculate the number of neighbors for a given row
def count_neighbors(row, df, radius_factor=2):
    """Count the other boxes in the same image whose centers lie near this box.

    A box counts as a neighbor when the Euclidean distance between its center
    and the target's center is at most `radius_factor` times the larger of the
    target's width and height.

    Parameters
    ----------
    row : pandas.Series
        Target annotation; must provide 'filename' and 'consensus_bbox'
        (already parsed to [xmin, ymin, width, height], or None when missing).
        `row.name` is used to exclude the target from its own neighbors.
    df : pandas.DataFrame
        Table of all annotations with the same two columns.
    radius_factor : float, optional
        Multiplier applied to max(width, height) to obtain the search radius.
        Defaults to 2, the value originally hard-coded here.

    Returns
    -------
    int or float
        Number of neighbors, or NaN when the target row has no bounding box.
    """
    bbox = row['consensus_bbox']
    # eval_bbox_refined stores None for null boxes; without a box there is no
    # position to measure from, so the count is undefined rather than zero.
    if bbox is None or (isinstance(bbox, float) and bbox != bbox):
        return np.nan

    search_radius = radius_factor * max(bbox[2], bbox[3])

    # Center coordinates of the target bounding box
    x_center = bbox[0] + bbox[2] / 2
    y_center = bbox[1] + bbox[3] / 2

    # Only the boxes of the same image are needed, so select that single
    # column instead of materialising every full row with iterrows().
    same_image_bboxes = df.loc[df['filename'] == row['filename'], 'consensus_bbox']

    num_neighbors = 0
    for idx, neighbor_bbox in same_image_bboxes.items():
        if idx == row.name:
            continue  # skip the target itself
        if neighbor_bbox is None or (isinstance(neighbor_bbox, float) and neighbor_bbox != neighbor_bbox):
            continue  # a row without a box cannot be located, so it is not a neighbor

        neighbor_x_center = neighbor_bbox[0] + neighbor_bbox[2] / 2
        neighbor_y_center = neighbor_bbox[1] + neighbor_bbox[3] / 2

        # Euclidean distance between the two centers
        distance = np.sqrt((x_center - neighbor_x_center)**2 + (y_center - neighbor_y_center)**2)

        if distance <= search_radius:
            num_neighbors += 1

    return num_neighbors

# Calculate the number of neighbors for each row and add the results as a new column.
# count_neighbors filters df by filename and loops over the matches for every
# row, so the run time grows with both the row count and the boxes per image.
df['num_neighbors'] = df.apply(lambda row: count_neighbors(row, df), axis=1)

df.head()

Unnamed: 0,id,filename,consensus_class_ID,consensus_bbox,pielou_index,basefile,area,bbox_percent_area,same_class_percent,num_neighbors
0,1,20211201_Atrisco_0459_01_01.png,Goose,"[634.05224609375, 260.4735412597656, 49.0, 80....",0.764205,20211201_Atrisco_0459,3920.06131,1.100016,100.0,1
1,2,20211201_Atrisco_0459_01_01.png,Goose,"[555.4261474609375, 216.25, 53.0, 69.0]",0.764205,20211201_Atrisco_0459,3657.0,1.026198,100.0,1
2,3,20211201_Atrisco_0459_01_01.png,Goose,"[266.75, 120.83124542236328, 60.33087158203125...",0.764205,20211201_Atrisco_0459,5188.454956,1.455943,100.0,4
3,4,20211201_Atrisco_0459_01_01.png,Goose,"[176.8125, 22.46035385131836, 52.0, 84.0]",0.764205,20211201_Atrisco_0459,4368.0,1.225713,100.0,2
4,5,20211201_Atrisco_0459_01_01.png,Goose,"[101.36946105957031, 170.06580352783203, 62.24...",0.721928,20211201_Atrisco_0459,5135.498171,1.441082,100.0,1


In [49]:
# Temporary checkpoint: save progress so far -- image texture and distance from
# center still need to be calculated. index=False leaves the DataFrame index
# out of the CSV.
# NOTE(review): 'consensus_bbox' holds parsed Python objects at this point, so it
# is written as text and would need re-parsing on reload -- confirm this is intended.
df.to_csv('E:/imagefactors/data/crowdsourced_imagefactors.csv', index=False)

In [None]:
#Distance from image center

# Function to calculate distance from center
def calculate_distance_from_center(row):
    """Distance between the bounding-box center and the image center.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide:
        - 'consensus_bbox': [x, y, width, height] in pixels, or None / NaN
          when no box is available;
        - 'gsd': factor converting pixels to meters (presumably the ground
          sampling distance in meters per pixel -- TODO confirm);
        - 'center_x_m', 'center_y_m': image center coordinates in meters.

    Returns
    -------
    float
        Euclidean distance in meters, or NaN when the box is missing.
    """
    bbox = row['consensus_bbox']
    # eval_bbox_refined stores None for null boxes; unpacking None would raise
    # TypeError, so missing boxes yield NaN instead.
    if bbox is None or (isinstance(bbox, float) and bbox != bbox):
        return float('nan')

    # Coordinates of the bounding box (x, y, width, height)
    x, y, width, height = bbox

    # Center point of the bounding box in pixels
    bbox_center_x_px = x + (width / 2)
    bbox_center_y_px = y + (height / 2)

    # Center point of the bounding box in meters
    bbox_center_x_m = bbox_center_x_px * row['gsd']
    bbox_center_y_m = bbox_center_y_px * row['gsd']

    # Distance from the center of the image in meters
    distance_m = ((row['center_x_m'] - bbox_center_x_m)**2 + (row['center_y_m'] - bbox_center_y_m)**2)**0.5

    return distance_m

# Apply the function to the merged dataframe
# NOTE(review): calculate_distance_from_center reads 'gsd', 'center_x_m' and
# 'center_y_m', which are not among the df columns displayed in the earlier
# cells; the merge that adds them is not visible here -- confirm it runs
# before this cell, otherwise this line raises KeyError.
df['distance_from_center'] = df.apply(calculate_distance_from_center, axis=1)

In [None]:
#GLCM derivatives: ratio of donut to target