In [None]:
import xml.etree.ElementTree as ET
import pandas as pd

# Step 1: Parse the XML file
def parse_annotations(xml_file):
    # Parse the XML tree
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # List to store the extracted data
    data = []

    # Iterate through each image in the XML
    for image in root.findall('image'):
        image_name = image.get('name')  # Get the image name

        # Iterate through each box in the image
        for box in image.findall('box'):
            xtl = float(box.get('xtl'))  # Top-left x-coordinate
            ytl = float(box.get('ytl'))  # Top-left y-coordinate
            xbr = float(box.get('xbr'))  # Bottom-right x-coordinate
            ybr = float(box.get('ybr'))  # Bottom-right y-coordinate

            # Get attributes (e.g., size and ripeness)
            attributes = {}
            for attribute in box.findall('attribute'):
                attributes[attribute.get('name')] = attribute.text == 'true'

            # Add a row of data to the list
            data.append({
                'image_name': image_name,
                'xtl': xtl,
                'ytl': ytl,
                'xbr': xbr,
                'ybr': ybr,
                'small': attributes.get('Small', False),
                'medium': attributes.get('Medium', False),
                'large': attributes.get('Large', False),
                'raw': attributes.get('Raw', False),
                'ripe': attributes.get('Ripe', False),
            })

    return pd.DataFrame(data)

# Example usage
xml_file = "annotations.xml"  # Replace with the path to your annotation file
annotations_df = parse_annotations(xml_file)

# Display the parsed data
print(annotations_df.head())

# Save the data to a CSV file for future use
annotations_df.to_csv("parsed_annotations.csv", index=False)


  image_name     xtl     ytl     xbr     ybr  small  medium  large    raw  \
0   i100.jpg   88.12   96.68  140.07  180.90  False    True  False  False   
1   i100.jpg   89.77  136.31  149.25  225.00  False   False   True  False   
2   i100.jpg  132.19   80.20  175.90  151.60  False    True  False  False   
3   i100.jpg  140.07   94.53  200.63  208.83  False   False   True  False   
4   i100.jpg  188.45   73.75  209.59  107.79   True   False  False   True   

    ripe  
0   True  
1   True  
2   True  
3   True  
4  False  


In [None]:
pip install torch torchvision opencv-python




In [None]:
import os
import cv2
import numpy as np
import torch
# from torchvision.transforms import Compose, Resize, ToTensor, Normalize

from torchvision import transforms

# Define the preprocessing transform for MiDaS
transform = transforms.Compose([
    transforms.ToPILImage(),                # Convert the image to PIL format
    transforms.Resize((384, 384)),          # Resize image to 384x384 (MiDaS input size)
    transforms.ToTensor(),                  # Convert image to PyTorch tensor
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],        # Normalize using ImageNet means
        std=[0.229, 0.224, 0.225]
    ),
])


# Load the MiDaS model
def load_midas_model():
    model_type = "DPT_Large"  # Choose a MiDaS model
    midas = torch.hub.load("intel-isl/MiDaS", model_type)
    midas.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    midas.eval()

    # Define MiDaS transform
    transform = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform
    return midas, transform

# Estimate depth for an image
def estimate_depth(image_path, midas, transform):
    # Check if the image file exists
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Read the image
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Failed to load image: {image_path}")

    # Convert to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Transform for MiDaS model
    input_batch = transform(img).unsqueeze(0).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    # print(f"Image shape before transform: {img.shape}")
    # print(f"Input batch shape: {input_batch.shape}")


    # Predict depth
    with torch.no_grad():
        depth = midas(input_batch)
        depth = torch.nn.functional.interpolate(
            depth.unsqueeze(1),
            size=img.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze().cpu().numpy()

    return depth
    # return 0

def extract_depth_data(df, midas, transform):
    depth_data = []

    for image_name in df['image_name'].unique():
        image_path = f"{image_name}"

        print(f"Processing {image_name}...")
        try:
            # Generate depth map
            depth_map = estimate_depth(image_path, midas, transform)
            print(f"Depth map shape for {image_name}: {depth_map.shape}")
        except Exception as e:
            print(f"Error generating depth map for {image_name}: {e}")
            continue

        # Filter rows for the current image
        image_rows = df[df['image_name'] == image_name]
        print(f"Bounding boxes for {image_name}: {len(image_rows)}")
        if image_rows.empty:
            print(f"No valid bounding boxes found for {image_name}. Skipping...")
            continue

        for _, row in image_rows.iterrows():
            try:
                # Extract and validate bounding box coordinates
                xtl, ytl, xbr, ybr = map(int, [row['xtl'], row['ytl'], row['xbr'], row['ybr']])
                xtl = max(0, xtl)
                ytl = max(0, ytl)
                xbr = min(depth_map.shape[1], xbr)
                ybr = min(depth_map.shape[0], ybr)

                # Extract depth for bounding box
                bbox_depth = depth_map[ytl:ybr, xtl:xbr]
                if bbox_depth.size == 0:  # Handle empty depth slices
                    print(f"Empty depth slice for bounding box in {image_name}. Skipping...")
                    depth_data.append(np.nan)
                    continue

                avg_depth = np.mean(bbox_depth)
                depth_data.append(avg_depth)
            except Exception as e:
                print(f"Error processing bounding box for {row['image_name']}: {e}")
                depth_data.append(np.nan)

    # Ensure the length of depth_data matches the DataFrame
    print(f"Total depth values collected: {len(depth_data)}")
    if len(depth_data) != len(df):
        raise ValueError(f"Length of depth_data ({len(depth_data)}) does not match the number of rows in df ({len(df)}).")

    df['depth'] = depth_data
    return df


In [None]:
# Example Usage
midas, transform = load_midas_model()

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master
Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


In [None]:
# image_dir = "path_to_images"  # Replace with the directory containing your images
xml_file = "annotations.xml"
annotations_df = parse_annotations(xml_file)
annotations_df = extract_depth_data(annotations_df, midas, transform)

# Save updated data with depth
annotations_df.to_csv("annotations_with_depth.csv", index=False)

Processing i100.jpg...
Image shape before transform: (225, 225, 3)
Input batch shape: torch.Size([1, 3, 384, 384])
Depth map shape for i100.jpg: (225, 225)
Bounding boxes for i100.jpg: 6
Processing i101.jpg...
Image shape before transform: (175, 288, 3)
Input batch shape: torch.Size([1, 3, 384, 384])
Depth map shape for i101.jpg: (175, 288)
Bounding boxes for i101.jpg: 1
Processing i102.jpg...
Image shape before transform: (183, 275, 3)
Input batch shape: torch.Size([1, 3, 384, 384])
Depth map shape for i102.jpg: (183, 275)
Bounding boxes for i102.jpg: 4
Processing i103.jpg...
Image shape before transform: (163, 310, 3)
Input batch shape: torch.Size([1, 3, 384, 384])
Depth map shape for i103.jpg: (163, 310)
Bounding boxes for i103.jpg: 4
Processing i104.jpg...
Image shape before transform: (194, 260, 3)
Input batch shape: torch.Size([1, 3, 384, 384])
Depth map shape for i104.jpg: (194, 260)
Bounding boxes for i104.jpg: 7
Processing i105.jpg...
Image shape before transform: (260, 260, 3

In [None]:
# depth_map = estimate_depth("i100.jpg", midas, transform)
# print(f"Depth map shape for i100.jpg:", depth_map)

Image shape before transform: (225, 225, 3)
Input batch shape: torch.Size([1, 3, 384, 384])
Depth map shape for i100.jpg: [[ 2.8744266  2.9286826  2.9766912 ... 19.163998  18.961834  19.074747 ]
 [ 2.8878372  2.9264984  2.9923728 ... 19.141748  19.08182   18.957941 ]
 [ 2.8944275  2.9323015  2.9825191 ... 19.013908  18.978682  18.924757 ]
 ...
 [ 8.989193   9.018615   9.039868  ... 11.592698  11.58648   11.596172 ]
 [ 8.980381   9.045133   9.090462  ... 11.661094  11.604401  11.5635805]
 [ 8.993718   9.041263   9.053713  ... 11.695225  11.575205  11.518712 ]]


# Area and depth proportionality

In [None]:
df = pd.read_csv("annotations_with_depth.csv")

# Calculate bounding box area
df["area"] = (df["xbr"] - df["xtl"]) * (df["ybr"] - df["ytl"])

# Calculate proportionality metric
df["metric"] = df["area"] / df["depth"]

# Save the updated dataframe for inspection
df.to_csv("annotations_with_area_and_metric.csv", index=False)


# Estimating break points

In [None]:
# # Filter rows for each size class
small_metrics = df[df["small"] == True]["metric"]
medium_metrics = df[df["medium"] == True]["metric"]
large_metrics = df[df["large"] == True]["metric"]

# # Calculate breakpoints
# break_point1 = np.max(small_metrics)  # Highest value for small
# break_point2 = np.min(large_metrics)  # Lowest value for large

# # Print the results
# print(f"Break Point 1 (Small-Medium): {break_point1}")
# print(f"Break Point 2 (Medium-Large): {break_point2}")


In [None]:
# # Calculate breakpoints based on the mean metric of small and large
# break_point1 = np.mean(small_metrics)  # Mean of small category
# break_point2 = np.mean(large_metrics)  # Mean of large category

# print(f"Break Point 1 (Small-Medium): {break_point1}")
# print(f"Break Point 2 (Medium-Large): {break_point2}")



# Calculate medians instead of using min/max
break_point1 = small_metrics.median()  # Median for small category
break_point2 = large_metrics.median()  # Median for large category

print(f"Break Point 1 (Small-Medium): {break_point1}")
print(f"Break Point 2 (Medium-Large): {break_point2}")



Break Point 1 (Small-Medium): 32.32600853009189
Break Point 2 (Medium-Large): 59.09826083014485


In [None]:
# Classify mangoes based on breakpoints
def classify_size(metric, break_point1, break_point2):
    if metric < break_point1:
        return "Small"
    elif break_point1 <= metric < break_point2:
        return "Medium"
    else:
        return "Large"

# Apply the classification to the DataFrame
df["predicted_size"] = df["metric"].apply(lambda x: classify_size(x, break_point1, break_point2))
df_filtered = df[["image_name", "small", "medium", "large", "predicted_size"]]
# Save the results to a new CSV file
df.to_csv("annotations_with_predicted_size.csv", index=False)


In [None]:
# Function to get the actual size label from the annotations
def get_actual_size(row):
    if row['small']:
        return 'Small'
    elif row['medium']:
        return 'Medium'
    else:
        return 'Large'

# Apply to the DataFrame to get the actual size
df_filtered["actual_size"] = df_filtered.apply(get_actual_size, axis=1)

# Calculate the accuracy
correct_predictions = (df_filtered["predicted_size"] == df_filtered["actual_size"]).sum()
total_predictions = len(df_filtered)
accuracy = correct_predictions / total_predictions

print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 40.07%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered["actual_size"] = df_filtered.apply(get_actual_size, axis=1)
