### 3-3-CombineData.ipynb

Combines output data into a single csv file.

In [1]:
import re
import os
import cv2
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from shapely import Polygon
from pathlib import Path
from tqdm import tqdm

In [2]:
# Define your base working directory
base_dir = Path("/mnt/c/Projects/Master/Data/Processed/LiveBees/")

# Build directory paths from the base directory
marker_lenghts_path = base_dir / "3-LiveWingCrops" / "MarkerLenghts.csv"
wing_heights_path = base_dir / "5-LiveWingCropsRemovedBackground" / "WingHeight.csv"
pred_path = base_dir / "7-LiveWingsSegmented" / "8-prediction" / "predictions_resized_overlaid"
mask_path = base_dir / "7-LiveWingsSegmented" / "8-prediction" / "predictions_resized"
pred_fps = list((pred_path).glob("*"))
out_fp = base_dir / "7-LiveWingsSegmented" / "8-prediction" / "LiveBees.csv"

In [3]:
wing_heights = pd.read_csv(wing_heights_path)
wing_heights["Filename"] = wing_heights["Filename"].str.replace("JPG", "png")

In [4]:
marker_lenghts = pd.read_csv(marker_lenghts_path)
marker_lenghts["Filename"] = marker_lenghts["Filename"].str.replace("JPG", "png")

In [5]:
problems = [
    "Round01-Hive01-2024_06_05-h01bee19.png",
    "Round01-Hive01-2024_06_05-h01bee45.png",
    "Round01-Hive02-2024_06_07-h02bee38.png",
    "Round01-Hive05-2024_06_06-h05bee07.png",
    "Round03-hive21-2024_07_23-h21b28.png",
    ""]

name_to_class = {'FWL':1, 'MC':2, '1sMC':3, '2sMC':4,'3sMC':5,'2MdC':6}
class_to_name = {1:'FWL',2:'MC',3:'1sMC',4:'2sMC',5:'3sMC',6:'2MdC'}

In [6]:
from shapely import Polygon

# Pattern to match info of the filename
pattern = re.compile(r"Round(\d+)-Hive(\d+)-(\d{4}_\d{2}_\d{2})-[^-]*?(\d+)", re.IGNORECASE)

# Loop through every prediction
records = []
for pred_fp in tqdm(pred_fps, desc="Processing files", ncols=145):
    pred_fn = Path(pred_fp).name
    try:
        if pred_fn in problems:
            continue
                
        if not pred_fn.startswith("Round"):
            continue
    
        match = pattern.match(pred_fn)
        if not match:
            print(f"Couldn't find a name pattern for: {pred_fn}")
            
        round_number, hive_id, date, bee_id = match.groups()
        date = date.replace("_", "-")
        bee_id = "ww" + bee_id
        wing_side = "R"
        
        if re.search("(deformed|x)", pred_fn, re.IGNORECASE):
            flag = "deformed"
        elif re.search("dead", pred_fn, re.IGNORECASE):
            flag = "dead"
        else:
            flag = "ok"
    
        marker_length = marker_lenghts.loc[marker_lenghts["Filename"] == pred_fn, "MarkerLengthInPixels"].values[0]
        pixels_per_mm = marker_length / 5
        
        wing_height_pixels = wing_heights.loc[wing_heights["Filename"] == pred_fn, "WingHeightInPixels"].values[0]
        wing_height = wing_height_pixels / pixels_per_mm
    
        pred_mask_path = mask_path / pred_fn
        cells_mask = cv2.imread(pred_mask_path, cv2.IMREAD_GRAYSCALE)
        
        for cell_class in range(1,7):
            cell_name = class_to_name[cell_class]
            cell_mask = np.zeros(cells_mask.shape).astype("uint8")
            cell_mask[np.where(cells_mask == cell_class)] = 1
            contours,hierarchy = cv2.findContours(cell_mask, 1, 2)
            area_sorted_indices = np.argsort([cv2.contourArea(x) for x in contours])
            biggest_contour_index = area_sorted_indices[-1]
            biggest_contour = contours[biggest_contour_index]
        
            #convert to polygon because i suspect this will make the calculation of the perimeter more accurate
            poly = Polygon(biggest_contour.squeeze())
            
            area = np.round(poly.area / (pixels_per_mm)**2,3)
            
            perimeter = np.round(poly.length / pixels_per_mm,3)
            
            records.append({
                "Filename": pred_fn,
                "Round": round_number,
                "Hive": hive_id,
                "BeeID": bee_id,
                "Date": date,
                "WingSide": wing_side,
                "Flags": flag,
                "WingHeight": wing_height, 
                "Cell": cell_name, 
                "Area": area, 
                "Perimeter": perimeter
            })
    except IndexError as e:
        print(f"{pred_fn}\n{e}")
df = pd.DataFrame(records)

Processing files: 100%|█████████████████████████████████████████████████████████████████████████████████████| 5574/5574 [00:41<00:00, 132.96it/s]


In [7]:
df

Unnamed: 0,Filename,Round,Hive,BeeID,Date,WingSide,Flags,WingHeight,Cell,Area,Perimeter
0,Round01-Hive01-2024_06_05-h01bee01.png,01,01,ww01,2024-06-05,R,ok,3.554352,FWL,9.829,15.925
1,Round01-Hive01-2024_06_05-h01bee01.png,01,01,ww01,2024-06-05,R,ok,3.554352,MC,1.084,5.706
2,Round01-Hive01-2024_06_05-h01bee01.png,01,01,ww01,2024-06-05,R,ok,3.554352,1sMC,0.300,2.522
3,Round01-Hive01-2024_06_05-h01bee01.png,01,01,ww01,2024-06-05,R,ok,3.554352,2sMC,0.378,2.875
4,Round01-Hive01-2024_06_05-h01bee01.png,01,01,ww01,2024-06-05,R,ok,3.554352,3sMC,0.470,2.937
...,...,...,...,...,...,...,...,...,...,...,...
7129,Round04-hive35-2024_08_06-h35b38.png,04,35,ww35,2024-08-06,R,ok,4.035163,MC,1.172,6.332
7130,Round04-hive35-2024_08_06-h35b38.png,04,35,ww35,2024-08-06,R,ok,4.035163,1sMC,0.407,3.040
7131,Round04-hive35-2024_08_06-h35b38.png,04,35,ww35,2024-08-06,R,ok,4.035163,2sMC,0.510,3.329
7132,Round04-hive35-2024_08_06-h35b38.png,04,35,ww35,2024-08-06,R,ok,4.035163,3sMC,0.554,3.200


In [8]:
df.to_csv(out_fp)