### Notebook for analyzing the self-created dataset

In [None]:
from src.data.dataloader import VesselCaptureDataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import matplotlib

# Use one font family and size for every figure created in this notebook
matplotlib.rcParams.update({"font.family": "Arial", "font.size": 11})

In [None]:
# Directory handed to the dataset class; the path is relative to the current
# working directory, so the notebook must be run from the matching folder.
data_dir = "data/processed"
# Project-specific dataset (src.data.dataloader); it is consumed through a
# DataLoader in the following cells.
dataset = VesselCaptureDataset(data_dir)

In [None]:
# Report the dataset size as a quick sanity check before iterating over it
num_samples = len(dataset)
print("Number of samples:", num_samples)

In [None]:
# One sample per batch: the cells below call .item() on the volume tensors and
# take element [0] of the collated string fields, both of which rely on
# batch_size=1.
# Shuffling is disabled because the loader is only used to compute dataset
# statistics; a fixed iteration order keeps the vessel ordering in the printed
# tables and in the figures identical from run to run.
data_loader = DataLoader(dataset, batch_size=1, shuffle=False)

In [None]:
# Mean vessel / liquid volume over the whole dataset.
# The per-sample values are kept as well because the histograms further down
# in this cell are drawn from the two lists.
vessel_vol_list = []
liquid_vol_list = []
vessel_vol_sum = 0
liquid_vol_sum = 0

for sample in data_loader:
    # batch_size is 1, so .item() unwraps the single value of each tensor
    vessel_value = sample["vol_vessel"].item()
    liquid_value = sample["vol_liquid"].item()

    vessel_vol_list.append(vessel_value)
    liquid_vol_list.append(liquid_value)

    vessel_vol_sum += vessel_value
    liquid_vol_sum += liquid_value

sample_count = len(dataset)
vessel_vol_avg = vessel_vol_sum / sample_count
liquid_vol_avg = liquid_vol_sum / sample_count

# Show both averages rounded to two decimal places
two_decimals = "{:.2f}".format
print("Average vessel volume:", two_decimals(vessel_vol_avg))
print("Average liquid volume:", two_decimals(liquid_vol_avg))

# Distribution of the vessel volumes (displayed only, not written to disk)
fig, ax = plt.subplots(figsize=(6.3, 5))
ax.hist(vessel_vol_list, bins=20)
ax.set_title("Histogram of vessel volumes")
ax.set_xlabel("Vessel volume (mL)")
ax.set_ylabel("Frequency")
fig.tight_layout()
plt.show()

# Distribution of the liquid volumes; also exported as a high-resolution PNG
fig, ax = plt.subplots(figsize=(6.3, 3.5))
ax.hist(liquid_vol_list, bins=30)
ax.set_title("Histogram of Liquid Volumes in the Dataset")
ax.set_xlabel("Liquid volume (mL)")
ax.set_ylabel("Frequency")
fig.tight_layout()
fig.savefig("output/histogram_liquid_volume.png", format="png", dpi=1200)
plt.show()

In [None]:
from collections import Counter

# Count how many samples (images) exist for each vessel.
# The loader yields one sample per batch, so the collated "vessel_name" field
# is a one-element sequence; taking element [0] gives the plain name directly
# instead of slicing brackets and quotes off the sequence's string form.
vessel_names = [str(data["vessel_name"][0]) for data in data_loader]
name_counts = Counter(vessel_names)

# Parallel lists (same order) of vessel names and their sample counts;
# unique_vessel_names is reused by later cells.
unique_vessel_names = list(name_counts.keys())
vessel_name_counts = list(name_counts.values())

# Print the count for each vessel name
for vessel, count in name_counts.items():
    print(vessel, ":", count)

# Bar chart of the number of images per vessel
plt.figure(figsize=(6.3, 5))
plt.bar(unique_vessel_names, vessel_name_counts)
plt.title("Histogram of the Number of Images per Vessel in the Dataset")
plt.xlabel("Vessel")
plt.ylabel("Frequency")
# Rotate the tick labels BEFORE tight_layout: the layout is computed from the
# labels as they are at call time, so rotating afterwards can leave the
# vertical labels clipped in the saved figure.
plt.xticks(rotation=90)
plt.tight_layout()
plt.savefig("output/histogram_vessel_names.png", format="png", dpi=1200)
plt.show()

In [None]:
# List the vessel names collected by the counting cell above, one per line
for name in unique_vessel_names:
    print(name)

In [None]:
# Average liquid volume and liquid-volume histogram for every vessel.
# Requires unique_vessel_names from the vessel-counting cell.

# Group the liquid volumes by vessel in ONE pass over the loader. Iterating
# the loader once per vessel name would reload every sample for each vessel.
volumes_by_vessel = {str(name): [] for name in unique_vessel_names}
for data in data_loader:
    vessel_name = str(data["vessel_name"][0])
    if vessel_name in volumes_by_vessel:
        volumes_by_vessel[vessel_name].append(data["vol_liquid"].item())

# One subplot per vessel. The grid stays 4 x 3 for up to 12 vessels and grows
# by whole rows beyond that, so indexing the axes can never run out of range.
n_cols = 3
n_rows = max(4, -(-len(unique_vessel_names) // n_cols))  # ceil division
fig, axs = plt.subplots(
    n_rows,
    n_cols,
    figsize=(6.3, 5 * n_rows / 4),  # keep the per-row height of the 4-row layout
    sharex=True,
    sharey=True,
)
axs = axs.ravel()

for ax, name in zip(axs, unique_vessel_names):
    print("Vessel name:", name)
    liquid_vol_list = volumes_by_vessel[str(name)]

    # A name without samples can only occur if unique_vessel_names is stale;
    # report NaN in that case instead of dividing by zero.
    if liquid_vol_list:
        liquid_vol_avg = sum(liquid_vol_list) / len(liquid_vol_list)
    else:
        liquid_vol_avg = float("nan")
    print("Average liquid volume for", name, ":", "{:.2f}".format(liquid_vol_avg))

    # Histogram of the liquid volumes of this vessel; identical axis limits on
    # every panel keep the panels directly comparable.
    ax.hist(liquid_vol_list, bins=30)
    ax.set_title(name)
    ax.set_xlabel("Liquid volume (mL)")
    ax.set_ylabel("Frequency")
    ax.set_xlim([0, 100])
    ax.set_ylim([0, 100])

plt.tight_layout()
plt.savefig("output/histogram_liquid_volume_per_vessel.png", format="png", dpi=1200)
plt.show()

In [None]:
# Segmented depth map of the empty vessel for one example sample, as float32
vessel_depth_path = "data/processed/Gibco_500mL_358ml_1105_2306/Input_EmptyVessel_Depth_segmented.npy"
vessel_depth = np.load(vessel_depth_path).astype(np.float32)

In [None]:
# Quick look at the raw empty-vessel depth values (default colormap, axes kept)
plt.imshow(vessel_depth)
# End with plt.show() like the other plotting cells so the cell renders only
# the figure and not the repr of the returned image object.
plt.show()

In [None]:
# Render the empty-vessel depth map as a figure.
# Work on a squeezed copy so vessel_depth itself is left untouched.
tmIm = vessel_depth.copy().squeeze()

# Rescale only the non-zero pixels to the range 0-255; zero-valued pixels
# (presumably the segmented-out background) stay at 0.
# The bounds are stored as depth_min / depth_max so the built-in min() and
# max() are not overwritten for the rest of the notebook session.
nonzero = tmIm != 0
depth_min = tmIm[nonzero].min()
depth_max = tmIm[nonzero].max()
tmIm[nonzero] = (tmIm[nonzero] - depth_min) / (depth_max - depth_min) * 255

plt.figure(figsize=(6.3, 5))
# Hide the axes: the figure should show the depth image only
plt.axis("off")
plt.imshow(tmIm, cmap="CMRmap")
plt.tight_layout()
plt.savefig("output/depth_map.png", format="png", dpi=1200)
plt.show()

In [None]:
# Segmented depth map of the liquid content for the same example sample
liquid_depth_path = (
    "data/processed/Gibco_500mL_358ml_1105_2306/Input_ContentDepth_segmented.npy"
)
liquid_depth = np.load(liquid_depth_path).astype(np.float32)

# Work on a squeezed copy so liquid_depth itself is left untouched
tmIm = liquid_depth.copy().squeeze()

# Rescale only the non-zero pixels to the range 0-255; zero-valued pixels
# (presumably the segmented-out background) stay at 0.
# The bounds are stored as depth_min / depth_max so the built-in min() and
# max() are not overwritten for the rest of the notebook session.
nonzero = tmIm != 0
depth_min = tmIm[nonzero].min()
depth_max = tmIm[nonzero].max()
tmIm[nonzero] = (tmIm[nonzero] - depth_min) / (depth_max - depth_min) * 255

plt.figure(figsize=(6.3, 5))
# Hide the axes: the figure should show the depth image only
plt.axis("off")
plt.imshow(tmIm, cmap="CMRmap")
plt.tight_layout()
plt.savefig("output/depth_map_liquid.png", format="png", dpi=1200)
plt.show()

In [None]:
# Vessel mask and full depth map of the example sample, both as float32
mask_path = "data/processed/Gibco_500mL_358ml_1105_2306/Input_VesselMask.npy"
depth_path = "data/processed/Gibco_500mL_358ml_1105_2306/Input_DepthMap.npy"

vessel_mask = np.load(mask_path).astype(np.float32)
depth_map = np.load(depth_path).astype(np.float32)

# Element-wise product: depth values are scaled by the mask
# (presumably a 0/1 mask, so only the vessel pixels keep their depth)
vessel_depth_masked = depth_map * vessel_mask

In [None]:
# Render the masked depth map as a figure.
# Work on a squeezed copy so vessel_depth_masked itself is left untouched.
tmIm = vessel_depth_masked.copy().squeeze()

# Clip large depth readings so they do not dominate the colour scale
# (the threshold is in the depth map's own units, which are not visible here).
depth_clip = 500
tmIm[tmIm > depth_clip] = depth_clip

# Rescale only the non-zero pixels to the range 0-255; zero-valued pixels
# (those removed by the mask) stay at 0.
# The bounds are stored as depth_min / depth_max so the built-in min() and
# max() are not overwritten for the rest of the notebook session.
nonzero = tmIm != 0
depth_min = tmIm[nonzero].min()
depth_max = tmIm[nonzero].max()
tmIm[nonzero] = (tmIm[nonzero] - depth_min) / (depth_max - depth_min) * 255

# Draw the image without axes and export it
plt.figure(figsize=(6.3, 5))
plt.imshow(tmIm, cmap="CMRmap")
plt.tight_layout()
plt.axis("off")
plt.savefig("output/depth_map_masked.png", format="png", dpi=1200)

In [None]:
# Cleaned liquid-content mask and full depth map of the example sample.
# NOTE: the variable names vessel_mask / vessel_depth_masked are kept although
# this cell handles the liquid content, because the next cell reads
# vessel_depth_masked.
mask_path = "data/processed/Gibco_500mL_358ml_1105_2306/Input_ContentMaskClean.npy"
depth_path = "data/processed/Gibco_500mL_358ml_1105_2306/Input_DepthMap.npy"

vessel_mask = np.load(mask_path).astype(np.float32)
depth_map = np.load(depth_path).astype(np.float32)

# Element-wise product: depth values are scaled by the content mask
# (presumably a 0/1 mask, so only the liquid pixels keep their depth)
vessel_depth_masked = depth_map * vessel_mask

In [None]:
# Render the content-masked depth map as a figure.
# Work on a squeezed copy so vessel_depth_masked itself is left untouched.
tmIm = vessel_depth_masked.copy().squeeze()

# Clip large depth readings so they do not dominate the colour scale
# (the threshold is in the depth map's own units, which are not visible here).
depth_clip = 500
tmIm[tmIm > depth_clip] = depth_clip

# Rescale only the non-zero pixels to the range 0-255; zero-valued pixels
# (those removed by the mask) stay at 0.
# The bounds are stored as depth_min / depth_max so the built-in min() and
# max() are not overwritten for the rest of the notebook session.
nonzero = tmIm != 0
depth_min = tmIm[nonzero].min()
depth_max = tmIm[nonzero].max()
tmIm[nonzero] = (tmIm[nonzero] - depth_min) / (depth_max - depth_min) * 255

# Draw the image without axes and export it
plt.figure(figsize=(6.3, 5))
plt.imshow(tmIm, cmap="CMRmap")
plt.tight_layout()
plt.axis("off")
plt.savefig("output/depth_map_masked_liquid.png", format="png", dpi=1200)

## Get color distribution

In [None]:
# Number of green / red / blue samples for every vessel.
# Requires unique_vessel_names from the vessel-counting cell.

# Tally the colours in ONE pass over the loader. Iterating the loader once
# per vessel name would reload every sample for each vessel.
color_counts = {
    str(name): {"green": 0, "red": 0, "blue": 0} for name in unique_vessel_names
}
for data in data_loader:
    per_vessel = color_counts.get(str(data["vessel_name"][0]))
    if per_vessel is None:
        # Vessel not listed in unique_vessel_names: ignored
        continue
    color = data["color"][0]
    # Samples with any other colour value are not counted
    for known_color in per_vessel:
        if color == known_color:
            per_vessel[known_color] += 1

# Report the tallies vessel by vessel
for name in unique_vessel_names:
    per_vessel = color_counts[str(name)]
    print("Vessel name:", name)
    print("Green samples", name, ":", per_vessel["green"])
    print("Red samples:", name, ":", per_vessel["red"])
    print("Blue samples:", name, ":", per_vessel["blue"])