In [10]:
# JUPYTER NOTEBOOK CELL #2

import open3d as o3d
import numpy as np
import pandas as pd
import glob
import os

from sklearn.ensemble import IsolationForest


In [11]:
# JUPYTER NOTEBOOK CELL #3

def extract_ply_features(ply_path):
    """
    Read a .ply point cloud with Open3D and summarise its coarse geometry.

    Parameters
    ----------
    ply_path : str
        Path of the .ply file to load.

    Returns
    -------
    dict or None
        None when the loaded cloud contains no points. Otherwise a dict with:
        - "dx", "dy", "dz": side lengths of the axis-aligned bounding box
        - "volume": product of those three side lengths (volume of the box,
          not of the scanned shape itself)
        - "num_points": number of points in the cloud
    """
    cloud = o3d.io.read_point_cloud(ply_path)

    # Guard clause: an empty cloud has nothing to measure.
    if not cloud.has_points():
        return None

    # Extents of the axis-aligned bounding box, one value per axis.
    box = cloud.get_axis_aligned_bounding_box()
    dx, dy, dz = box.max_bound - box.min_bound

    features = {"dx": dx, "dy": dy, "dz": dz}
    features["volume"] = dx * dy * dz
    features["num_points"] = np.asarray(cloud.points).shape[0]
    return features


In [12]:
# JUPYTER NOTEBOOK CELL #4

# Workbook holding the manual measurements, one row per cow.
excel_path = "Measurements.xlsx"  # Change if needed
df_measurements = pd.read_excel(io=excel_path)

# Preview the first five rows to sanity-check the load
df_measurements.head(n=5)


Unnamed: 0,N,live weithg,withers height,hip height,chest depth,chest width,ilium width,hip joint width,oblique body length,hip length,heart girth,left timestamp,top timestamp,right timestamp,Data
0,1,415,117,122,62,40,43,42,145,43,172,12.09.07.767,12.09.07.765,12.09.07.778,2019-08-15
1,2,407,116,121,60,39,42,44,127,37,171,12.15.22.153,12.15.22.142,12.15.22.188,2019-08-15
2,3,448,114,121,60,43,41,40,128,41,176,12.20.11.699,12.20.11.691,12.20.11.688,2019-08-15
3,4,443,118,123,63,46,44,44,150,46,176,12.24.08.077,12.24.08.106,12.24.08.096,2019-08-15
4,5,410,124,127,66,41,42,44,140,45,178,12.27.25.234,12.27.25.197,12.27.25.276,2019-08-15


In [13]:
# Path to the folder containing .ply files
ply_folder = "AlignedClouds"
# glob returns matches in arbitrary, OS-dependent order. Sorting makes the run
# reproducible, which matters when two files share the same cow ID prefix
# (the later file in the list overwrites the earlier one).
ply_paths = sorted(glob.glob(os.path.join(ply_folder, "*.ply")))

cloud_features_dict = {}  # cow ID (int) -> feature dict from extract_ply_features
skipped_files = []        # (file name, reason) for files that contributed nothing
overwritten_ids = []      # (cow ID, file name) where a file replaced an earlier entry

for ply_file in ply_paths:
    filename = os.path.basename(ply_file)  # e.g. "1_AutoAligned.ply"
    filename_no_ext = os.path.splitext(filename)[0]  # "1_AutoAligned"

    # Split on underscore to isolate the numeric portion at the start:
    cow_id_str = filename_no_ext.split('_')[0]  # "1" from "1_AutoAligned"

    try:
        cow_id = int(cow_id_str)  # Convert "1" to integer
    except ValueError:
        # Still skip the file, but remember it so the omission is visible
        skipped_files.append((filename, "file name does not start with a numeric cow ID"))
        continue

    feats = extract_ply_features(ply_file)
    if feats is None:
        # extract_ply_features returns None for a cloud without points
        skipped_files.append((filename, "point cloud has no points"))
        continue

    if cow_id in cloud_features_dict:
        # Same precedence as before (last file wins), now deterministic and reported
        overwritten_ids.append((cow_id, filename))
    cloud_features_dict[cow_id] = feats

print(f"Extracted features for {len(cloud_features_dict)} cows.")

# Report anything that was dropped or replaced instead of failing silently
for skipped_name, reason in skipped_files:
    print(f"Skipped {skipped_name}: {reason}")
for dup_id, dup_name in overwritten_ids:
    print(f"Cow ID {dup_id} appears in more than one file; using {dup_name}")


Extracted features for 103 cows.


In [14]:
# JUPYTER NOTEBOOK CELL #6

# Attach the point cloud features to the measurements table.
# Each row's cow ID (column "N") is looked up among the keys of
# cloud_features_dict. Series.map does that lookup for the whole column at
# once and yields NaN for cows without a point cloud, so no row-by-row
# iterrows()/.at loop is needed.
for feature_name in ["dx", "dy", "dz", "volume", "num_points"]:
    feature_by_cow = {
        cow_id: feats[feature_name]
        for cow_id, feats in cloud_features_dict.items()
    }
    # astype(float) keeps every feature column as float (NaN-capable),
    # including num_points, regardless of how many cows were matched.
    df_measurements[feature_name] = df_measurements["N"].map(feature_by_cow).astype(float)

# Let's see if the new columns are populated
df_measurements.head()


Unnamed: 0,N,live weithg,withers height,hip height,chest depth,chest width,ilium width,hip joint width,oblique body length,hip length,heart girth,left timestamp,top timestamp,right timestamp,Data,dx,dy,dz,volume,num_points
0,1,415,117,122,62,40,43,42,145,43,172,12.09.07.767,12.09.07.765,12.09.07.778,2019-08-15,11.694385,12.036753,7.100489,999.48203,651264.0
1,2,407,116,121,60,39,42,44,127,37,171,12.15.22.153,12.15.22.142,12.15.22.188,2019-08-15,11.573621,12.146367,8.824709,1240.554957,651264.0
2,3,448,114,121,60,43,41,40,128,41,176,12.20.11.699,12.20.11.691,12.20.11.688,2019-08-15,11.407958,11.975951,8.532564,1165.728623,651264.0
3,4,443,118,123,63,46,44,44,150,46,176,12.24.08.077,12.24.08.106,12.24.08.096,2019-08-15,11.394471,12.140296,8.022305,1109.74345,651264.0
4,5,410,124,127,66,41,42,44,140,45,178,12.27.25.234,12.27.25.197,12.27.25.276,2019-08-15,11.509396,11.796293,7.637703,1036.95729,651264.0


In [15]:
# JUPYTER NOTEBOOK CELL #7

# Row/column count before removing cows that have no point cloud features
print(f"DataFrame shape before dropping: {df_measurements.shape}")

# Keep only the rows in which every point cloud feature is present
df_merged = df_measurements.dropna(
    how="any",
    subset=["dx", "dy", "dz", "volume", "num_points"],
)
print(f"DataFrame shape after dropping:  {df_merged.shape}")
df_merged.head()


DataFrame shape before dropping: (103, 20)
DataFrame shape after dropping:  (103, 20)


Unnamed: 0,N,live weithg,withers height,hip height,chest depth,chest width,ilium width,hip joint width,oblique body length,hip length,heart girth,left timestamp,top timestamp,right timestamp,Data,dx,dy,dz,volume,num_points
0,1,415,117,122,62,40,43,42,145,43,172,12.09.07.767,12.09.07.765,12.09.07.778,2019-08-15,11.694385,12.036753,7.100489,999.48203,651264.0
1,2,407,116,121,60,39,42,44,127,37,171,12.15.22.153,12.15.22.142,12.15.22.188,2019-08-15,11.573621,12.146367,8.824709,1240.554957,651264.0
2,3,448,114,121,60,43,41,40,128,41,176,12.20.11.699,12.20.11.691,12.20.11.688,2019-08-15,11.407958,11.975951,8.532564,1165.728623,651264.0
3,4,443,118,123,63,46,44,44,150,46,176,12.24.08.077,12.24.08.106,12.24.08.096,2019-08-15,11.394471,12.140296,8.022305,1109.74345,651264.0
4,5,410,124,127,66,41,42,44,140,45,178,12.27.25.234,12.27.25.197,12.27.25.276,2019-08-15,11.509396,11.796293,7.637703,1036.95729,651264.0


In [19]:
# JUPYTER NOTEBOOK CELL #8

# Columns fed to the anomaly detector: a few manual measurements from the
# Excel sheet plus the point cloud features computed above.
feature_cols = [
    "live weithg",  # from Excel; spelled exactly as the sheet's column header
    "withers height",
    "hip height",
    # etc. ... add the columns you want from the measurements
    "dx",
    "dy",
    "dz",
    "volume",
    "num_points"
]

# Drop rows if any of these feature columns are missing; .copy() so the new
# columns below are written to an independent frame, not a view of df_merged
df_for_model = df_merged.dropna(subset=feature_cols).copy()

# Create matrix X (rows = cows, columns = feature_cols)
X = df_for_model[feature_cols].values

# Initialize IsolationForest
# random_state is fixed so the flagged set is reproducible across runs.
iso_forest = IsolationForest(n_estimators=100, random_state=42, contamination=0.1)
# contamination=0.1 => about 10% will be flagged as outliers. Adjust as desired.

iso_forest.fit(X)

# Predict outliers: -1 = outlier/anomaly, +1 = normal
preds = iso_forest.predict(X)

df_for_model["anomaly_label"] = preds  # -1 or +1
# Derive the flag columns with vectorised operations instead of a
# per-element .apply(lambda ...): a boolean flag and a 0/1 integer copy.
df_for_model["is_anomaly"] = df_for_model["anomaly_label"].eq(-1)
df_for_model["anomaly"] = df_for_model["is_anomaly"].astype("int64")

# Check how many anomalies were detected
anomaly_count = df_for_model["is_anomaly"].sum()
print(f"Detected {anomaly_count} anomalies out of {len(df_for_model)} cows.")
df_for_model.head(10)


Detected 11 anomalies out of 103 cows.


Unnamed: 0,N,live weithg,withers height,hip height,chest depth,chest width,ilium width,hip joint width,oblique body length,hip length,...,right timestamp,Data,dx,dy,dz,volume,num_points,anomaly_label,is_anomaly,anomaly
0,1,415,117,122,62,40,43,42,145,43,...,12.09.07.778,2019-08-15,11.694385,12.036753,7.100489,999.48203,651264.0,1,False,0
1,2,407,116,121,60,39,42,44,127,37,...,12.15.22.188,2019-08-15,11.573621,12.146367,8.824709,1240.554957,651264.0,1,False,0
2,3,448,114,121,60,43,41,40,128,41,...,12.20.11.688,2019-08-15,11.407958,11.975951,8.532564,1165.728623,651264.0,1,False,0
3,4,443,118,123,63,46,44,44,150,46,...,12.24.08.096,2019-08-15,11.394471,12.140296,8.022305,1109.74345,651264.0,1,False,0
4,5,410,124,127,66,41,42,44,140,45,...,12.27.25.276,2019-08-15,11.509396,11.796293,7.637703,1036.95729,651264.0,1,False,0
5,6,441,120,124,60,43,43,44,138,45,...,12.31.11.492,2019-08-15,11.481887,12.079382,7.252875,1005.931033,651264.0,1,False,0
6,7,427,117,121,62,41,43,45,149,45,...,12.35.29.490,2019-08-15,11.368161,12.239047,9.185722,1278.059631,651264.0,1,False,0
7,8,380,115,117,60,48,41,41,137,43,...,12.38.58.626,2019-08-15,12.035826,12.366945,9.461826,1408.358733,651264.0,1,False,0
8,9,416,118,120,62,44,42,40,140,44,...,12.44.15.007,2019-08-15,12.090695,12.005622,8.97372,1302.592059,651264.0,1,False,0
9,10,424,119,122,65,46,44,40,151,46,...,12.49.59.038,2019-08-15,11.963981,12.202548,8.696407,1269.597497,651264.0,1,False,0


In [20]:
# Persist the annotated table (measurements, cloud features, anomaly flags)
# without writing the DataFrame index as an extra column.
df_for_model.to_csv(path_or_buf="annotated.csv", index=False)
