In [None]:

import numpy as np
import pandas as pd
import cv2

# Mount Google Drive so files under /content/drive are reachable from this notebook
from google.colab import drive

drive.mount(mountpoint='/content/drive')


In [None]:
!pip install ultralytics==8.3.124


In [None]:
# Load the YOLOv8 trained segmentation model
from ultralytics import YOLO

model_path = "/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/YoloRetrain.pt"

# Pass the variable, not the quoted name: YOLO("model_path") asks for a file literally
# called "model_path", so the retrained weights above were never the ones requested.
model = YOLO(model_path)


In [None]:
# Load images for larvae segmentation and save the annotated results
import glob

# glob.glob returns matches in arbitrary order; sorting keeps the image order
# (and the row order of the CSVs built from `results`) stable between runs.
image_paths = sorted(
    glob.glob("/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/input-images/*.jpg")  # Adjust path
)

# Fail early with a readable message instead of handing the model an empty list.
if not image_paths:
    raise FileNotFoundError(
        "No .jpg images found in the input-images folder; "
        "check the path and that Google Drive is mounted."
    )

# results[k] corresponds to image_paths[k]; later cells rely on that alignment.
results = model(image_paths, save=True, project="/content/drive/MyDrive/BSF_Phenotyping_pipelines/results/output",
    name="bsf_segment_results")



0: 480x640 1 larva, 3974.3ms
Speed: 9.5ms preprocess, 3974.3ms inference, 15.1ms postprocess per image at shape (1, 3, 480, 640)
Results saved to [1m/content/drive/MyDrive/BSF_Phenotyping_pipelines/results/output/bsf_segment_results2[0m


In [None]:
# Feature extraction: area and perimeter of every contour found in every predicted mask.
# One record is collected per contour.
data = []

for idx, prediction in enumerate(results):
    # Keep only the file name of the image this result belongs to
    file_name = image_paths[idx].rsplit("/", 1)[-1]

    # Results without masks (no detections) contribute no rows
    if prediction.masks is not None:
        mask_stack = prediction.masks.data.cpu().numpy()  # shape: [n, H, W]

        for mask_index, mask_array in enumerate(mask_stack):
            # Threshold the float mask at 0.5 into a 0/255 uint8 image
            mask_u8 = np.where(mask_array > 0.5, 255, 0).astype(np.uint8)

            # Outer contours only
            outlines, _hierarchy = cv2.findContours(mask_u8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            # One row per contour with its area and closed-curve perimeter
            data.extend(
                {
                    'image': file_name,
                    'mask_id': mask_index,
                    'area': cv2.contourArea(outline),
                    'perimeter': cv2.arcLength(outline, closed=True),
                }
                for outline in outlines
            )

# Write the collected records to a CSV on Drive
df = pd.DataFrame(data)
df.to_csv("/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/mask_metrics.csv", index=False)
print("Saved to mask_metrics.csv")

Saved to mask_metrics.csv


In [None]:
!pip uninstall opencv-python
!pip install opencv-contrib-python


In [None]:
#pip install fil_finder astropy scikit-image opencv-python-headless

import os
import cv2
import numpy as np
import pandas as pd
import torch
import math
data = []

import os
import math
import cv2
import numpy as np
import pandas as pd

data = []
Calibration_factor = 0.0119 #Pixel to mm

def calculate_length_width(perimeter, area, calibration_factor=None):
    """Estimate length and width from a contour's perimeter and area.

    The shape is modelled as a rectangle with sides L >= W, so that
    ``perimeter = 2 * (L + W)`` and ``area = L * W``. Solving the resulting
    quadratic gives ``L, W = perimeter / 4 +/- sqrt((perimeter / 4) ** 2 - area)``.

    The previous version used ``perimeter ** 2 / 4`` under the square root, which
    does not solve that system (a 4 x 2 rectangle came back as roughly 8.29 x 2.29).
    NOTE(review): values produced by this function therefore change; confirm that
    the features used to train any downstream model were computed the same way.

    Args:
        perimeter: Contour perimeter, in pixels.
        area: Contour area, in square pixels.
        calibration_factor: Multiplier applied to both results. Defaults to the
            module-level ``Calibration_factor``, read at call time.

    Returns:
        Tuple ``(length, width)``, each multiplied by the calibration factor.

    Raises:
        ValueError: If ``(perimeter / 4) ** 2 < area``, i.e. no rectangle has this
            perimeter/area pair (the shape is rounder than a square).
    """
    if calibration_factor is None:
        calibration_factor = Calibration_factor

    # Half of the mean side length: (L + W) / 2
    quarter_perimeter = perimeter / 4

    # Discriminant term: ((L - W) / 2) ** 2 for a true rectangle
    inner_sqrt = quarter_perimeter ** 2 - area
    if inner_sqrt < 0:
        raise ValueError("Invalid inputs: square root of a negative number.")

    sqrt_value = math.sqrt(inner_sqrt)

    # Larger root is the length, smaller root is the width
    length = (quarter_perimeter + sqrt_value) * calibration_factor
    width = (quarter_perimeter - sqrt_value) * calibration_factor

    return length, width

# Scale factor applied to every mask before it is measured.
# NOTE(review): presumably the ratio between the original image size and the mask
# resolution returned by the model — confirm against the acquisition setup.
MASK_RESIZE_FACTOR = 6.3375

# Build one metrics row per predicted mask: area, perimeter, and derived length/width.
for i, result in enumerate(results):
    image_name = os.path.basename(image_paths[i])

    if result.masks is None:
        continue  # Skip if no masks

    masks = result.masks.data.cpu().numpy()  # [n, H, W]

    for j, mask in enumerate(masks):
        # Threshold the float mask at 0.5 into a 0/255 uint8 image
        binary_mask = (mask > 0.5).astype(np.uint8) * 255

        # Resize for analysis
        # NOTE(review): the default interpolation blends edge pixels into intermediate
        # values, and findContours treats every non-zero pixel as foreground; consider
        # cv2.INTER_NEAREST if the measured outline should stay strictly binary.
        resized_mask = cv2.resize(binary_mask, None, fx=MASK_RESIZE_FACTOR, fy=MASK_RESIZE_FACTOR)

        # --------- AREA & PERIMETER ---------
        contours, _ = cv2.findContours(resized_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if len(contours) == 0:
            continue

        # A mask can split into several blobs; measure the largest one rather than
        # whichever contour happens to be listed first.
        main_contour = max(contours, key=cv2.contourArea)

        area = cv2.contourArea(main_contour)
        perimeter = cv2.arcLength(main_contour, closed=True)

        # --------- LENGTH & WIDTH ---------
        try:
            length, width = calculate_length_width(perimeter, area)
        except ValueError:
            # Keep the row but leave length/width empty when they cannot be derived
            length, width = None, None

        # --------- Append all metrics ---------
        data.append({
            'image': image_name,
            'mask_id': j,
            'area': area,
            'perimeter': perimeter,
            'length': length,
            'width': width
        })

# Save to CSV
df = pd.DataFrame(data)
output_csv = "/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/larva_metrics.csv"
df.to_csv(output_csv, index=False)
print(f"Saved larva metrics to: {output_csv}")



Saved larva metrics to: /content/drive/MyDrive/BSF_Phenotyping_pipelines/models/larva_metrics.csv


In [None]:
#For weight prediction
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import math
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import make_scorer

# Load the Larva dataset
df = pd.read_csv(r"/content/drive/MyDrive/BSF_Phenotyping_pipelines/AllImgfeatures_YoloRetrainmodel.csv")

# Show summary statistics; a bare expression in the middle of a cell displays nothing,
# so print it explicitly.
print(df.describe())

# Split the data into features and labels.
# The model is fit on a plain array, so the column ORDER here is what prediction must match.
X = np.array(df[['Area', 'Length_Actual', 'Wid2']])
y = df['Weight']

# Fixed random_state so the train/test split (and every score below) is reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Define cross-validation strategy (5-fold CV)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize model
LR = LinearRegression()

# Evaluate LR with K-Fold CV on the training split (no hyperparameter tuning)
rmse_lr_scores = np.sqrt(-cross_val_score(LR, X_train, y_train, scoring='neg_mean_squared_error', cv=kf))
r2_lr_scores = cross_val_score(LR, X_train, y_train, scoring='r2', cv=kf)

# Train the final model on the full training set
LR.fit(X_train, y_train)

# Predictions on test set
y_pred_lr = LR.predict(X_test)

# RMSE and R² on the held-out test set
rmse_lr = np.sqrt(mean_squared_error(y_test, y_pred_lr))
r2_lr = r2_score(y_test, y_pred_lr)

# Mean and standard deviation across the CV folds
lr_rmse_mean, lr_rmse_std = rmse_lr_scores.mean(), rmse_lr_scores.std()
lr_r2_mean, lr_r2_std = r2_lr_scores.mean(), r2_lr_scores.std()

# Print results. The ± spread belongs to the CV folds, so it is reported with the CV
# mean; the single test-set score is reported on its own line.
print(f"Linear Regression (5-fold CV) - RMSE: {lr_rmse_mean:.3f} ± {lr_rmse_std:.2f}, R²: {lr_r2_mean:.2f} ± {lr_r2_std:.2f}")
print(f"Linear Regression (test set)  - RMSE: {rmse_lr:.3f}, R²: {r2_lr:.2f}")


In [None]:
import joblib

# Persist the fitted linear regression to Drive; the call returns the list of written files
joblib.dump(
    value=LR,
    filename="/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/linear_regression_model.pkl",
)



['/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/linear_regression_model.pkl']

In [None]:

# === 1. Load the saved regression model ===
# NOTE(review): this rebinds `model`, which earlier in the notebook holds the YOLO
# segmentation model; re-run the YOLO loading cell before segmenting more images.
model = joblib.load("/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/linear_regression_model.pkl")

# === 2. Load the extracted larva metrics ===
csv_path = "/content/drive/MyDrive/BSF_Phenotyping_pipelines/models/larva_metrics.csv"
df = pd.read_csv(csv_path)

# === 3. Extract features for prediction ===
# The regression was fit on a positional array built from ['Area', 'Length_Actual', 'Wid2'],
# so the columns here must be supplied in that same order.
# NOTE(review): confirm these columns use the same units/scale as the training features.
feature_columns = ['area', 'length', 'width']
missing_columns = [col for col in feature_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Missing feature columns in {csv_path}: {missing_columns}")

# Rows whose length/width could not be derived are stored as empty cells (NaN);
# the regressor rejects NaN input, so only complete rows are scored.
complete_rows = df[feature_columns].notna().all(axis=1)
X_new = df.loc[complete_rows, feature_columns].values

# === 4. Predict weights ===
df['Predicted_Weight'] = np.nan  # incomplete rows keep NaN
if complete_rows.any():
    predicted_weights = model.predict(X_new)
    # === 5. Add predictions to the DataFrame ===
    df.loc[complete_rows, 'Predicted_Weight'] = predicted_weights
else:
    predicted_weights = np.array([])

# === 6. Save to a new CSV (optional) ===
output_csv = "/content/drive/MyDrive/BSF_Phenotyping_pipelines/larva_with_predicted_weights.csv"
df.to_csv(output_csv, index=False)

print(f"Predictions saved to: {output_csv}")


Predictions saved to: /content/drive/MyDrive/BSF_Phenotyping_pipelines/larva_with_predicted_weights.csv
