In [51]:
!pip install torch torchvision ultralytics opencv-python pandas numpy scikit-learn



In [52]:
import os
from datetime import datetime
from google.colab import drive

# Root of the project folder on the mounted Google Drive; the data and model
# paths used further down are all built from it.
BASE_PATH = "/content/drive/My Drive/capstone"

# Mount Google Drive at /content/drive (force_remount=True is passed so the
# call is also usable when this cell is re-run).
drive.mount('/content/drive',force_remount=True)

import sys

# Make Python modules stored in the project folder importable. The membership
# check keeps re-runs of this cell from stacking duplicate sys.path entries.
if BASE_PATH not in sys.path:
    sys.path.append(BASE_PATH)

print("Setup completed successfully!")

Mounted at /content/drive
Setup completed successfully!


In [53]:
import os
import cv2
import pandas as pd
import numpy as np
from ultralytics import YOLO
from tqdm import tqdm

# ========================
# PATH SETUP
# ========================
RAW_IMG_DIR = f"{BASE_PATH}/raw_data/images_selected"
LABEL_PATH = f"{BASE_PATH}/raw_data/labels_filtered.csv"

OUT_IMG_DIR = f"{BASE_PATH}/processed_data/images_selected_cropped_bbox"
OUT_CSV_PATH = f"{BASE_PATH}/processed_data/features_bbox.csv"

os.makedirs(OUT_IMG_DIR, exist_ok=True)

# ========================
# LOAD DATA
# ========================
# Label table; the loop below reads the columns image_name, cow_id,
# distance_m and weight_kg from every row.
df = pd.read_csv(LABEL_PATH)

# ========================
# LOAD YOLO MODEL
# ========================
# NOTE(review): this path is relative to the current working directory, unlike
# the other paths in this cell (which hang off BASE_PATH) — confirm it is intended.
model = YOLO("models/yolov8n.pt")

# One dict per successfully cropped image; turned into a DataFrame at the end.
records = []

# ========================
# PROCESS LOOP
# ========================
for _, row in tqdm(df.iterrows(), total=len(df)):
    # Single quotes inside the f-string: reusing the outer double quotes is a
    # SyntaxError on Python versions before 3.12.
    img_name = f"Salinan {row['image_name']}"
    cow_id = row["cow_id"]
    distance = row["distance_m"]
    weight = row["weight_kg"]

    img_path = os.path.join(RAW_IMG_DIR, img_name)
    img = cv2.imread(img_path)

    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        print(f"Image not found: {img_name}")
        continue

    # YOLO inference
    # NOTE(review): no class filter is passed, so the largest box may belong to
    # any detected object, not necessarily a cow — consider restricting classes.
    results = model(img, conf=0.4, verbose=False)

    if len(results[0].boxes) == 0:
        print(f"No cow detected: {img_name}")
        continue

    # Take the LARGEST bounding box
    boxes = results[0].boxes.xyxy.cpu().numpy()
    areas = (boxes[:,2]-boxes[:,0]) * (boxes[:,3]-boxes[:,1])
    idx = np.argmax(areas)
    x1, y1, x2, y2 = boxes[idx].astype(int)

    # Clamp the box to the image so the recorded size always equals the size
    # of the crop that is written: NumPy slicing would silently truncate an
    # overshooting box and wrap around on a negative coordinate.
    img_h, img_w = img.shape[:2]
    x1, x2 = np.clip([x1, x2], 0, img_w)
    y1, y2 = np.clip([y1, y2], 0, img_h)

    # Bounding box size in pixels
    bbox_w = x2 - x1
    bbox_h = y2 - y1
    bbox_area = bbox_w * bbox_h

    # A degenerate box would produce an empty crop that cannot be encoded.
    if bbox_w <= 0 or bbox_h <= 0:
        print(f"Empty bounding box, skipped: {img_name}")
        continue

    # Crop image
    crop = img[y1:y2, x1:x2]

    # Insert "_crop" before the extension (same result as before for *.jpg,
    # but also correct for other extensions or names containing ".jpg" twice).
    stem, ext = os.path.splitext(img_name)
    out_img_name = f"{stem}_crop{ext}"
    out_img_path = os.path.join(OUT_IMG_DIR, out_img_name)

    # cv2.imwrite reports failure through its return value; do not record a
    # feature row for a file that was never written.
    if not cv2.imwrite(out_img_path, crop):
        print(f"Failed to write crop: {out_img_path}")
        continue

    records.append({
        "image_name": out_img_name,
        "cow_id": cow_id,
        "bbox_width_px": bbox_w,
        "bbox_height_px": bbox_h,
        "bbox_area_px": bbox_area,
        "distance_m": distance,
        "weight_kg": weight
    })

# ========================
# SAVE FEATURES
# ========================
features_df = pd.DataFrame(records)
features_df.to_csv(OUT_CSV_PATH, index=False)

print("Preprocessing DONE.")
print(f"Saved cropped images to: {OUT_IMG_DIR}")
print(f"Saved features CSV to: {OUT_CSV_PATH}")


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 35/35 [00:02<00:00, 15.32it/s]

Preprocessing DONE.
Saved cropped images to: /content/drive/My Drive/capstone/processed_data/images_selected_cropped_bbox
Saved features CSV to: /content/drive/My Drive/capstone/processed_data/features_bbox.csv





In [54]:
# features_df is already built from `records` and written to OUT_CSV_PATH at
# the end of the preprocessing cell, so this cell only previews the table
# instead of rebuilding and rewriting the same file.
features_df.head()

Unnamed: 0,image_name,cow_id,bbox_width_px,bbox_height_px,bbox_area_px,distance_m,weight_kg
0,Salinan 10_cattle_315_733__crop.jpg,cow_1,813,525,426825,3.15,700
1,Salinan 11_cattle_289_906__crop.jpg,cow_1,902,578,521356,2.89,700
2,Salinan 12_cattle_284_799__crop.jpg,cow_1,889,610,542290,2.84,700
3,Salinan 13_cattle_233_1221__crop.jpg,cow_1,1111,659,732149,2.33,700
4,Salinan 14_cattle_200_1526__crop.jpg,cow_1,1258,654,822732,2.0,700


## MODELING

In [55]:
import os
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm


In [56]:
class CowBBoxDataset(Dataset):
    """Cropped images paired with one numeric size feature and a weight target.

    Every item is a tuple ``(image, size_feature, weight)``:

    * ``image``: the file named by the row's ``image_name`` column, opened
      from ``img_dir``, converted to RGB and passed through ``self.tf``.
    * ``size_feature``: float32 tensor of shape ``(1,)`` holding
      ``bbox_area_px * distance_m ** 2 * size_scale``.
    * ``weight``: float32 scalar tensor built from ``weight_kg``.

    Args:
        df: Table with the columns ``image_name``, ``bbox_area_px``,
            ``distance_m`` and ``weight_kg``. A copy with a fresh 0..n-1
            index is stored, so rows are addressed by position.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB PIL image. When
            omitted, the default pipeline is used: resize to 224x224,
            convert to tensor, normalise per channel.
        size_scale: Multiplier applied to the size feature. The default of
            1.0 reproduces the unscaled value. The unscaled product is large
            (a 426825 px box at 3.15 m gives roughly 4.2e6), so a factor such
            as 1e-6 can be passed to bring it to order one.
    """

    def __init__(self, df, img_dir, transform=None, size_scale=1.0):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.size_scale = size_scale

        if transform is None:
            # Default preprocessing; the mean/std values are the ones commonly
            # used with ImageNet-pretrained torchvision models.
            transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ])
        self.tf = transform

    def __len__(self):
        """Return the number of rows (one sample per row)."""
        return len(self.df)

    def _size_feature(self, row):
        """Return ``bbox_area_px * distance_m ** 2 * size_scale`` as a (1,) float32 tensor."""
        raw = row["bbox_area_px"] * (row["distance_m"] ** 2)
        return torch.tensor([raw * self.size_scale], dtype=torch.float32)

    def __getitem__(self, idx):
        """Load and return ``(image, size_feature, weight)`` for position ``idx``."""
        row = self.df.iloc[idx]

        img = Image.open(
            os.path.join(self.img_dir, row["image_name"])
        ).convert("RGB")
        img = self.tf(img)

        # === NUMERIC FEATURE ===
        size_feature = self._size_feature(row)

        weight = torch.tensor(row["weight_kg"], dtype=torch.float32)

        return img, size_feature, weight


In [57]:
# Load the per-image feature table written by the preprocessing step.
df = pd.read_csv(OUT_CSV_PATH)

# Seeded 80/20 split of the rows.
# NOTE(review): rows are split individually, so images of the same cow_id can
# end up in both partitions — confirm that is acceptable for evaluation.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# One dataset per partition, both reading crops from OUT_IMG_DIR.
train_ds, val_ds = (
    CowBBoxDataset(part, OUT_IMG_DIR) for part in (train_df, val_df)
)

# Only the training loader shuffles; the validation loader keeps dataset order.
val_loader = DataLoader(val_ds, batch_size=16)
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)


In [58]:
class BBoxWeightModel(nn.Module):
    """ResNet-18 image features concatenated with one numeric feature, regressed to a scalar.

    The ResNet-18 classification layer is replaced by an identity, so the
    backbone outputs its pooled feature vector. That vector is concatenated
    with the single size feature and fed through a two-layer MLP.

    Args:
        pretrained: When True (default) the backbone starts from the
            ImageNet checkpoint. Pass False to build the architecture without
            loading weights, e.g. right before ``load_state_dict``.
    """

    def __init__(self, pretrained=True):
        super().__init__()

        # `pretrained=True` is deprecated in torchvision in favour of the
        # `weights=` enum; IMAGENET1K_V1 is the checkpoint that flag selected.
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = models.resnet18(weights=weights)

        # Read the feature width from the layer being removed instead of
        # hard-coding it (512 for ResNet-18).
        feat_dim = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.cnn = backbone

        # +1 for the single numeric size feature appended in forward().
        self.regressor = nn.Sequential(
            nn.Linear(feat_dim + 1, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, img, size_feat):
        """Return one prediction per sample.

        Args:
            img: Image batch accepted by the backbone.
            size_feat: Tensor of shape ``(batch, 1)``; it is concatenated to
                the image features along dim 1.

        Returns:
            Tensor of shape ``(batch,)`` (the trailing unit dim is squeezed).
        """
        img_feat = self.cnn(img)
        x = torch.cat([img_feat, size_feat], dim=1)
        return self.regressor(x).squeeze(1)


In [59]:
# Seed PyTorch's global RNG so the random initialisation of the regression
# head and the shuffle order drawn by the training DataLoader repeat from run
# to run (GPU kernels may still introduce small nondeterminism).
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = BBoxWeightModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Optimise squared error; MAE is what the training loop reports.
criterion = nn.MSELoss()

# The epoch budget (EPOCHS) is defined once, in the training cell, instead of
# also being set here to a value that was overwritten before use.




In [60]:
# Train with early stopping on validation MAE, checkpointing the best epoch.
best_val_mae = float("inf")
MODEL_OUT = f"{BASE_PATH}/models/bbox_weight_model.pt"
EPOCHS = 50      # upper bound on the number of epochs
PATIENCE = 5     # consecutive non-improving epochs tolerated before stopping

# torch.save does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(MODEL_OUT), exist_ok=True)

patience_counter = 0

for epoch in range(EPOCHS):
    # ===== TRAIN =====
    model.train()
    train_preds, train_trues = [], []

    for img, size_feat, weight in train_loader:
        img = img.to(device)
        size_feat = size_feat.to(device)
        weight = weight.to(device)

        pred = model(img, size_feat)
        loss = criterion(pred, weight)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Collected while the weights are still being updated, so the train
        # MAE below is a running figure over the epoch, not a clean re-evaluation.
        train_preds.extend(pred.detach().cpu().numpy())
        train_trues.extend(weight.cpu().numpy())

    train_mae = mean_absolute_error(train_trues, train_preds)

    # ===== VALIDATION =====
    model.eval()
    val_preds, val_trues = [], []

    with torch.no_grad():
        for img, size_feat, weight in val_loader:
            img = img.to(device)
            size_feat = size_feat.to(device)

            pred = model(img, size_feat)

            # Targets stay on the CPU here: they are only needed for the MAE.
            val_preds.extend(pred.cpu().numpy())
            val_trues.extend(weight.numpy())

    val_mae = mean_absolute_error(val_trues, val_preds)

    print(
        f"Epoch {epoch+1}/{EPOCHS} | "
        f"Train MAE: {train_mae:.2f} | "
        f"Val MAE: {val_mae:.2f}"
    )

    # ===== EARLY STOPPING =====
    if val_mae < best_val_mae:
        # Strict improvement: checkpoint the weights and reset the counter.
        best_val_mae = val_mae
        patience_counter = 0
        torch.save(model.state_dict(), MODEL_OUT)
        print(f"‚úÖ Best model saved (Val MAE: {best_val_mae:.2f})")
    else:
        patience_counter += 1
        print(f"‚è≥ No improvement ({patience_counter}/{PATIENCE})")

    if patience_counter >= PATIENCE:
        print("üõë Early stopping triggered")
        break


Epoch 1/50 | Train MAE: 751.08 | Val MAE: 613.80
‚úÖ Best model saved (Val MAE: 613.80)
Epoch 2/50 | Train MAE: 429.84 | Val MAE: 540.95
‚úÖ Best model saved (Val MAE: 540.95)
Epoch 3/50 | Train MAE: 702.83 | Val MAE: 169.74
‚úÖ Best model saved (Val MAE: 169.74)
Epoch 4/50 | Train MAE: 295.88 | Val MAE: 541.52
‚è≥ No improvement (1/5)
Epoch 5/50 | Train MAE: 530.56 | Val MAE: 396.11
‚è≥ No improvement (2/5)
Epoch 6/50 | Train MAE: 236.80 | Val MAE: 153.07
‚úÖ Best model saved (Val MAE: 153.07)
Epoch 7/50 | Train MAE: 354.19 | Val MAE: 246.77
‚è≥ No improvement (1/5)
Epoch 8/50 | Train MAE: 293.15 | Val MAE: 169.77
‚è≥ No improvement (2/5)
Epoch 9/50 | Train MAE: 204.77 | Val MAE: 394.86
‚è≥ No improvement (3/5)
Epoch 10/50 | Train MAE: 310.45 | Val MAE: 148.16
‚úÖ Best model saved (Val MAE: 148.16)
Epoch 11/50 | Train MAE: 122.13 | Val MAE: 129.30
‚úÖ Best model saved (Val MAE: 129.30)
Epoch 12/50 | Train MAE: 243.54 | Val MAE: 46.59
‚úÖ Best model saved (Val MAE: 46.59)
Epoch 13/50 |

In [61]:
# Directory that receives the validation crops annotated with the predicted
# and true weight; created up front so the later save calls cannot fail on a
# missing folder.
PRED_OUT_DIR = f"{BASE_PATH}/processed_data/predicted_images"
os.makedirs(PRED_OUT_DIR, exist_ok=True)


In [62]:
from PIL import ImageDraw, ImageFont

# Annotate with the best checkpoint saved during training rather than with
# whatever weights the final training epoch left in memory.
model.load_state_dict(torch.load(MODEL_OUT, map_location=device))
model.eval()

with torch.no_grad():
    for i in range(len(val_ds)):
        img, size_feat, true_w = val_ds[i]

        # The dataset indexes rows by position, so val_df.iloc[i] is the row
        # that produced sample i.
        img_name = val_df.iloc[i]["image_name"]

        # Add a batch dimension of 1 for the single-sample forward pass.
        pred_w = model(
            img.unsqueeze(0).to(device),
            size_feat.unsqueeze(0).to(device)
        ).item()

        # Re-open the untransformed crop; the tensor from the dataset is
        # resized and normalised and therefore not suitable for display.
        img_pil = Image.open(
            os.path.join(OUT_IMG_DIR, img_name)
        ).convert("RGB")

        draw = ImageDraw.Draw(img_pil)

        text = (
            f"Pred: {pred_w:.1f} kg\n"
            f"True: {true_w.item():.1f} kg"
        )

        # Black backdrop so the white text stays readable on any image.
        draw.rectangle(
            [(10,10), (260,80)],
            fill=(0,0,0)
        )

        draw.text(
            (15,15),
            text,
            fill=(255,255,255)
        )

        img_pil.save(os.path.join(PRED_OUT_DIR, img_name))


In [63]:
# Restore the checkpoint saved at the lowest validation MAE before scoring.
best_state = torch.load(MODEL_OUT, map_location=device)
model.load_state_dict(best_state)
model.eval()

test_preds, test_trues = [], []

# NOTE(review): this iterates val_loader — the same split that drove early
# stopping — so the figure printed as "TEST MAE" is a validation score.
with torch.no_grad():
    for batch_img, batch_feat, batch_weight in val_loader:
        batch_pred = model(batch_img.to(device), batch_feat.to(device))

        test_preds.extend(batch_pred.cpu().numpy())
        test_trues.extend(batch_weight.numpy())

test_mae = mean_absolute_error(test_trues, test_preds)
print(f"üìä TEST MAE: {test_mae:.2f} kg")


üìä TEST MAE: 34.84 kg


In [64]:
# Display the individual per-image predictions that went into the MAE.
test_preds

[np.float32(230.196),
 np.float32(354.71844),
 np.float32(330.68036),
 np.float32(348.63544),
 np.float32(360.81415),
 np.float32(370.48602),
 np.float32(369.83466)]