In [None]:
from IPython.display import clear_output
# Cell 1: Install required packages
# Uncomment the pip line below when the environment does not already provide
# `ultralytics` and `kagglehub`.
#!pip install ultralytics kagglehub
# Clear this cell's output area (e.g. the installer log when the line above is enabled).
clear_output()

In [None]:
# Cell 2: Import libraries
import os
import random
import shutil

import cv2
import kagglehub
import matplotlib.pyplot as plt
from ultralytics import YOLO

In [None]:
# Optional: set the KaggleHub cache to a directory inside /content/ (uncomment the next line to enable)
#os.environ["KAGGLEHUB_CACHE"] = "/content/data"

In [None]:
# Cell 3: Download dataset
# Kaggle handle ("<owner>/<dataset>") of the dataset used throughout this notebook.
DATASET_HANDLE = "simplexitypipeline/pipeline-defect-dataset"

# `path` is the local directory returned by kagglehub; later cells build on it.
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Path to dataset files:", path)

In [None]:
# Cell 4: Setup dataset paths and class names
dataset_path = path

# Images and labels each sit under a doubled directory name:
#   <dataset>/images/images/train  and  <dataset>/labels/labels/train
train_images_path = os.path.join(dataset_path, 'images', 'images', 'train')
train_labels_path = os.path.join(dataset_path, 'labels', 'labels', 'train')

# Class id -> human-readable defect name (ids follow the list order, starting at 0).
class_names = dict(enumerate([
    'Deformation',
    'Obstacle',
    'Rupture',
    'Disconnect',
    'Misalignment',
    'Deposition',
]))

# Keep only directory entries whose name ends with a known image extension.
image_extensions = ('.jpg', '.png', '.jpeg')
image_files = list(filter(
    lambda entry: entry.endswith(image_extensions),
    os.listdir(train_images_path),
))
print(f"✓ Found {len(image_files)} training images")

# Every entry in the label directory is counted, whatever its extension.
label_file_count = len(os.listdir(train_labels_path))
print(f"✓ Found {label_file_count} label files")

In [None]:
# Cell 5: Create YOLO config file
# Derive the class list from `class_names` so the YAML can never drift from the
# mapping used for drawing/printing elsewhere in the notebook.
yolo_names = [class_names[class_id] for class_id in sorted(class_names)]

# NOTE(review): `val` points at the training split, so validation metrics are
# measured on data the model has seen. Point it at a held-out split if the
# dataset provides one.
config_content = f"""path: {dataset_path}
train: images/images/train
val: images/images/train

nc: {len(yolo_names)}
names: {yolo_names}
"""

config_path = os.path.join(dataset_path, 'config.yaml')
with open(config_path, 'w') as f:
    f.write(config_content)

# Create symlink for labels
# The trainer is expected to look for labels next to the images directory
# (<dataset>/images/labels), while the files actually live in
# <dataset>/labels/labels, so a symlink bridges the two locations.
expected_labels = os.path.join(dataset_path, 'images', 'labels')
actual_labels = os.path.join(dataset_path, 'labels', 'labels')

os.makedirs(os.path.join(dataset_path, 'images'), exist_ok=True)

# Remove whatever currently occupies the link location (a stale or broken link,
# a stray file, or a real directory) so the cell can be re-run safely.
# Pure-Python calls are used instead of a shell command so that paths with
# spaces or shell metacharacters work and failures raise instead of passing silently.
if os.path.islink(expected_labels) or os.path.isfile(expected_labels):
    os.remove(expected_labels)
elif os.path.isdir(expected_labels):
    shutil.rmtree(expected_labels)
os.symlink(actual_labels, expected_labels, target_is_directory=True)

print(f"✓ Config created at: {config_path}")
print(f"✓ Labels symlink created: {len(os.listdir(expected_labels))} files accessible")

In [None]:
# Cell 6: Train the model
# Start from the pretrained YOLOv8-nano checkpoint and fine-tune it on the
# dataset described by `config_path`.
model = YOLO('yolov8n.pt')

print("Starting training...")
results = model.train(
    data=config_path,
    epochs=10,
    imgsz=320,
    batch=128,
    name='pipeline_defect_model',
    patience=5,
    save=True,
    plots=True,
    workers=1
)

print("\n✓ Training completed!")
# Report the directory the trainer actually used: when a run with the same
# `name` already exists, a numeric suffix is appended, so a hard-coded path
# would be wrong on re-runs.
print(f"✓ Model saved in: {model.trainer.save_dir}")

In [None]:
# Cell 7: Download pretrained model
# (kagglehub is already imported in the import cell; the re-import is kept so
# this cell can also be run on its own.)
import kagglehub

# Kaggle model handle of the pretrained pipeline-defect detector.
MODEL_HANDLE = "alihabibullah/pretrained-pipeline-yolo-8/pyTorch/default"
pathModel = kagglehub.model_download(MODEL_HANDLE)

print("Path to model files:", pathModel)

In [None]:
# Show the top-level entries of the downloaded dataset directory.
os.listdir(path)

In [None]:
# Cell 8: Load trained model (choose one of the two options)
# Option 1: use the model trained in this notebook (uncomment to use,
# and comment out Option 2 below)
#trained_model = YOLO('runs/detect/pipeline_defect_model/weights/best.pt')

# Option 2 (active): use the pretrained weights downloaded from Kaggle
pretrained_weights = os.path.join(pathModel, 'runs/detect/pipeline_defect_model/weights/best.pt')
trained_model = YOLO(pretrained_weights)

print("✓ Model loaded successfully")

In [None]:
# Cell 9: Run predictions on random sample images and compare with ground truth
NUM_SAMPLES = 3  # upper bound on how many random training images are shown


def load_yolo_boxes(label_path, img_width, img_height):
    """Read a YOLO label file and return its boxes in pixel coordinates.

    Each usable line is read as ``class_id x_center y_center width height``;
    the four box values are scaled by the image size, so they are treated as
    fractions of the image width/height.

    Args:
        label_path: Path of the ``.txt`` label file. A missing file yields
            an empty list.
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        A list of ``(class_id, x1, y1, x2, y2)`` tuples with integer pixel
        corner coordinates. Lines with fewer than five fields (e.g. blank
        lines) are skipped.
    """
    boxes = []
    if not os.path.exists(label_path):
        return boxes
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            if len(parts) < 5:
                # Blank or truncated line: nothing to draw.
                continue
            class_id = int(parts[0])
            x_center, y_center, width, height = map(float, parts[1:5])
            x1 = int((x_center - width / 2) * img_width)
            y1 = int((y_center - height / 2) * img_height)
            x2 = int((x_center + width / 2) * img_width)
            y2 = int((y_center + height / 2) * img_height)
            boxes.append((class_id, x1, y1, x2, y2))
    return boxes


if not image_files:
    raise ValueError(f"No training images found in {train_images_path}")

# Never ask for more samples than there are images (random.sample would raise).
sample_images = random.sample(image_files, min(NUM_SAMPLES, len(image_files)))
num_cols = len(sample_images)

# squeeze=False keeps `axes` two-dimensional even when only one column is drawn.
fig, axes = plt.subplots(2, num_cols, figsize=(6 * num_cols, 12), squeeze=False)
fig.suptitle('Ground Truth (Top) vs Model Predictions (Bottom)', fontsize=16, fontweight='bold')

for idx, img_name in enumerate(sample_images):
    img_path = os.path.join(train_images_path, img_name)
    # Swap only the real extension for '.txt'; this also covers '.jpeg' files
    # and names that merely contain '.jpg'/'.png' somewhere in the middle.
    label_path = os.path.join(train_labels_path, os.path.splitext(img_name)[0] + '.txt')

    axes[0, idx].set_title(f'GT: {img_name}', fontsize=10)
    axes[0, idx].axis('off')
    axes[1, idx].set_title(f'Pred: {img_name}', fontsize=10)
    axes[1, idx].axis('off')

    # Ground Truth
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        print(f"\n--- {img_name} ---")
        print("Could not read image; skipping")
        continue
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    img_height, img_width = image.shape[:2]
    gt_boxes = load_yolo_boxes(label_path, img_width, img_height)
    for class_id, x1, y1, x2, y2 in gt_boxes:
        # Fall back to the raw id when a label uses a class not in class_names.
        label_text = class_names.get(class_id, str(class_id))
        cv2.rectangle(image_rgb, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the caption inside the image for boxes touching the top edge.
        cv2.putText(image_rgb, label_text, (x1, max(y1 - 10, 12)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    axes[0, idx].imshow(image_rgb)

    # Predictions
    results = trained_model(img_path, conf=0.25)
    image_pred_rgb = cv2.cvtColor(results[0].plot(), cv2.COLOR_BGR2RGB)

    axes[1, idx].imshow(image_pred_rgb)

    # Print detection details
    print(f"\n--- {img_name} ---")
    print(f"Ground Truth Objects: {len(gt_boxes)}")
    print(f"Detected Objects: {len(results[0].boxes)}")
    for box in results[0].boxes:
        cls_id = int(box.cls[0])
        conf = float(box.conf[0])
        print(f"  - {class_names.get(cls_id, str(cls_id))}: {conf:.2f}")

plt.tight_layout()
plt.show()