In [18]:
pip install ultralytics 

Note: you may need to restart the kernel to use updated packages.


In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import DINOv3ConvNextModel, DINOv3ConvNextConfig
from huggingface_hub import login
import numpy as np

In [20]:
from ultralytics import YOLO
from ultralytics.nn.modules import Detect
from ultralytics.nn.tasks import DetectionModel

In [21]:

login(new_session=False)

In [22]:
# ...existing code...
class ConvNextBackboneAdapter(nn.Module):
    """
    Adapter to make ConvNext compatible with YOLO architecture
    Converts ConvNext features to YOLO expected format
    """
    def __init__(self, model_name="facebook/dinov3-convnext-base-pretrain-lvd1689m"):
        super().__init__()
        
        # Load the pre-trained ConvNext model
        config = DINOv3ConvNextConfig.from_pretrained(model_name)
        self.convnext = DINOv3ConvNextModel.from_pretrained(model_name, config=config)
        
        # Fix: We need to dynamically determine the actual channel sizes
        # Test forward pass to get actual dimensions
        with torch.no_grad():
            test_input = torch.randn(1, 3, 224, 224)
            outputs = self.convnext(test_input, output_hidden_states=True)
            actual_channels = [feat.shape[1] for feat in outputs.hidden_states]
            print(f"Actual ConvNext channels: {actual_channels}")
        
        # Create adaptation layers based on actual channels
        self.adapt_layers = nn.ModuleList()
        target_channels = [256, 512, 1024]  # YOLO expected channels
        
        for i in range(min(3, len(actual_channels))):
            in_ch = actual_channels[i] if i < len(actual_channels) else actual_channels[-1]
            out_ch = target_channels[i]
            self.adapt_layers.append(nn.Conv2d(in_ch, out_ch, 1))
            
        for layer in self.adapt_layers:
            nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
            nn.init.constant_(layer.bias, 0)
    
    def forward(self, x):
        """
        Extract features compatible with YOLO format
        Returns [P3, P4, P5] features
        """
        outputs = self.convnext(x, output_hidden_states=True)
        hidden_states = outputs.hidden_states  # List of tensors

        print(f"ConvNext hidden_states length: {len(hidden_states)}")  # Debug

        # Fix: Handle the case where we only have 2 hidden states
        if len(hidden_states) >= 3:
            selected = [hidden_states[0], hidden_states[1], hidden_states[2]]
        elif len(hidden_states) == 2:
            # Duplicate the last feature map for P5
            selected = [hidden_states[0], hidden_states[1], hidden_states[1]]
        else:
            raise ValueError(f"ConvNext hidden_states has only {len(hidden_states)} stages, need at least 2 for YOLO neck.")

        features = []
        for i, feat in enumerate(selected):
            adapted_feature = self.adapt_layers[i](feat)
            features.append(adapted_feature)

        return features  # [P3, P4, P5]

class ConvNextYOLO(nn.Module):
    """
    YOLO model with ConvNext backbone
    Uses real YOLO head from ultralytics
    """
    def __init__(self, yolo_model_path="yolov8n.pt", num_classes=2):
        super().__init__()
        
        # Load pretrained YOLO model
        self.yolo_model = YOLO(yolo_model_path)
        self.yolo_pytorch_model = self.yolo_model.model
        
        # Replace the backbone with ConvNext
        self.convnext_backbone = ConvNextBackboneAdapter()
        
        # Fix: Better way to extract neck and head
        # Get the actual model layers
        model_layers = list(self.yolo_pytorch_model.model)
        
        # Find neck and head indices (usually last few layers)
        self.neck = None
        self.head = None
        
        # For YOLOv8, typically:
        # - Backbone: layers 0-9
        # - Neck: layers 10-18 (FPN/PAN)
        # - Head: layer 19+ (Detection head)
        
        # Extract neck (FPN/PAN layers)
        try:
            self.neck = nn.Sequential(*model_layers[10:19])  # Adjust indices as needed
            self.head = model_layers[-1]  # Detection head
        except IndexError:
            # Fallback: use simpler approach
            self.neck = self.yolo_pytorch_model.model[-2]
            self.head = self.yolo_pytorch_model.model[-1]
        
        # Update head for 2 classes
        self._update_head_for_classes(num_classes)
    
    def _update_head_for_classes(self, num_classes):
        """Update detection head for custom number of classes"""
        if hasattr(self.head, 'nc'):
            self.head.nc = num_classes
        
        # Update the classification layers
        if hasattr(self.head, 'cv3'):  # YOLOv8 style
            for i, cv3_layer in enumerate(self.head.cv3):
                if hasattr(cv3_layer, '__getitem__') and len(cv3_layer) > 0:
                    old_conv = cv3_layer[-1]
                    new_conv = nn.Conv2d(
                        old_conv.in_channels,
                        num_classes,  # Simplified: just num_classes
                        old_conv.kernel_size,
                        old_conv.stride,
                        old_conv.padding,
                        bias=old_conv.bias is not None
                    )
                    cv3_layer[-1] = new_conv
    
    def forward(self, x):
        try:
            # Extract features using ConvNext backbone
            backbone_features = self.convnext_backbone(x)
            
            # Ensure we have the right number of features
            if len(backbone_features) < 3:
                # Pad with the last feature
                while len(backbone_features) < 3:
                    backbone_features.append(backbone_features[-1])
            
            # Pass through YOLO neck (FPN)
            if self.neck is not None:
                neck_features = self.neck(backbone_features)
            else:
                neck_features = backbone_features
            
            # Pass through YOLO detection head
            detections = self.head(neck_features)
            
            return detections
            
        except Exception as e:
            print(f"Forward pass error: {e}")
            print(f"Backbone features shapes: {[f.shape for f in backbone_features]}")
            raise

In [23]:

# Main recommended approach using ultralytics YOLOv8
def create_convnext_yolo(yolo_version="yolov8n.pt", num_classes=2):
    """
    Create ConvNext-YOLO model using ultralytics
    """
    print(f"Creating ConvNext-YOLO with {yolo_version}")
    
    # Create the hybrid model
    model = ConvNextYOLO(yolo_version, num_classes)
    
    return model


# Training functions using ultralytics API
def train_convnext_yolo(model, data_yaml_path, epochs=100, img_size=640, batch_size=16):
    """
    Train the ConvNext-YOLO model
    
    Args:
        model: ConvNextYOLO model
        data_yaml_path: Path to YOLO format dataset YAML file
        epochs: Number of training epochs
        img_size: Input image size
        batch_size: Training batch size
    """
    
    # For training, we need to create a custom trainer or use ultralytics with modifications
    # This is a simplified approach - you might need to modify ultralytics source code
    
    # Create a temporary YOLO model for training setup
    temp_yolo = YOLO("yolov8n.pt")
    
    # Replace the model's backbone with our ConvNext version
    temp_yolo.model = model
    
    # Train using ultralytics API
    results = temp_yolo.train(
        data=data_yaml_path,
        epochs=epochs,
        imgsz=img_size,
        batch=batch_size,
        name='convnext_yolo_fire_detection',
        project='runs/detect',
        save_period=10,
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )
    
    return results


In [24]:
# Create dataset YAML for fire/smoke detection
def create_fire_smoke_yaml(train_path, val_path, output_path="fire_smoke.yaml"):
    """
    Create YAML configuration file for fire/smoke dataset
    """
    yaml_content = f"""
# Fire and Smoke Detection Dataset Configuration

# Train and validation paths
train: {train_path}
val: {val_path}

# Number of classes
nc: 2

# Class names
names:
  0: fire
  1: smoke
"""
    
    with open(output_path, 'w') as f:
        f.write(yaml_content)
    
    print(f"Dataset YAML created at: {output_path}")
    return output_path

In [25]:
def test_convnext_yolo():
    """Test the ConvNext-YOLO model with better error handling"""
    print("Testing ConvNext-YOLO Model...")
    
    try:
        # Create model
        model = create_convnext_yolo("yolov8n.pt", num_classes=2)
        
        # Test input - start with smaller size
        test_input = torch.randn(1, 3, 224, 224)  # Start with 224x224
        
        # Forward pass
        model.eval()
        with torch.no_grad():
            print(f"Testing with input shape: {test_input.shape}")
            outputs = model(test_input)
        
        print(f"✅ Model test successful!")
        print(f"Input shape: {test_input.shape}")
        print(f"Output type: {type(outputs)}")
        
        if isinstance(outputs, (list, tuple)):
            for i, output in enumerate(outputs):
                print(f"Output {i} shape: {output.shape}")
        else:
            print(f"Output shape: {outputs.shape}")
        
        return model
        
    except Exception as e:
        print(f"❌ Model test failed: {str(e)}")
        import traceback
        traceback.print_exc()  # Print full stack trace
        return None

In [26]:
def predict_fire_smoke(model, image_path, confidence=0.25, save_results=True):
    """
    Predict fire and smoke in an image
    
    Args:
        model: Trained ConvNext-YOLO model
        image_path: Path to input image
        confidence: Confidence threshold
        save_results: Whether to save detection results
    """
    
    # For inference, we can use ultralytics API
    # Create YOLO object with our custom model
    yolo = YOLO("yolov8n.pt")  # Load base
    yolo.model = model  # Replace with our model
    
    # Run inference
    results = yolo(image_path, conf=confidence, save=save_results)
    
    # Process results
    for result in results:
        boxes = result.boxes
        if boxes is not None:
            print(f"Detected {len(boxes)} objects:")
            for box in boxes:
                cls = int(box.cls[0])
                conf = float(box.conf[0])
                class_name = "fire" if cls == 0 else "smoke"
                print(f"  {class_name}: {conf:.3f}")
    
    return results

In [27]:
if __name__ == "__main__":
    print("🔥 ConvNext-YOLO Fire Detection Setup")
    print("="*50)
    
    # Test model creation
    model = test_convnext_yolo()
    
    if model is not None:
        print(f"\n📊 Model Information:")
        total_params = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"Total parameters: {total_params:,}")
        print(f"Trainable parameters: {trainable_params:,}")
        
        print(f"\n🏗️ Architecture:")
        print(f"Backbone: ConvNext (DINOv3) - Pre-trained")
        print(f"Neck: YOLO FPN - From ultralytics") 
        print(f"Head: YOLO Detection Head - From ultralytics")
        print(f"Classes: Fire (0), Smoke (1)")
        
        print(f"\n📋 Next Steps:")
        print(f"1. Prepare your dataset in YOLO format")
        print(f"2. Create dataset YAML using create_fire_smoke_yaml()")
        print(f"3. Train using train_convnext_yolo()")
        print(f"4. Use predict_fire_smoke() for inference")
        
        # Example dataset setup
        print(f"\n💡 Example dataset setup:")
        print(f"# Create YAML config")
        print(f"yaml_path = create_fire_smoke_yaml('path/to/train', 'path/to/val')")
        print(f"")
        print(f"# Train model") 
        print(f"results = train_convnext_yolo(model, yaml_path, epochs=100)")
        print(f"")
        print(f"# Inference")
        print(f"predictions = predict_fire_smoke(model, 'test_image.jpg')")
        
    else:
        print("\n❌ Please install required dependencies:")
        print("pip install ultralytics")
        print("# or for YOLOv5:")
        print("pip install yolov5")

🔥 ConvNext-YOLO Fire Detection Setup
Testing ConvNext-YOLO Model...
Creating ConvNext-YOLO with yolov8n.pt
Actual ConvNext channels: [3, 128, 256, 512, 1024]
Ultralytics 8.3.191 🚀 Python-3.12.7 torch-2.8.0+cu128 CPU (13th Gen Intel Core(TM) i7-13700H)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=coco.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=Fa

KeyboardInterrupt: 