In [1]:
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os

In [None]:
import torch
import torch.nn as nn

class FireDetectorConv2D1D(nn.Module):
    """Clip classifier: a shared per-frame 2D CNN followed by a 1D convolution over time.

    Each frame is passed through the same ``spatial_extractor`` and reduced to
    a 64-dimensional vector. The per-frame vectors are then treated as a
    64-channel sequence, convolved and averaged along the time axis, and the
    resulting 128-dimensional clip descriptor is mapped to ``num_classes``
    values by a linear layer. No activation is applied to the final output.

    Args:
        in_channels: Number of channels in every input frame.
        num_classes: Number of values produced per clip.
    """

    def __init__(self, in_channels=3, num_classes=1):
        super().__init__()

        # Per-frame (spatial) stack; the same weights are used for every frame.
        frame_layers = [
            nn.Conv2d(in_channels, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),  # halves H and W
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),  # one 64-D vector per frame
        ]
        self.spatial_extractor = nn.Sequential(*frame_layers)

        # Temporal stack: the frame vectors form a 64-channel series over T.
        sequence_layers = [
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),  # average over the time axis
        ]
        self.temporal_extractor = nn.Sequential(*sequence_layers)

        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        """Run the model on a batch of clips.

        Args:
            x: Tensor of shape ``(B, C, T, H, W)``.

        Returns:
            Tensor of shape ``(B, num_classes)``.
        """
        batch, channels, frames, height, width = x.shape

        # Fold time into the batch axis so the 2D stack sees one frame per sample.
        per_frame = x.transpose(1, 2).reshape(batch * frames, channels, height, width)
        per_frame = self.spatial_extractor(per_frame)  # (B*T, 64, 1, 1)

        # Unfold time again and move the features onto the Conv1d channel axis.
        sequence = per_frame.view(batch, frames, 64).transpose(1, 2)  # (B, 64, T)
        clip_descriptor = self.temporal_extractor(sequence).squeeze(-1)  # (B, 128)

        return self.classifier(clip_descriptor)


_IncompleteInputError: incomplete input (371091138.py, line 15)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class Conv2Plus1D(nn.Module):
    """(2+1)D convolution: a spatial ``Conv3d`` followed by a temporal ``Conv3d``.

    The spatial convolution uses a ``(1, H, W)`` kernel and maps
    ``in_channels`` to ``out_channels``; a ReLU follows; the temporal
    convolution then uses a ``(T, 1, 1)`` kernel and keeps ``out_channels``.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by both convolutions.
        kernel_size: ``(T, H, W)`` kernel extents.
        padding: Either a single value, applied to the height and width axes
            of the spatial convolution and to the time axis of the temporal
            convolution, or a ``(pad_t, pad_h, pad_w)`` tuple/list giving the
            padding of each axis separately. The per-axis form is what makes
            size-preserving padding possible for non-cubic kernels, e.g.
            ``kernel_size=(3, 7, 7)`` with ``padding=(1, 3, 3)``.

    Raises:
        ValueError: If ``padding`` is a tuple/list whose length is not 3.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding):
        super().__init__()
        # kernel_size is a tuple like (T, H, W)
        T, H, W = kernel_size

        if isinstance(padding, (tuple, list)):
            if len(padding) != 3:
                raise ValueError(
                    "padding must be a single value or a (pad_t, pad_h, pad_w) "
                    f"triple, got {padding!r}"
                )
            pad_t, pad_h, pad_w = padding
        else:
            # Scalar padding: the same amount on every axis (original behaviour).
            pad_t = pad_h = pad_w = padding

        self.spatial_conv = nn.Conv3d(
            in_channels, out_channels, kernel_size=(1, H, W), padding=(0, pad_h, pad_w)
        )
        self.temporal_conv = nn.Conv3d(
            out_channels, out_channels, kernel_size=(T, 1, 1), padding=(pad_t, 0, 0)
        )

    def forward(self, x):
        """Apply spatial conv, ReLU, then temporal conv to a 5-D tensor ``x``."""
        x = self.spatial_conv(x)
        x = F.relu(x)
        x = self.temporal_conv(x)
        return x


class ResidualMain(nn.Module):
    """Residual block built from two ``Conv2Plus1D`` layers with channel LayerNorm.

    Computes ``relu(norm2(conv2(relu(norm1(conv1(x))))) + x)``. Both
    convolutions keep the channel count, and the skip connection requires
    them to keep the other dimensions as well (true, for example, for a
    ``(3, 3, 3)`` kernel with ``padding=1``).

    Args:
        channels: Number of input and output channels.
        kernel_size: ``(T, H, W)`` kernel passed to both ``Conv2Plus1D`` layers.
        padding: Padding passed to both ``Conv2Plus1D`` layers.
    """

    def __init__(self, channels, kernel_size, padding=1):
        super().__init__()
        self.conv1 = Conv2Plus1D(channels, channels, kernel_size, padding)
        # LayerNorm normalises over the trailing dimensions of its input, so
        # its shape must be just ``channels``: forward() moves the channel
        # axis to the end before calling it. A shape of [channels, 1, 1, 1]
        # would be compared against (D, H, W, C) and raise at runtime.
        self.norm1 = nn.LayerNorm(channels)
        self.conv2 = Conv2Plus1D(channels, channels, kernel_size, padding)
        self.norm2 = nn.LayerNorm(channels)
        self.relu = nn.ReLU()

    @staticmethod
    def _channel_norm(norm, x):
        """Apply ``norm`` over the channel axis of an ``(N, C, D, H, W)`` tensor."""
        x = x.permute(0, 2, 3, 4, 1)  # N, D, H, W, C
        x = norm(x)
        return x.permute(0, 4, 1, 2, 3)  # N, C, D, H, W

    def forward(self, x):
        """Return the block output for ``x`` of shape ``(N, C, D, H, W)``.

        The result has the same shape as ``x``.
        """
        residual = x

        out = self.conv1(x)
        out = self._channel_norm(self.norm1, out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self._channel_norm(self.norm2, out)

        # Out-of-place add: avoids mutating a permuted view of the norm output.
        out = out + residual
        return self.relu(out)


# Example usage similar to your FireDetector conv_block

class FireDetectorWithResidual(nn.Module):
    """Video classifier: a (2+1)D convolutional stem followed by two residual blocks.

    Layer order: ``Conv2Plus1D`` with a ``(3, 7, 7)`` kernel -> ``BatchNorm3d``
    -> ReLU -> max-pool ``(1, 2, 2)`` -> ``ResidualMain`` -> max-pool
    ``(2, 2, 2)`` -> ``ResidualMain`` -> global average pool -> linear layer.
    Every convolutional stage works with 16 channels.

    Args:
        in_channels: Channels of the input clip.
        num_classes: Number of values produced per clip.
    """

    def __init__(self, in_channels=3, num_classes=1):
        super().__init__()
        width = 16  # channel count shared by the stem and both residual blocks

        # Stem
        self.initial_conv = Conv2Plus1D(in_channels, width, kernel_size=(3, 7, 7), padding=1)
        self.bn = nn.BatchNorm3d(width)
        self.relu = nn.ReLU()
        self.pool1 = nn.MaxPool3d((1, 2, 2))  # pools the last two axes only

        # Residual stages
        self.res_block1 = ResidualMain(width, kernel_size=(3, 3, 3))
        self.pool2 = nn.MaxPool3d((2, 2, 2))
        self.res_block2 = ResidualMain(width, kernel_size=(3, 3, 3))

        # Head
        self.adaptive_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.classifier = nn.Linear(width, num_classes)

    def forward(self, x):
        """Map a batch of clips to a ``(batch, num_classes)`` tensor.

        Args:
            x: 5-D input; presumably ``(N, in_channels, T, H, W)`` to match
                the ``(T, H, W)`` kernels used here -- confirm against the
                data loader.
        """
        feature_stages = (
            self.initial_conv,
            self.bn,
            self.relu,
            self.pool1,
            self.res_block1,
            self.pool2,
            self.res_block2,
            self.adaptive_pool,
        )
        for stage in feature_stages:
            x = stage(x)

        # Collapse everything except the batch axis before the linear layer.
        return self.classifier(x.flatten(1))


In [None]:
import numpy as np
import torch 
import cv2 as cv
import os 
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader , random_split
from PIL import Image
from torchvision import transforms 

# Class labels: fire = 1, no_fire = 0
