In [1]:
import numpy as np
import torch 
import cv2 as cv
import os 
import matplotlib.pyplot as plt 
from torchinfo import summary 
import torch.nn as nn
from typing import Tuple , Callable , Optional
from tqdm import tqdm 
from torch.utils.data import Dataset, DataLoader , random_split
from PIL import Image
from torchvision import transforms 

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [3]:
root_dir = '/media/ai/External/datasets/firesmoke_dataset'
fire_vidoes_list  = os.listdir(os.path.join(root_dir , 'fire' ))
no_fire_video_list  = os.listdir(os.path.join(root_dir , 'no_fire'))

len(fire_vidoes_list) , len(os.listdir(os.path.join(root_dir,  'fire', 'firesmoke_2')))


(3179, 30)

In [4]:
class FireVideoDataset(Dataset):
    def __init__(self, root_dir, chunk_size=30):
        self.chunk_size = chunk_size
        self.samples = []
        self.transform = transforms.Compose([
            transforms.ToTensor(),  # Convert PIL image to min max scale (0-1)
            transforms.Normalize(
                    [0.485, 0.456, 0.406],
                    [0.229, 0.224, 0.225]
                    ), 
            transforms.Resize((224, 224))
            # transforms.RandomHorizontalFlip(0.5), 
            # transforms.ColorJitter(0.2)
        ])
        for class_name, label in [('fire', 1), ('no_fire', 0)]:
            class_dir = os.path.join(root_dir, class_name)
            for chunk_folder in os.listdir(class_dir):
                chunk_path = os.path.join(class_dir, chunk_folder)
                
                if os.path.isdir(chunk_path) and len(os.listdir(chunk_path)) >= chunk_size:
                    self.samples.append((chunk_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        chunk_path, label = self.samples[idx]
        
        image_files = os.listdir(chunk_path)[:self.chunk_size] 
        frames = []

        for fname in image_files:
            img_path = os.path.join(chunk_path, fname)
            img = Image.open(img_path)

            if img is None:
                continue  
            frames.append(self.transform(img)) 
        np_frame = np.array(frames)  # its [B , C , H , W]
        np_frame = np.transpose(np_frame, (1 , 0, 2, 3))  # becomes [C , B , H , W]

        return torch.tensor(np_frame, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)


In [5]:
dataset = FireVideoDataset(root_dir=root_dir)

train_size = int(0.7 *len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_data_loader = DataLoader(train_dataset, batch_size=2,shuffle=True , num_workers=8)
test_data_loader = DataLoader(test_dataset, batch_size=2, shuffle=True , num_workers=8)

# for batch_idx, (data, label) in enumerate(train_data_loader):
#     if batch_idx == 2:
#         break
#     print(data.shape)

In [6]:
from torch.utils.data import Subset, random_split, DataLoader
import random

subset_size = int(len(dataset) * 0.1)

subset_data, _ = random_split(dataset, [subset_size, len(dataset) - subset_size])

train_subset_size = int(subset_size * 0.8)
test_subset_size = subset_size - train_subset_size

subset_train, subset_test = random_split(subset_data, [train_subset_size, test_subset_size])

check_data_loader = DataLoader(subset_train, batch_size=2, shuffle=True, num_workers=8, drop_last=True)
test_check_data_loader = DataLoader(subset_test, batch_size=2, shuffle=True, num_workers=8, drop_last=True)

print(len(check_data_loader), len(test_check_data_loader))


210 53


In [7]:
import torch
# Choose the `slowfast_r50` model 
slow_fast_model = torch.hub.load('facebookresearch/pytorchvideo', 'slowfast_r50' , pretrained=True)

Using cache found in /home/ai/.cache/torch/hub/facebookresearch_pytorchvideo_main


In [8]:
summary(slow_fast_model)

Layer (type:depth-idx)                                       Param #
Net                                                          --
├─ModuleList: 1-1                                            --
│    └─MultiPathWayWithFuse: 2-1                             --
│    │    └─ModuleList: 3-1                                  15,432
│    │    └─FuseFastToSlow: 3-2                              928
│    └─MultiPathWayWithFuse: 2-2                             --
│    │    └─ModuleList: 3-3                                  225,760
│    │    └─FuseFastToSlow: 3-4                              14,464
│    └─MultiPathWayWithFuse: 2-3                             --
│    │    └─ModuleList: 3-5                                  1,287,552
│    │    └─FuseFastToSlow: 3-6                              57,600
│    └─MultiPathWayWithFuse: 2-4                             --
│    │    └─ModuleList: 3-7                                  10,369,536
│    │    └─FuseFastToSlow: 3-8                              229,8

In [9]:
slow_fast_model = slow_fast_model.eval()
slow_fast_model = slow_fast_model.to(device)

In [11]:
print(slow_fast_model.blocks[-1].proj)

Linear(in_features=2304, out_features=400, bias=True)


In [12]:
slow_fast_model.blocks[-1].proj = torch.nn.Linear(2304 , 1)

In [13]:
slow_fast_model.blocks[-1]

ResNetBasicHead(
  (dropout): Dropout(p=0.5, inplace=False)
  (proj): Linear(in_features=2304, out_features=1, bias=True)
  (output_pool): AdaptiveAvgPool3d(output_size=1)
)

In [14]:
loss_fn = nn.BCEWithLogitsLoss()
def measure_acc(ypred , ytrue): 
    from torchmetrics import Accuracy , Recall
    acc_fn = Accuracy(task='binary').to(device)
    acc = acc_fn(ypred , ytrue)

    sig_ypred = (torch.sigmoid(ypred) > 0.5).int()
    recallfn = Recall(task='binary').to(device)
    recall = recallfn(sig_ypred , ytrue)
    return acc , recall

In [None]:
def slow_fast_model_train(model, train_loader:DataLoader , val_loader:DataLoader, 
                          loss_fn,
                          number_of_epochs:int ,
                          pathway_alpha:int)-> Tuple[list , list , list , list]:

    def pack_pathway_output(frames, alpha):
        fast_pathway = frames
        slow_pathway = frames[:, :, ::alpha, :, :]  
        return [slow_pathway, fast_pathway]
    
    def padd_to_32(data:torch.tensor) : 
            T = data.shape[2]
            if T == 32 : 
                return data 
            elif T < 32: 
                padd = 32 - T 
                st1 =  data[:,:,-1:,:,:].repeat(1, 1, padd , 1, 1)
                stacked = torch.cat([st1, data] , dim=2)
                return stacked
            else: 
                return data[:, :, :32, : , :]
            

    def transform_data(data:torch.tensor): 

        # transform = ApplyTransformToKey(
        # key="video",
        # transform=Compose([
        #     # UniformTemporalSubsample(32), # its for videos , samples are ready
        #     Lambda(lambda x: x / 255.0),
        #     NormalizeVideo(mean=[0.45, 0.45, 0.45], std=[0.225, 0.225, 0.225]),
        #     ShortSideScale(256),
        #     CenterCropVideo(224),
        # ]))
        
        # input shape N, C , D , H , W 
        # p_data = data.permute(0, 2 , 1, 3 ,4)
        # N, D, C, H, W = p_data.shape
        # r_data = p_data.reshape(N * D, C , H, W)
        # r_data = {'video' : r_data}
        # t_data = transform(r_data)['video']
        # data = t_data.reshape(N, D , C , H , W)    
        # data = data.permute(0 , 2 , 1, 3, 4)  

        data = pack_pathway_output(data , pathway_alpha)   # this transforms frame to slow and fast double frames 
        
        return data
        

    optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3, weight_decay=1e-3)

            
    optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    train_recalls , val_recalls = [] , []

    for epoch in range(number_of_epochs):
        running_loss , running_acc , running_recall = 0 , 0 , 0 
        for x , y in tqdm(train_loader , desc=f'Epoch {epoch+1}/{number_of_epochs} - Training' ): 
            # ----- TRAINING ----- #
            x , y = x.to(device) , y.to(device)
            x = padd_to_32(x)
            x = transform_data(x)

            model.train() 
            ypred = model(x).squeeze()

            loss = loss_fn(ypred, y)
            optimizer.zero_grad() 
            loss.backward()
            optimizer.step()
            acc , recall = measure_acc(ypred, y)
            running_loss += loss.item() 
            running_acc  += acc 
            running_recall += recall
        epoch_train_recall = running_recall/len(train_loader)
        epoch_train_loss = running_loss / len(train_loader)
        epoch_train_acc = running_acc / len(train_loader)
        train_losses.append(epoch_train_loss)
        train_accuracies.append(epoch_train_acc)
        train_recalls.append(epoch_train_recall)        

    # ----- EVALUATION ---- # 

        model.eval()
        val_loss, val_acc , val_recall= 0.0, 0.0 , 0.0
        with torch.inference_mode():
            for x, y in tqdm(val_loader, desc=f"Epoch {epoch+1}/{number_of_epochs} - Validation"):
                x, y = x.to(device), y.to(device)
                x = padd_to_32(x)
                x = transform_data(x)
                y_pred = model(x)
                y_pred = y_pred.squeeze()
                loss = loss_fn(y_pred, y)

                val_loss += loss.item()
                acc, recall = measure_acc(y_pred, y)
                val_recall += recall.item()
                val_acc += acc.item()

        epoch_val_loss = val_loss / len(val_loader)
        epoch_val_acc = val_acc / len(val_loader)
        epoch_val_recall = val_recall/len(val_loader)
        val_losses.append(epoch_val_loss)
        val_accuracies.append(epoch_val_acc)
        val_recalls.append(epoch_val_recall)

        print(f"[Epoch {epoch+1}] Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f} | Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}")

    return train_losses, train_accuracies , train_recalls, val_losses, val_accuracies , val_recall


In [18]:
train_losses, train_accuracies ,train_recalls, val_losses,val_accuracies , val_recall  = slow_fast_model_train(slow_fast_model , check_data_loader, test_check_data_loader , loss_fn, 10  , 4)

Epoch 1/10 - Training: 100%|██████████| 210/210 [04:11<00:00,  1.20s/it]
Epoch 1/10 - Validation: 100%|██████████| 53/53 [01:04<00:00,  1.22s/it]


[Epoch 1] Train Loss: 0.6220, Train Acc: 0.6667 | Val Loss: 1.0793, Val Acc: 0.6604


Epoch 2/10 - Training: 100%|██████████| 210/210 [04:01<00:00,  1.15s/it]
Epoch 2/10 - Validation: 100%|██████████| 53/53 [01:06<00:00,  1.25s/it]


[Epoch 2] Train Loss: 0.6118, Train Acc: 0.6524 | Val Loss: 1.1883, Val Acc: 0.6132


Epoch 3/10 - Training: 100%|██████████| 210/210 [04:06<00:00,  1.17s/it]
Epoch 3/10 - Validation: 100%|██████████| 53/53 [01:05<00:00,  1.24s/it]


[Epoch 3] Train Loss: 0.6037, Train Acc: 0.6786 | Val Loss: 1.8895, Val Acc: 0.6604


Epoch 4/10 - Training: 100%|██████████| 210/210 [04:10<00:00,  1.19s/it]
Epoch 4/10 - Validation: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


[Epoch 4] Train Loss: 0.6042, Train Acc: 0.6976 | Val Loss: 0.6112, Val Acc: 0.7358


Epoch 5/10 - Training: 100%|██████████| 210/210 [03:53<00:00,  1.11s/it]
Epoch 5/10 - Validation: 100%|██████████| 53/53 [01:04<00:00,  1.22s/it]


[Epoch 5] Train Loss: 0.5627, Train Acc: 0.7190 | Val Loss: 2.3786, Val Acc: 0.7453


Epoch 6/10 - Training: 100%|██████████| 210/210 [03:49<00:00,  1.09s/it]
Epoch 6/10 - Validation: 100%|██████████| 53/53 [01:04<00:00,  1.21s/it]


[Epoch 6] Train Loss: 0.5654, Train Acc: 0.7238 | Val Loss: 1.0564, Val Acc: 0.6604


Epoch 7/10 - Training: 100%|██████████| 210/210 [03:58<00:00,  1.14s/it]
Epoch 7/10 - Validation: 100%|██████████| 53/53 [01:02<00:00,  1.19s/it]


[Epoch 7] Train Loss: 0.5436, Train Acc: 0.7310 | Val Loss: 0.8835, Val Acc: 0.7358


Epoch 8/10 - Training: 100%|██████████| 210/210 [03:57<00:00,  1.13s/it]
Epoch 8/10 - Validation: 100%|██████████| 53/53 [01:04<00:00,  1.22s/it]


[Epoch 8] Train Loss: 0.4799, Train Acc: 0.7810 | Val Loss: 1.2761, Val Acc: 0.5660


Epoch 9/10 - Training: 100%|██████████| 210/210 [03:58<00:00,  1.14s/it]
Epoch 9/10 - Validation: 100%|██████████| 53/53 [01:04<00:00,  1.22s/it]


[Epoch 9] Train Loss: 0.4484, Train Acc: 0.7881 | Val Loss: 1.1631, Val Acc: 0.6604


Epoch 10/10 - Training: 100%|██████████| 210/210 [03:53<00:00,  1.11s/it]
Epoch 10/10 - Validation: 100%|██████████| 53/53 [01:00<00:00,  1.14s/it]

[Epoch 10] Train Loss: 0.4563, Train Acc: 0.7833 | Val Loss: 0.9451, Val Acc: 0.5943





In [19]:
print(
       f'train loss is {train_losses[-1]:2f}\n',
       f'train accuracy is {train_accuracies[-1]:.2f}\n', 
       f'recall is {train_recalls[-1]:.2f}\n'
       f'val loss is {val_losses[-1]:2f}\n',
       f'val accuracy is {val_accuracies[-1]:.2f}\n', 
       # f'val recall is {val_recalls[-1]:.2f}\n'
       )

train loss is 0.456286
 train accuracy is 0.78
 recall is 0.67
val loss is 0.945058
 val accuracy is 0.59

