In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.io.video import read_video
from torchvision.models.video import r3d_18, R3D_18_Weights
from torchinfo import summary
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics
from torchmetrics.classification import BinaryPrecision, BinaryRecall
import os
import PIL
from tqdm.notebook import tqdm
import lightning.pytorch as L
from sklearn.metrics import classification_report

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Import from PyTorchVideo

In [7]:
import pytorchvideo
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RemoveKey,
)
from torchvision.transforms import (
    Compose,
    Lambda,
    Resize,
)

# Import WandB

In [8]:
import wandb

wandb_logger = L.loggers.WandbLogger(
    project="ResNet_attention_frame_error",
    tags=['frame error','capture/emulator'],
    config={
        "learning_rate": 0.001,
        "architecture": "ResNet18 backbone",
        "epochs": 100,
        "batch_size": 64,
    },
    name="test_ResNet18_pretrained"
)

[34m[1mwandb[0m: Currently logged in as: [33mngkhaiphu[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011115745555434842, max=1.0…

In [9]:
def get_data(path):
    video_list = []
    
    for root, dirs, files in os.walk(path):
        for name in files:
            video_list.append(os.path.join(root, name))

    return video_list

In [10]:
root = './data/'
lst = get_data(root)
lst

['./data/black screen when trigger Siri/black screen when trigger Siri_585_600_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_180_195_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_640_655_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1245_1260_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_215_230_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_410_425_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_320_335_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1085_1100_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_775_790_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_860_875_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_75_90_0.mp4',
 './data/black screen when trigger Siri/b

In [11]:
df = pd.DataFrame(lst,columns=["path"])

In [12]:
df

Unnamed: 0,path
0,./data/black screen when trigger Siri/black sc...
1,./data/black screen when trigger Siri/black sc...
2,./data/black screen when trigger Siri/black sc...
3,./data/black screen when trigger Siri/black sc...
4,./data/black screen when trigger Siri/black sc...
...,...
6313,./data/CP map CP/CP map CP_95_110_0.mp4
6314,./data/CP map CP/CP map CP_225_240_0.mp4
6315,./data/CP map CP/CP map CP_105_120_0.mp4
6316,./data/CP map CP/CP map CP_310_325_0.mp4


In [13]:
index = np.random.choice(range(5717), 5717-603, replace=False)
index

array([5576, 4534,  336, ..., 1999, 5443,  143])

In [14]:
for i in range(len(df)):
    df.loc[i,"label"] = df.loc[i,"path"].split('.')[-2][-1]
df

Unnamed: 0,path,label
0,./data/black screen when trigger Siri/black sc...,0
1,./data/black screen when trigger Siri/black sc...,0
2,./data/black screen when trigger Siri/black sc...,0
3,./data/black screen when trigger Siri/black sc...,0
4,./data/black screen when trigger Siri/black sc...,0
...,...,...
6313,./data/CP map CP/CP map CP_95_110_0.mp4,0
6314,./data/CP map CP/CP map CP_225_240_0.mp4,0
6315,./data/CP map CP/CP map CP_105_120_0.mp4,0
6316,./data/CP map CP/CP map CP_310_325_0.mp4,0


In [15]:
df2 = df[df['label'] == '0'].reset_index(drop = True)
df2

Unnamed: 0,path,label
0,./data/black screen when trigger Siri/black sc...,0
1,./data/black screen when trigger Siri/black sc...,0
2,./data/black screen when trigger Siri/black sc...,0
3,./data/black screen when trigger Siri/black sc...,0
4,./data/black screen when trigger Siri/black sc...,0
...,...,...
5712,./data/CP map CP/CP map CP_95_110_0.mp4,0
5713,./data/CP map CP/CP map CP_225_240_0.mp4,0
5714,./data/CP map CP/CP map CP_105_120_0.mp4,0
5715,./data/CP map CP/CP map CP_310_325_0.mp4,0


In [16]:
for i in index:
    df2.drop(index=i,axis=0,inplace=True)

In [17]:
df2 = df2.append(df[df['label'] == '1'])
df2


  df2 = df2.append(df[df['label'] == '1'])


Unnamed: 0,path,label
20,./data/black screen when trigger Siri/black sc...,0
26,./data/black screen when trigger Siri/black sc...,0
30,./data/black screen when trigger Siri/black sc...,0
34,./data/black screen when trigger Siri/black sc...,0
56,./data/black screen when trigger Siri/black sc...,0
...,...,...
6220,"./data/press phone, white screen/press phone, ...",1
6221,"./data/press phone, white screen/press phone, ...",1
6250,./data/CP map CP/CP map CP_210_225_1.mp4,1
6265,./data/CP map CP/CP map CP_200_215_1.mp4,1


In [18]:
df2.drop(columns='label',axis=1,inplace=True)

In [19]:
lst = list(df2['path'])

In [20]:
lst

['./data/black screen when trigger Siri/black screen when trigger Siri_815_830_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1285_1300_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1175_1190_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_110_125_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_505_520_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_55_70_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_800_815_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_985_1000_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1305_1320_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_450_465_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_365_380_0.mp4',
 './data/black screen when trigger Sir

In [21]:
class CustomDataset(Dataset):
    def __init__(self, dataset):
            super().__init__()
            self.dataset = dataset
            self.targets = torch.FloatTensor([int(data.split('.')[-2][-1]) for data in self.dataset])
        
            mean = [0.485, 0.456, 0.406]
            std = [0.229, 0.224, 0.225]
            resize_to = (64, 64)
            num_frames = 16
        
            self.transform =  ApplyTransformToKey(
                key="video",
                transform=Compose(
                    [
                        Lambda(lambda x: x/255.0),
                        Normalize(mean, std),
                        Resize(resize_to,antialias=True)
                    ]
                ),
            )
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        video_path = self.dataset[idx]
        label = self.targets[idx]

        # video_data, _, _ = read_video(video_path, output_format="TCHW")
        video = EncodedVideo.from_path(video_path, decode_audio=False)
        video_data = video.get_clip(start_sec=0,end_sec=0.99)
        video_data = self.transform(video_data)
        inputs = video_data["video"]
    
        return inputs, label

In [22]:
dataset = CustomDataset(dataset=lst)

In [23]:
train_size = int(0.8 * len(dataset))
val_size = int((len(dataset) - train_size) / 2)
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

In [24]:
batch_size = 64
train_loader = DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True,num_workers=12)
val_loader = DataLoader(dataset=val_dataset,batch_size=batch_size,shuffle=False,num_workers=12)
test_loader = DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False,num_workers=12)

In [9]:
model = torch.load('./r3d101_K_200ep.pth')
print(model['state_dict'].keys())

odict_keys(['conv1.weight', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'layer1.0.conv1.weight', 'layer1.0.bn1.weight', 'layer1.0.bn1.bias', 'layer1.0.bn1.running_mean', 'layer1.0.bn1.running_var', 'layer1.0.bn1.num_batches_tracked', 'layer1.0.conv2.weight', 'layer1.0.bn2.weight', 'layer1.0.bn2.bias', 'layer1.0.bn2.running_mean', 'layer1.0.bn2.running_var', 'layer1.0.bn2.num_batches_tracked', 'layer1.0.conv3.weight', 'layer1.0.bn3.weight', 'layer1.0.bn3.bias', 'layer1.0.bn3.running_mean', 'layer1.0.bn3.running_var', 'layer1.0.bn3.num_batches_tracked', 'layer1.0.downsample.0.weight', 'layer1.0.downsample.1.weight', 'layer1.0.downsample.1.bias', 'layer1.0.downsample.1.running_mean', 'layer1.0.downsample.1.running_var', 'layer1.0.downsample.1.num_batches_tracked', 'layer1.1.conv1.weight', 'layer1.1.bn1.weight', 'layer1.1.bn1.bias', 'layer1.1.bn1.running_mean', 'layer1.1.bn1.running_var', 'layer1.1.bn1.num_batches_tracked', 'layer1.1.conv2.we

In [5]:
print(summary(model, input_size=(64, 3, 16, 64, 64)))

AttributeError: 'dict' object has no attribute 'parameters'

In [39]:
class LitR3D(L.LightningModule):
    def __init__(self, pth_path):
        super().__init__()
        self.feature_extractor = torch.load(pth_path)
        layers = list(self.feature_extractor.children())[:-1]
        self.feature_extractor = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Linear(700, 256),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm1d(256),
            nn.Dropout(0.5),
            nn.Linear(256, 64),
            nn.LeakyReLU(inplace=True),
            nn.BatchNorm1d(64),
            nn.Dropout(0.3),
            nn.Linear(64,1)
        )
        
        self.loss = nn.BCELoss()
        self.accuracy = torchmetrics.Accuracy(task='binary')
        self.prec = BinaryPrecision()
        self.rec = BinaryRecall()
        self.save_hyperparameters()
        
    def forward(self,x):
        self.feature_extractor.eval()
        with torch.no_grad():
            x = self.feature_extractor(x).flatten(1)
        x = self.classifier(x)
        return F.sigmoid(x)

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.classifier.parameters(), lr=0.001)
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=50,verbose=True)
        return {"optimizer": optimizer, "lr_scheduler": lr_scheduler, "monitor": "val/loss"}

    def training_step(self, train_batch, batch_idx):
        X_train, y_train = train_batch
        y_train = y_train.unsqueeze(-1)
        pred = self(X_train)
        loss = self.loss(pred, y_train)
        
        self.log('train/loss', loss, on_epoch=True, on_step=False)
        self.log('train/acc', self.accuracy(pred,y_train), on_epoch=True, on_step=False)
        return loss

    def validation_step(self, val_batch, batch_idx):
        X_val, y_val = val_batch
        y_val = y_val.unsqueeze(-1)
        pred = self(X_val)
        # predicted = torch.round(pred)
        # correct += (predicted == y_test.unsqueeze(dim=-1)).sum()
        loss = self.loss(pred, y_val)
        # self.log('precision', self.prec(pred, y_val).item(), on_epoch=True, on_step=False)
        # self.log('recall', self.rec(pred, y_val).item(), on_epoch=True, on_step=False)
        self.log('val/loss', loss)
        self.log('val/acc', self.accuracy(pred, y_val))

    def test_step(self, test_batch, batch_idx):
        X_test, y_test = test_batch
        y_test = y_test.unsqueeze(-1)
        pred = self(X_test)
        # predicted = torch.round(pred)
        # correct += (predicted == y_test.unsqueeze(dim=-1)).sum()
        loss = self.loss(pred, y_test)
        # self.log('precision', self.prec(pred, y_test).item(), on_epoch=True, on_step=False)
        # self.log('recall', self.rec(pred, y_test).item(), on_epoch=True, on_step=False)
        self.log('test/loss', loss)
        self.log('test/acc', self.accuracy(pred, y_test))
        # Confusion matrices
        self.log({"conf_mat": wandb.plot.confusion_matrix(y_true=y_test, preds=pred)})
        # ROC Curves
        self.log({"roc": wandb.plot.roc_curve(ground_truth, predictions)})


In [35]:
model = LitR3D()
print(summary(model, input_size=(64, 3, 16, 64, 64)))

Layer (type:depth-idx)                        Output Shape              Param #
LitR3D                                        [64, 1]                   --
├─Sequential: 1-1                             [64, 512, 1, 1, 1]        --
│    └─BasicStem: 2-1                         [64, 64, 16, 32, 32]      --
│    │    └─Conv3d: 3-1                       [64, 64, 16, 32, 32]      28,224
│    │    └─BatchNorm3d: 3-2                  [64, 64, 16, 32, 32]      128
│    │    └─ReLU: 3-3                         [64, 64, 16, 32, 32]      --
│    └─Sequential: 2-2                        [64, 64, 16, 32, 32]      --
│    │    └─BasicBlock: 3-4                   [64, 64, 16, 32, 32]      221,440
│    │    └─BasicBlock: 3-5                   [64, 64, 16, 32, 32]      221,440
│    └─Sequential: 2-3                        [64, 128, 8, 16, 16]      --
│    │    └─BasicBlock: 3-6                   [64, 128, 8, 16, 16]      672,512
│    │    └─BasicBlock: 3-7                   [64, 128, 8, 16, 16]      885

In [36]:
early_stopper = L.callbacks.EarlyStopping(monitor='val/loss', mode='min', patience=15, min_delta=0.01, verbose=True)
wandb_logger.watch(model, log="all", log_freq=30)
checkpoint_callback = L.callbacks.ModelCheckpoint(
    dirpath="./resnet_27-10/",
    filename="{epoch:02d}-{val_loss:.2f}",
)
trainer = L.Trainer(
    logger=wandb_logger,
    callbacks=([early_stopper,checkpoint_callback]),
    max_epochs=100,
    default_root_dir='./resnet_27-10/'
)

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [37]:
trainer.fit(model=model,train_dataloaders=train_loader,val_dataloaders=val_loader)
trainer.save_checkpoint("./resnet_27-10/final.ckpt")

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type            | Params
------------------------------------------------------
0 | feature_extractor | Sequential      | 33.2 M
1 | classifier        | Sequential      | 148 K 
2 | loss              | BCELoss         | 0     
3 | accuracy          | BinaryAccuracy  | 0     
4 | prec              | BinaryPrecision | 0     
5 | rec               | BinaryRecall    | 0     
------------------------------------------------------
33.3 M    Trainable params
0         Non-trainable params
33.3 M    Total params
133.259   Total estimated model params size (MB)


Adjusting learning rate of group 0 to 1.0000e-03.


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 9.9901e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved. New best score: 0.569


Adjusting learning rate of group 0 to 9.9606e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.127 >= min_delta = 0.01. New best score: 0.442


Adjusting learning rate of group 0 to 9.9114e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.031 >= min_delta = 0.01. New best score: 0.411


Adjusting learning rate of group 0 to 9.8429e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.020 >= min_delta = 0.01. New best score: 0.391


Adjusting learning rate of group 0 to 9.7553e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.025 >= min_delta = 0.01. New best score: 0.366


Adjusting learning rate of group 0 to 9.6489e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.041 >= min_delta = 0.01. New best score: 0.326


Adjusting learning rate of group 0 to 9.5241e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.018 >= min_delta = 0.01. New best score: 0.308


Adjusting learning rate of group 0 to 9.3815e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 9.2216e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 9.0451e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 8.8526e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 8.6448e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 8.4227e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 8.1871e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 7.9389e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 7.6791e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 7.4088e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.025 >= min_delta = 0.01. New best score: 0.282


Adjusting learning rate of group 0 to 7.1289e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 6.8406e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 6.5451e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 6.2434e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 5.9369e-04.


Validation: 0it [00:00, ?it/s]

Metric val/loss improved by 0.011 >= min_delta = 0.01. New best score: 0.272


Adjusting learning rate of group 0 to 5.6267e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 5.3140e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 5.0000e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 4.6860e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 4.3733e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 4.0631e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 3.7566e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 3.4549e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 3.1594e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 2.8711e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 2.5912e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 2.3209e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 2.0611e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 1.8129e-04.


Validation: 0it [00:00, ?it/s]

Adjusting learning rate of group 0 to 1.5773e-04.


Validation: 0it [00:00, ?it/s]

Monitored metric val/loss did not improve in the last 15 records. Best score: 0.272. Signaling Trainer to stop.


In [60]:
model = LitR3D.load_from_checkpoint("./resnet_27-10/final.ckpt")
print(model)

LitR3D(
  (feature_extractor): Sequential(
    (0): BasicStem(
      (0): Conv3d(3, 64, kernel_size=(3, 7, 7), stride=(1, 2, 2), padding=(1, 3, 3), bias=False)
      (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): Sequential(
      (0): BasicBlock(
        (conv1): Sequential(
          (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
          (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (conv2): Sequential(
          (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1), bias=False)
          (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (relu): ReLU(inplace=True)
      )
      (1): BasicBlock(
        (conv1): Sequential(
          (0): Conv3DSimple(64, 64, kernel_size=(3, 3, 3

In [71]:
model.eval()
model.to(device='cuda')
pred = []
gt = []
with torch.no_grad():
    correct = 0
    for X_test, y_test in test_loader:
        X_test = X_test.to(device='cuda')
        y_test = y_test.to(device='cuda')
        y_val = model(X_test)
        predicted = torch.round(y_val)
        correct += (predicted == y_test.unsqueeze(dim=-1)).sum()
        pred = np.concatenate((pred, predicted.to('cpu').numpy()),axis=None)
        gt = np.concatenate((gt, y_test.to('cpu').numpy()),axis=None)
print(f'Test accuracy: {correct.item()}/{len(test_dataset)} = {correct.item()*100/len(test_dataset):7.3f}%')
print(classification_report(pred, gt))

Test accuracy: 101/121 =  83.471%
              precision    recall  f1-score   support

         0.0       0.82      0.87      0.84        61
         1.0       0.86      0.80      0.83        60

    accuracy                           0.83       121
   macro avg       0.84      0.83      0.83       121
weighted avg       0.84      0.83      0.83       121



In [None]:
wandb.finish()

wandb: Network error (TransientError), entering retry loop.
