In [1]:
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from torchvision.io.video import read_video
from torchinfo import summary
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchinfo
import os
import PIL
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Import from PyTorchVideo

In [3]:
import pytorchvideo
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    Normalize,
    RemoveKey,
)
from torchvision.transforms import (
    Compose,
    Lambda,
    Resize,
)



# Import WandB

In [4]:
import wandb

wandb.init(
    project="RFSBD_frame_error",
    tags=['frame error','capture/emulator'],
    config={
        "learning_rate": 0.001,
        "architecture": "Modified RFSBD",
        "epochs": 20,
        "batch_size": 16,
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mngkhaiphu[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114880000241101, max=1.0…

In [5]:
def get_data(path):
    video_list = []
    
    for root, dirs, files in os.walk(path):
        for name in files:
            video_list.append(os.path.join(root, name))

    return video_list

In [6]:
root = './data/'
lst = get_data(root)
lst

['./data/black screen when trigger Siri/black screen when trigger Siri_585_600_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_180_195_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_640_655_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1245_1260_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_215_230_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_410_425_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_320_335_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1085_1100_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_775_790_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_860_875_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_75_90_0.mp4',
 './data/black screen when trigger Siri/b

In [7]:
df = pd.DataFrame(lst,columns=["path"])

In [8]:
df

Unnamed: 0,path
0,./data/black screen when trigger Siri/black sc...
1,./data/black screen when trigger Siri/black sc...
2,./data/black screen when trigger Siri/black sc...
3,./data/black screen when trigger Siri/black sc...
4,./data/black screen when trigger Siri/black sc...
...,...
6313,./data/CP map CP/CP map CP_95_110_0.mp4
6314,./data/CP map CP/CP map CP_225_240_0.mp4
6315,./data/CP map CP/CP map CP_105_120_0.mp4
6316,./data/CP map CP/CP map CP_310_325_0.mp4


In [9]:
index = np.random.choice(range(5717), 5717-603, replace=False)
index

array([ 343, 4031, 2857, ..., 4236,  800, 1791])

In [10]:
for i in range(len(df)):
    df.loc[i,"label"] = df.loc[i,"path"].split('.')[-2][-1]
df

Unnamed: 0,path,label
0,./data/black screen when trigger Siri/black sc...,0
1,./data/black screen when trigger Siri/black sc...,0
2,./data/black screen when trigger Siri/black sc...,0
3,./data/black screen when trigger Siri/black sc...,0
4,./data/black screen when trigger Siri/black sc...,0
...,...,...
6313,./data/CP map CP/CP map CP_95_110_0.mp4,0
6314,./data/CP map CP/CP map CP_225_240_0.mp4,0
6315,./data/CP map CP/CP map CP_105_120_0.mp4,0
6316,./data/CP map CP/CP map CP_310_325_0.mp4,0


In [11]:
df2 = df[df['label'] == '0'].reset_index(drop = True)
df2

Unnamed: 0,path,label
0,./data/black screen when trigger Siri/black sc...,0
1,./data/black screen when trigger Siri/black sc...,0
2,./data/black screen when trigger Siri/black sc...,0
3,./data/black screen when trigger Siri/black sc...,0
4,./data/black screen when trigger Siri/black sc...,0
...,...,...
5712,./data/CP map CP/CP map CP_95_110_0.mp4,0
5713,./data/CP map CP/CP map CP_225_240_0.mp4,0
5714,./data/CP map CP/CP map CP_105_120_0.mp4,0
5715,./data/CP map CP/CP map CP_310_325_0.mp4,0


In [12]:
for i in index:
    df2.drop(index=i,axis=0,inplace=True)

In [13]:
df2 = df2.append(df[df['label'] == '1'])
df2


  df2 = df2.append(df[df['label'] == '1'])


Unnamed: 0,path,label
16,./data/black screen when trigger Siri/black sc...,0
34,./data/black screen when trigger Siri/black sc...,0
37,./data/black screen when trigger Siri/black sc...,0
39,./data/black screen when trigger Siri/black sc...,0
51,./data/black screen when trigger Siri/black sc...,0
...,...,...
6220,"./data/press phone, white screen/press phone, ...",1
6221,"./data/press phone, white screen/press phone, ...",1
6250,./data/CP map CP/CP map CP_210_225_1.mp4,1
6265,./data/CP map CP/CP map CP_200_215_1.mp4,1


In [14]:
df2.drop(columns='label',axis=1,inplace=True)

In [15]:
lst = list(df2['path'])

In [16]:
lst

['./data/black screen when trigger Siri/black screen when trigger Siri_1200_1215_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_110_125_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_60_75_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1170_1185_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_80_95_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_720_735_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1325_1340_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_170_185_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_985_1000_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_1135_1150_0.mp4',
 './data/black screen when trigger Siri/black screen when trigger Siri_975_990_0.mp4',
 './data/black screen when trigger Sir

In [17]:
class CustomDataset(Dataset):
    def __init__(self, dataset):
            super().__init__()
            self.dataset = dataset
            self.targets = torch.FloatTensor([int(data.split('.')[-2][-1]) for data in self.dataset])
        
            mean = [0.485, 0.456, 0.406]
            std = [0.229, 0.224, 0.225]
            resize_to = (64, 64)
            num_frames = 16
        
            self.transform =  ApplyTransformToKey(
                key="video",
                transform=Compose(
                    [
                        Lambda(lambda x: x/255.0),
                        Normalize(mean, std),
                        Resize(resize_to,antialias=True)
                    ]
                ),
            )
    
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        video_path = self.dataset[idx]
        label = self.targets[idx]

        # video_data, _, _ = read_video(video_path, output_format="TCHW")
        video = EncodedVideo.from_path(video_path, decode_audio=False)
        video_data = video.get_clip(start_sec=0,end_sec=0.99)
        video_data = self.transform(video_data)
        inputs = video_data["video"]
    
        return inputs, label

In [18]:
dataset = CustomDataset(dataset=lst)

In [19]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

In [20]:
batch_size=16
train_loader = DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True,num_workers=12)
test_loader = DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=True,num_workers=12)

In [21]:
# class RFSBD(nn.Module):
#     def __init__(self, in_channels=3):
#         super(RFSBD, self).__init__()
        # self.conv_layer1 = nn.Conv3d(in_channels, 16, kernel_size=(11,5,5), padding=(5,2,2))
        # self.conv_layer2 = nn.Conv3d(16, 32, kernel_size=(11,5,5), padding=(5,2,2))
        # self.conv_layer3 = nn.Conv3d(32, 64, kernel_size=(11,5,5), padding=(5,2,2))
        # self.conv_layer4 = nn.Conv3d(64, 32, kernel_size=(11,5,5), padding=(5,2,2))
        # self.conv_layer5 = nn.Conv3d(32, 16, kernel_size=(11,5,5), padding=(5,2,2))
        # self.conv_layer6 = nn.Conv3d(16, 2, kernel_size=(11,5,5), padding=(5,2,2))
        # self.max_pool = nn.MaxPool3d(kernel_size=(1,2,2))
        # self.avg_pool = nn.AvgPool3d(kernel_size=(1,2,2))
        # self.fc = nn.Linear(32,1)

#     def forward(self, x):
#         x = F.relu(self.conv_layer1(x))
#         x = self.max_pool(x)
#         x = F.relu(self.conv_layer2(x))
#         x = self.max_pool(x)
#         x = F.relu(self.conv_layer3(x))
#         x = self.avg_pool(x)
#         x = F.relu(self.conv_layer4(x))
#         x = self.max_pool(x)
#         x = F.relu(self.conv_layer5(x))
#         x = self.max_pool(x)
#         x = F.relu(self.conv_layer6(x))
#         x = self.max_pool(x)
        
#         x = x.view(-1,32)
#         x = self.fc(x)
#         return F.sigmoid(x)

In [22]:
class RFSBD(nn.Module):
    def __init__(self):
        super(RFSBD, self).__init__()
        self.conv1=nn.Conv3d(3, 96, kernel_size=3, stride=(1, 2, 2),
                               padding=(0,0,0), bias=True)
        self.pool1=nn.MaxPool3d(kernel_size=(3, 3, 3), stride=(1,2,2), padding=0)
        self.conv2=nn.Conv3d(96, 256, kernel_size=3, stride=(1, 2, 2),
                               padding=(0,0,0), bias=True)
        self.pool2=nn.MaxPool3d(kernel_size=(3, 3, 3), stride=(1,2,2), padding=0)
        self.conv3=nn.Conv3d(256, 384, kernel_size=3, stride=1,
                               padding=1, bias=True)
        self.conv4=nn.Conv3d(384, 384, kernel_size=3, stride=1,
                               padding=1, bias=True)
        self.conv5=nn.Conv3d(384, 256, kernel_size=3, stride=1,
                               padding=1, bias=True)
        self.pool1=nn.MaxPool3d(kernel_size=(3, 3, 3), stride=(1,2,2), padding=0)
        self.fc6=nn.Linear(18432, 2048)
        self.fc7=nn.Linear(2048, 2048)
        self.fc8=nn.Linear(2048, 1)
    
    def forward(self,x):
        x=F.relu(self.conv1(x))
        x=self.pool1(x)
        x=F.relu(self.conv2(x))
        x=self.pool2(x)
        x=F.relu(self.conv3(x))
        x=F.relu(self.conv4(x))
        x=F.relu(self.conv5(x))
        x=x.view(x.size(0),-1)
        x=F.relu(self.fc6(x))
        x=F.relu(self.fc7(x))
        x=self.fc8(x)
        return F.sigmoid(x)

In [23]:
model = RFSBD()
model.to(device)
print(summary(model, input_size=(16, 3, 16, 64, 64)))

wandb: Network error (TransientError), entering retry loop.


Layer (type:depth-idx)                   Output Shape              Param #
RFSBD                                    [16, 1]                   --
├─Conv3d: 1-1                            [16, 96, 14, 31, 31]      7,872
├─MaxPool3d: 1-2                         [16, 96, 12, 15, 15]      --
├─Conv3d: 1-3                            [16, 256, 10, 7, 7]       663,808
├─MaxPool3d: 1-4                         [16, 256, 8, 3, 3]        --
├─Conv3d: 1-5                            [16, 384, 8, 3, 3]        2,654,592
├─Conv3d: 1-6                            [16, 384, 8, 3, 3]        3,981,696
├─Conv3d: 1-7                            [16, 256, 8, 3, 3]        2,654,464
├─Linear: 1-8                            [16, 2048]                37,750,784
├─Linear: 1-9                            [16, 2048]                4,196,352
├─Linear: 1-10                           [16, 1]                   2,049
Total params: 51,911,617
Trainable params: 51,911,617
Non-trainable params: 0
Total mult-adds (G): 18.27
Inp

In [24]:
class EarlyStopper:
    def __init__(self, patience=2, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss and abs(validation_loss - self.min_validation_loss) > self.min_delta:
            self.counter = 0
            self.min_validation_loss = validation_loss
        else:
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

In [25]:
epochs = 20
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1,verbose=True)
early_stopper = EarlyStopper(patience=5, min_delta=0.1)

Adjusting learning rate of group 0 to 1.0000e-03.


In [27]:
for i in range(epochs):
    model.train()
    training_loss = 0
    for b, (X_train, y_train) in tqdm(enumerate(train_loader)):
        b+=1
        X_train = X_train.to(device='cuda')
        y_train = y_train.to(device='cuda')
        pred = model(X_train)
        loss = criterion(pred, y_train.unsqueeze(-1))
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        training_loss += loss.item()
        torch.cuda.empty_cache()
 
    torch.save({
            'epoch': i,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            }, f"./RFSBD_23-10/model_epoch_{i}.pth")

    training_loss /= len(train_loader)
    wandb.log({"training_loss": training_loss})
    wandb.log({"lr": optimizer.param_groups[-1]['lr']})
              
    running_loss = 0
    correct = 0
    model.eval()
    pred = []
    gt = []
    with torch.no_grad():
        for vb, (X_test, y_test) in tqdm(enumerate(test_loader)):
            X_test = X_test.to(device='cuda')
            y_test = y_test.to(device='cuda')
            val = model(X_test)
            predicted = torch.round(val)
            correct += (predicted == y_test.unsqueeze(dim=-1)).sum()
            loss = criterion(val, y_test.unsqueeze(dim=-1))
            running_loss += loss

            # pred = np.concatenate((pred, predicted.to('cpu').numpy()),axis=None)
            # gt = np.concatenate((gt, y_test.to('cpu').numpy()),axis=None)

    avg_loss = running_loss / (vb+1)
    acc = correct.item()*100 / len(test_dataset)
    print(f'epoch: {i:2}  finished,  validation loss: {avg_loss.item()} / {batch_size}')
    print(f'Test accuracy: {correct.item()}/{len(test_dataset)} = {acc:7.3f}%')
    wandb.log({"val_loss": avg_loss})
    wandb.log({"accuracy (%)": acc})

    if early_stopper.early_stop(avg_loss):             
        break
    scheduler.step()

print("Finished Training!")

0it [00:00, ?it/s]

0it [00:00, ?it/s]

epoch:  0  finished,  validation loss: 0.6930215954780579 / 16
Test accuracy: 122/241 =  50.622%
Adjusting learning rate of group 0 to 1.0000e-03.


0it [00:00, ?it/s]

0it [00:00, ?it/s]

epoch:  1  finished,  validation loss: 0.6931215524673462 / 16
Test accuracy: 122/241 =  50.622%
Adjusting learning rate of group 0 to 1.0000e-03.


0it [00:00, ?it/s]

KeyboardInterrupt: 

In [None]:
wandb.finish()