In [1]:
import random
import pandas as pd
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A #이미지 증감.
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action="ignore")



In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
CFG = {
    "VIDEO_LENGTH" : 50,
    "IMG_SIZE" : 128,
    "EPOCHS" : 10,
    "LEARNING_RATE":3e-4,
    "BATCH_SIZE" : 4,
    "SEED" : 41
}

In [4]:
def seed_everything(seed):
  random.seed(seed)
  os.environ["PYTHONHASHSEED"] = str(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = True

seed_everything(CFG["SEED"])


In [5]:
!unzip -qq "/content/drive/MyDrive/open.zip"

In [6]:
df = pd.read_csv("/content/train.csv")

In [7]:
df.describe

<bound method NDFrame.describe of        sample_id              video_path  label
0     TRAIN_0000  ./train/TRAIN_0000.mp4      7
1     TRAIN_0001  ./train/TRAIN_0001.mp4      7
2     TRAIN_0002  ./train/TRAIN_0002.mp4      0
3     TRAIN_0003  ./train/TRAIN_0003.mp4      0
4     TRAIN_0004  ./train/TRAIN_0004.mp4      1
...          ...                     ...    ...
2693  TRAIN_2693  ./train/TRAIN_2693.mp4      3
2694  TRAIN_2694  ./train/TRAIN_2694.mp4      5
2695  TRAIN_2695  ./train/TRAIN_2695.mp4      0
2696  TRAIN_2696  ./train/TRAIN_2696.mp4      0
2697  TRAIN_2697  ./train/TRAIN_2697.mp4      0

[2698 rows x 3 columns]>

In [8]:
label_data = df.groupby("label").count()
label_data
# xLabel = np.arange(0,7)
# plt.bar(xLabel, label_data)

Unnamed: 0_level_0,sample_id,video_path
label,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1783,1783
1,318,318
2,51,51
3,78,78
4,13,13
5,28,28
6,3,3
7,317,317
8,30,30
9,34,34


In [9]:
train, val, _,_ = train_test_split(df, df["label"], test_size=0.2, random_state=CFG["SEED"])
#train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CFG['SEED'])

In [10]:
# class CustomDataset(Dataset):
#   def __init__(self, video_path_list, label_list):
#     self.video_path_list = video_path_list
#     self.label_list = label_list

#   def __getitem__(self, index):
#     frames = self.get_video(self.video_path_list[index])

#     if self.label_list is not None: #None이 있나.
#       label = self.label_list[index]
#       return frames, label
#     else:
#       return frames

#   def __len__(self):
#     return len(self.video_path_list)

#   def get_video(self, path):
#     frames = []
#     cap = cv2.VideoCapture(path)
#     for _ in range(CFG["VIDEO_LENGTH"]):
#       _, img = cap.read()
#       img = cv2.resize(img, (CFG["IMG_SIZE"],CFG["IMG_SIZE"]))
#       img = img/255
#       frames.append(img)
#     return torch.FloatTensor(np.array(frames)).permute(3,0,1,2) #왜 옮기는거지.

  
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list,tfms):
        self.video_path_list = video_path_list
        self.label_list = label_list
        self.tfms = tfms
        
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            frames.append(img)
        frames = aug_video(frames, tfms=self.tfms)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)

def aug_video(vid, tfms):
    seed = random.randint(0,99999)
    aug_vid = []
    for x in vid:
        random.seed(seed)
        aug_vid.append((tfms(image = np.asarray(x)))['image'])
    return torch.from_numpy(np.stack(aug_vid))

tfms = A.Compose([
            A.Resize(width=CFG['IMG_SIZE'], height=CFG['IMG_SIZE']),
            A.HorizontalFlip(p=0.5),
            A.Normalize()
            ], p=1)

In [12]:
train_dataset = CustomDataset(train['video_path'].values, train["label"].values, tfms=tfms)
train_loader = DataLoader(train_dataset, batch_size=CFG["BATCH_SIZE"], shuffle=True, num_workers = 0)


In [13]:
class CustomDataset(Dataset):
    def __init__(self, video_path_list, label_list):
        self.video_path_list = video_path_list
        self.label_list = label_list
        
    def __getitem__(self, index):
        frames = self.get_video(self.video_path_list[index])
        
        if self.label_list is not None:
            label = self.label_list[index]
            return frames, label
        else:
            return frames
        
    def __len__(self):
        return len(self.video_path_list)
    
    def get_video(self, path):
        frames = []
        cap = cv2.VideoCapture(path)
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            img = cv2.resize(img, (CFG['IMG_SIZE'], CFG['IMG_SIZE']))
            img = img / 255.
            frames.append(img)
        return torch.FloatTensor(np.array(frames)).permute(3, 0, 1, 2)


val_dataset = CustomDataset(val["video_path"].values, val["label"].values)
val_loader = DataLoader(val_dataset, batch_size= CFG["BATCH_SIZE"], shuffle=False, num_workers=0)

In [14]:
# class BaseModel(nn.Module):
#   def __init__(self, num_classes=13):
#     super(BaseModel,self).__init__() #이거 chatgpt 풀리면 봐야겠다.
#     self.feature_extract = nn.Sequential(
        
#     nn.Conv3d(3,8,(1,3,3)), #왜 3이지? input channel 3, output channel 8 , kernal size : (1,3,3), filter라고도 불림.
#     nn.ReLU(),               #3x3, padding=1은 크기가 변하지 않음.
#     nn.BatchNorm3d(8),
#     nn.MaxPool3d(2),

#     nn.Conv3d(8,32,(1,2,2)),
#     nn.ReLU(),
#     nn.BatchNorm3d(32),
#     nn.MaxPool3d(2),

#     nn.Conv3d(32,64,(1,2,2)),
#     nn.ReLU(),
#     nn.BatchNorm3d(64),
#     nn.MaxPool3d(2),

#     nn.Conv3d(64,128,(1,2,2)),
#     nn.ReLU(),
#     nn.BatchNorm3d(128),
#     nn.MaxPool3d((3,7,7))
#     )
#     self.classifier = nn.Linear(1024, num_classes)

#   def forward(self,x):
#     batch_size = x.size(0)
#     x=self.feature_extract(x) #위의 코드 시작
#     x = x.view(batch_size,-1) #
#     x = self.classifier(x)
#     return x


class BaseModel(nn.Module):
    def __init__(self, num_classes=13):
        super(BaseModel, self).__init__()
        self.feature_extract = nn.Sequential(
            nn.Conv3d(3, 8, (1, 3, 3)),
            nn.ReLU(),
            nn.BatchNorm3d(8),
            nn.MaxPool3d(2),

            nn.Conv3d(8, 32, (1, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(32),
            nn.MaxPool3d(2),

            nn.Conv3d(32, 64, (1, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(64),
            nn.MaxPool3d(2),
            
            nn.Conv3d(64, 128, (1, 2, 2)),
            nn.ReLU(),
            nn.BatchNorm3d(128),
            nn.MaxPool3d((3, 7, 7)),
        )
        self.classifier = nn.Linear(1024, num_classes)
        
    def forward(self, x):
        batch_size = x.size(0)
        x = self.feature_extract(x)
        x = x.view(batch_size, -1)
        x = self.classifier(x)
        return x

In [15]:
# def train(model, optimizer, train_loader, val_loader, scheduler, device):
#   model.to(device) #이게 무슨 의미인지 물어봐야겠다.
#   criterion = nn.CrossEntropyLoss().to(device)

#   best_val_score = 0
#   best_model = None

#   for epoch in range(1, CFG['EPOCHS'] +1):
#     model.train()
#     train_loss = []
#     for videos, labels in tqdm(iter(train_loader)):
#       videos = videos.to(device) #이게 뭔지 물어봐야겠다.
#       labels = labels.to(device)

#       optimizer.zero_grad()
#       output = model(videos)
#       loss = criterion(output, labels)

#       loss.backward()
#       optimizer.step()
#       train_loss.append(loss.item())

#     _val_loss, _val_score = validation(model, criterion, val_loader, device)
#     _train_loss = np.mean(train_loss)
#     print(f"Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1: [{_val_score:.5f}]")

#     if scheduler is not None:
#       scheduler.step(_val_score)

#     if best_val_score < _val_score:
#       best_val_score = _val_score
#       best_model = model

#   return best_model


def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(videos)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

In [16]:
def validation(model, criterion, val_loader, device):
  model.eval()
  val_loss = []
  preds, trues = [], []

  with torch.no_grad():
    for videos, labels in tqdm(iter(val_loader)):
      videos = videos.to(device)
      labels = labels.to(device)

      logit = model(videos)
      loss = criterion(logit, labels)

      val_loss.append(loss.item())
      
      preds += logit.argmax(1).detach().cpu().numpy().tolist()
      trues += labels.detach().cpu().numpy().tolist()

    _val_loss = np.mean(val_loss)

  _val_score = f1_score(trues, preds, average="macro")
  return _val_loss, _val_score

In [17]:
model = BaseModel()
model.eval()
optimizer = torch.optim.Adam(params=model.parameters(), lr=CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = "max", factor = 0.5, patience=2, threshold_mode='abs',
                                                      min_lr=1e-8, verbose=True)

infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [1], Train Loss : [1.19714] Val Loss : [2.96746] Val F1 : [0.01257]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.93602] Val Loss : [3.49003] Val F1 : [0.01570]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.83128] Val Loss : [4.57612] Val F1 : [0.02447]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.73408] Val Loss : [3.76620] Val F1 : [0.01622]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.70368] Val Loss : [4.47538] Val F1 : [0.01682]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.63554] Val Loss : [4.84917] Val F1 : [0.01078]
Epoch 00006: reducing learning rate of group 0 to 1.5000e-04.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.48192] Val Loss : [4.21369] Val F1 : [0.00275]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.44269] Val Loss : [4.85148] Val F1 : [0.01684]


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.41233] Val Loss : [5.27299] Val F1 : [0.00274]
Epoch 00009: reducing learning rate of group 0 to 7.5000e-05.


  0%|          | 0/540 [00:00<?, ?it/s]

  0%|          | 0/135 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.33729] Val Loss : [5.03110] Val F1 : [0.01116]


In [18]:
test = pd.read_csv('/content/test.csv')

In [19]:
test_dataset = CustomDataset(test['video_path'].values, None)
test_loader = DataLoader(test_dataset, batch_size = CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [20]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device)
            
            logit = model(videos)

            preds += logit.argmax(1).detach().cpu().numpy().tolist()
    return preds

In [21]:
preds = inference(model, test_loader, device)

  0%|          | 0/450 [00:00<?, ?it/s]

In [22]:
submit = pd.read_csv('./sample_submission.csv')

In [23]:
submit['label'] = preds
submit.head()

Unnamed: 0,sample_id,label
0,TEST_0000,9
1,TEST_0001,9
2,TEST_0002,9
3,TEST_0003,9
4,TEST_0004,9


In [24]:
submit.to_csv('./baseline_submit.csv', index=False)