# timing model
- 0 : 낮
- 1 : 밤

In [2]:
import random
import pandas as pd
import numpy as np
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import torchvision.models as models

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings(action='ignore') 

In [3]:
CFG = {
    'VIDEO_LENGTH':50, # 10프레임 * 5초
    'HEIGHT':224,
    'WIDTH':224,
    'EPOCHS':2,
    'LEARNING_RATE':1e-3,
    'BATCH_SIZE':2,
    'SEED':41
}

In [7]:
df = pd.read_csv('./data/train_timing.csv')
df = df[df['timing']!='Na'].reset_index(drop=True)
df

Unnamed: 0,sample_id,video_path,timing
0,TRAIN_0000,/data/home/ubuntu/workspace/dacon/data/train/T...,0
1,TRAIN_0001,/data/home/ubuntu/workspace/dacon/data/train/T...,0
2,TRAIN_0004,/data/home/ubuntu/workspace/dacon/data/train/T...,0
3,TRAIN_0006,/data/home/ubuntu/workspace/dacon/data/train/T...,0
4,TRAIN_0007,/data/home/ubuntu/workspace/dacon/data/train/T...,0
...,...,...,...
910,TRAIN_2685,/data/home/ubuntu/workspace/dacon/data/train/T...,1
911,TRAIN_2689,/data/home/ubuntu/workspace/dacon/data/train/T...,0
912,TRAIN_2692,/data/home/ubuntu/workspace/dacon/data/train/T...,0
913,TRAIN_2693,/data/home/ubuntu/workspace/dacon/data/train/T...,0


In [8]:
df['timing'].value_counts()

0    808
1    107
Name: timing, dtype: int64

In [10]:
# 영상 초반 부분이 제일 안정적이라 판단
# 초반 30프레임만 사용
# 0은 7프레임마다 1개씩 -> 808*4 = 3232
# 1은 1프레임마다 1개씩 -> 107*25 = 3210

5099

In [13]:
def get_img(path, label=False):
    frames, labels = [], []
    cap = cv2.VideoCapture(path)
    cnt = 0
    # {0: 7, 1: 1}
    if (label=='0'):
        divide = 7
    elif (label=='1'):
        divide = 1
        
    if (label):
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            cnt+=1
            if (cnt%divide==0):
                img = cv2.resize(img, (CFG['HEIGHT'], CFG['WIDTH']))
                img = img / 255.
                frames.append(img)
                labels.append(int(label))
            if (cnt==30):
                break
        return frames, labels
                
    else:
        for _ in range(CFG['VIDEO_LENGTH']):
            _, img = cap.read()
            cnt+=1
            img = cv2.resize(img, (CFG['HEIGHT'], CFG['WIDTH']))
            img = img / 255.
            frames.append(img)
            if (cnt==30):
                break
        return frames

In [14]:
class CustomDataset(Dataset):
    def __init__(self, frames, labels):
        self.frames = frames
        self.labels = labels

        
    def __getitem__(self, index):
        frame = self.transform_frame(self.frames[index])
        if self.labels is not None:
            label = self.labels[index]
            return frame, label
        else:
            return frame
        
        
    def __len__(self):
        return len(self.frames)
    
    def transform_frame(self, frame):
        frame = frame / 255.
        return torch.FloatTensor(np.array(frame)).permute(2, 0, 1)

In [15]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss = []
    preds, trues = [], []
    
    with torch.no_grad():
        for videos, labels in tqdm(iter(val_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            logit = model(videos)
            
            loss = criterion(logit, labels)
            
            val_loss.append(loss.item())
            
            preds += logit.argmax(1).detach().cpu().numpy().tolist()
            trues += labels.detach().cpu().numpy().tolist()
        
        _val_loss = np.mean(val_loss)
    
    _val_score = f1_score(trues, preds, average='macro')
    return _val_loss, _val_score

In [15]:
## 모델 저장하는 부분 추가
def train(model, optimizer, train_loader, val_loader, scheduler, device):
    model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss = []
        for videos, labels in tqdm(iter(train_loader)):
            videos = videos.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            output = model(videos)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val F1 : [{_val_score:.5f}]')
        
        torch.save(model.state_dict(), '/data/home/ubuntu/workspace/dacon/ckp/timing_res101_{0:02d}.ckpt'.format(epoch))
        print(f'======== model saved - epoch : ', epoch)

        if scheduler is not None:
            scheduler.step(_val_score)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

In [16]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [17]:
ckp = torch.load('D:/ㅎㅎㄱ/0.Study/dacon_230221/ckp/weather_res101_36.ckpt')

In [18]:
model = models.resnet101()

num_classes = 2
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

device = torch.device('cuda:0')
model.load_state_dict(ckp)
model = model.to(device)

# test = pd.read_csv('./data/test.csv')

In [19]:
test = pd.read_csv('./data/test.csv')

In [20]:
total_frames = []
for i in tqdm(range(len(test))):
    frames = get_img(test.loc[i,'video_path'], None)
    total_frames.extend(frames)

  0%|          | 0/1800 [00:00<?, ?it/s]

In [62]:
test_dataset = CustomDataset(total_frames, None)
test_loader = DataLoader(
            test_dataset, 
            batch_size = CFG['BATCH_SIZE'],
            shuffle=False, 
            num_workers=0
            )

In [66]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    img_preds = []
    preds = []
    with torch.no_grad():
        for videos in tqdm(iter(test_loader)):
            videos = videos.to(device) 
            logit = model(videos)
            img_preds += logit.argmax(1).detach().cpu().numpy().tolist()
    
    
    for i in range(0, len(img_preds), 30):
        preds.append(int(mode(img_preds[i: i+30]).mode))

    return preds

In [67]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
preds = inference(model, test_loader, device)
test['weather'] = preds
test.to_csv('./data/test_timing.csv', index=False)

  0%|          | 0/1125 [00:00<?, ?it/s]

In [69]:
test

Unnamed: 0,sample_id,video_path,weather
0,TEST_0000,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_0000.mp4,0
1,TEST_0001,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_0001.mp4,0
2,TEST_0002,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_0002.mp4,0
3,TEST_0003,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_0003.mp4,0
4,TEST_0004,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_0004.mp4,0
...,...,...,...
1795,TEST_1795,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_1795.mp4,0
1796,TEST_1796,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_1796.mp4,0
1797,TEST_1797,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_1797.mp4,0
1798,TEST_1798,D:/ㅎㅎㄱ/0.Study/dacon_230221/test/TEST_1798.mp4,0


In [16]:
df = pd.read_csv('./data/train_timing.csv')
df = df[df['timing']!='Na'].reset_index(drop=True)
df

Unnamed: 0,sample_id,video_path,timing
0,TRAIN_0000,/data/home/ubuntu/workspace/dacon/data/train/T...,0
1,TRAIN_0001,/data/home/ubuntu/workspace/dacon/data/train/T...,0
2,TRAIN_0004,/data/home/ubuntu/workspace/dacon/data/train/T...,0
3,TRAIN_0006,/data/home/ubuntu/workspace/dacon/data/train/T...,0
4,TRAIN_0007,/data/home/ubuntu/workspace/dacon/data/train/T...,0
...,...,...,...
910,TRAIN_2685,/data/home/ubuntu/workspace/dacon/data/train/T...,1
911,TRAIN_2689,/data/home/ubuntu/workspace/dacon/data/train/T...,0
912,TRAIN_2692,/data/home/ubuntu/workspace/dacon/data/train/T...,0
913,TRAIN_2693,/data/home/ubuntu/workspace/dacon/data/train/T...,0


In [12]:
df = df[:10]
df

Unnamed: 0,sample_id,video_path,weather
0,TRAIN_0000,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0000.mp4,0
1,TRAIN_0001,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0001.mp4,0
2,TRAIN_0004,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0004.mp4,0
3,TRAIN_0006,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0006.mp4,1
4,TRAIN_0007,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0007.mp4,0
5,TRAIN_0009,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0009.mp4,1
6,TRAIN_0011,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0011.mp4,0
7,TRAIN_0015,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0015.mp4,0
8,TRAIN_0016,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0016.mp4,0
9,TRAIN_0017,D:/ㅎㅎㄱ/0.Study/dacon_230221/train/TRAIN_0017.mp4,0


In [13]:
seed_everything(CFG['SEED']) # Seed 고정

In [17]:
total_frames, total_labels = [], []
for i in tqdm(range(len(df))):
    frames, labels = get_img(df.loc[i,'video_path'], df.loc[i,'timing'])
    total_frames.extend(frames)
    total_labels.extend(labels)

  3%|▎         | 28/915 [00:02<01:15, 11.70it/s]


KeyboardInterrupt: 

In [15]:
train_set, val_set, train_label, val_lable = train_test_split(total_frames, total_labels, test_size=0.2, random_state=CFG['SEED'])

In [16]:
train_dataset = CustomDataset(train_set, train_label)
train_loader = DataLoader(
    train_dataset, 
    batch_size = CFG['BATCH_SIZE'],
    shuffle=True, 
    num_workers=0
    )

val_dataset = CustomDataset(val_set, val_lable)
val_loader = DataLoader(
    val_dataset, 
    batch_size = CFG['BATCH_SIZE'],
    shuffle=False, 
    num_workers=0
    )

In [24]:
# resnet50
model = models.resnet101()

num_classes = 3
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

device = torch.device('cuda:0')
model = model.to(device)

optimizer = torch.optim.Adam(params = model.parameters(), lr = CFG["LEARNING_RATE"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='max', 
    factor=0.5, 
    patience=2,
    threshold_mode='abs',
    min_lr=1e-4, 
    verbose=False
)
infer_model = train(model, optimizer, train_loader, val_loader, scheduler, device)

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [1], Train Loss : [1.32783] Val Loss : [6.81932] Val F1 : [0.28000]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [2], Train Loss : [0.61962] Val Loss : [60.41614] Val F1 : [0.37931]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [3], Train Loss : [0.61390] Val Loss : [1.34795] Val F1 : [0.33333]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [4], Train Loss : [0.40290] Val Loss : [0.12338] Val F1 : [1.00000]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [5], Train Loss : [0.59596] Val Loss : [0.19229] Val F1 : [0.94286]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [6], Train Loss : [0.83270] Val Loss : [0.20029] Val F1 : [1.00000]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [7], Train Loss : [0.57909] Val Loss : [0.49057] Val F1 : [0.51786]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [8], Train Loss : [0.14590] Val Loss : [0.12255] Val F1 : [1.00000]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [9], Train Loss : [0.12133] Val Loss : [0.03099] Val F1 : [1.00000]


  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/9 [00:00<?, ?it/s]

Epoch [10], Train Loss : [0.13523] Val Loss : [0.24900] Val F1 : [0.80364]
