# ECO 모델 구현 및 동영상 분류의 추론 실시

In [1]:
import os

import torch
import torch.nn as nn
from torch.nn import init

## Kinetics 데이터셋의 데이터 로더 작성



In [2]:
from utils.kinetics400_eco_dataloader import *

# Dataset root handed to the project helper make_datapath_list
# (presumably it returns one path per video - confirm in utils).
root_path = "./data/kinetics_videos/"
video_list = make_datapath_list(root_path)

# Preprocessing parameters passed to VideoTransform.
resize = 224
crop_size = 224
mean = [104, 117, 113]
std = [1, 1, 1]
video_transform = VideoTransform(resize, crop_size, mean, std)

# Lookup tables between label names and ids, read from the CSV file.
label_dictionary_path = "./video_download/kinetics_400_label_dicitionary.csv"
label_id_dict, id_label_dict = get_label_id_dictionary(label_dictionary_path)

# Validation dataset: 16 segments per video, frames named image_00001.jpg etc.
val_dataset = VideoDataset(
    video_list,
    label_id_dict,
    num_segments=16,
    phase="val",
    transform=video_transform,
    img_tmpl="image_{:05d}.jpg",
)

batch_size = 8
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# Pull a single batch and print its shape as a sanity check.
batch_iterator = iter(val_dataloader)
imgs_transformed, labels, label_ids, dir_path = next(batch_iterator)
print(imgs_transformed.shape)



torch.Size([8, 16, 3, 224, 224])


## ECO 모델 구현

In [3]:
from utils.eco import *

class ECO_Lite(nn.Module):
    """ECO-Lite video classifier: 2D net per frame, 3D net over frames, linear head.

    Args:
        num_classes: Number of outputs of the final linear layer. Defaults to
            400, which is the value the head was originally hard-coded to.
    """

    def __init__(self, num_classes=400):
        super(ECO_Lite, self).__init__()

        # Keep this registration order: state_dict() lists parameters in the
        # order submodules are assigned, and load_pretrained_ECO matches
        # checkpoint entries to them purely by position.
        self.eco_2d = ECO_2D()
        self.eco_3d = ECO_3D()

        # The head consumes the 512 features produced by the 3D module.
        self.fc_final = nn.Linear(
            in_features=512, out_features=num_classes, bias=True)

    def forward(self, x):
        """Classify a batch of videos.

        Args:
            x: Tensor of shape
                (batch_size, num_segments, channels, height, width).

        Returns:
            The output of the final linear layer, one row per video.
        """
        bs, ns, c, h, w = x.shape

        # Fold the segment axis into the batch axis so the 2D module sees
        # plain images. reshape (unlike view) also accepts non-contiguous x.
        out = x.reshape(bs * ns, c, h, w)

        # For 224x224 input this is (batch_size*num_segments, 96, 28, 28).
        out = self.eco_2d(out)

        # Restore the segment axis. The feature-map shape is taken from the
        # tensor itself rather than hard-coded to (96, 28, 28).
        out = out.reshape(bs, ns, *out.shape[1:])

        out = self.eco_3d(out)

        return self.fc_final(out)

# Build the model. The bare `net` must stay the last expression of the cell
# so that the notebook displays the module structure.
net = ECO_Lite()
net

ECO_Lite(
  (eco_2d): ECO_2D(
    (basic_conv): BasicConv(
      (conv1_7x7_s2): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (conv1_7x7_s2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1_relu_7x7): ReLU(inplace=True)
      (pool1_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
      (conv2_3x3_reduce): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (conv2_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2_relu_3x3_reduce): ReLU(inplace=True)
      (conv2_3x3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2_relu_3x3): ReLU(inplace=True)
      (pool2_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    )
    (inception_a): InceptionA(
      (inc

## 학습된 모델 로드

In [4]:
def load_pretrained_ECO(model_dict, pretrained_model_dict):
    """Re-key pretrained parameters onto this model's parameter names.

    Entries are matched purely by position: the i-th value of
    ``pretrained_model_dict`` is stored under the i-th key of ``model_dict``.
    Key names in the checkpoint are only used for the printed log.

    Args:
        model_dict: State dict of the target model; supplies names and order.
            It is not modified.
        pretrained_model_dict: State dict of the pretrained checkpoint.

    Returns:
        A copy of ``model_dict`` whose leading entries hold the pretrained
        values. Entries beyond the checkpoint's length keep their old values.

    Raises:
        IndexError: If the checkpoint has more entries than the model.
    """
    param_names = list(model_dict)

    # Fail before any output or partial remapping, with a message that says
    # what is wrong instead of a bare "list index out of range".
    if len(pretrained_model_dict) > len(param_names):
        raise IndexError(
            "pretrained state dict has {} entries but the model only has {}"
            .format(len(pretrained_model_dict), len(param_names)))

    new_state_dict = model_dict.copy()

    print("학습된 파라미터를 로드합니다")

    for name, (key_name, value) in zip(param_names,
                                       pretrained_model_dict.items()):
        new_state_dict[name] = value

        # Log which checkpoint key ended up under which model key.
        print("{}->{}".format(key_name, name))

    return new_state_dict

# Path of the pretrained checkpoint file.
net_model_ECO = "./weights/ECO_Lite_rgb_model_Kinetics.pth.tar"
# map_location="cpu" loads the stored tensors onto the CPU.
# NOTE(review): torch.load unpickles the file - only load checkpoints that
# come from a trusted source.
pretrained_model = torch.load(net_model_ECO, map_location="cpu")
# The parameters are stored under the "state_dict" key of the checkpoint.
pretrained_model_dict = pretrained_model["state_dict"]

# The model's current state dict supplies the target parameter names.
model_dict = net.state_dict()

# Re-key the pretrained values onto this model's names (matched by position).
new_state_dict = load_pretrained_ECO(model_dict, pretrained_model_dict)

# Switch to evaluation mode, then install the remapped weights.
# load_state_dict stays the last expression so the notebook shows its result.
net.eval()
net.load_state_dict(new_state_dict)

학습된 파라미터를 로드합니다
module.base_model.conv1_7x7_s2.weight->eco_2d.basic_conv.conv1_7x7_s2.weight
module.base_model.conv1_7x7_s2.bias->eco_2d.basic_conv.conv1_7x7_s2.bias
module.base_model.conv1_7x7_s2_bn.weight->eco_2d.basic_conv.conv1_7x7_s2_bn.weight
module.base_model.conv1_7x7_s2_bn.bias->eco_2d.basic_conv.conv1_7x7_s2_bn.bias
module.base_model.conv1_7x7_s2_bn.running_mean->eco_2d.basic_conv.conv1_7x7_s2_bn.running_mean
module.base_model.conv1_7x7_s2_bn.running_var->eco_2d.basic_conv.conv1_7x7_s2_bn.running_var
module.base_model.conv1_7x7_s2_bn.num_batches_tracked->eco_2d.basic_conv.conv1_7x7_s2_bn.num_batches_tracked
module.base_model.conv2_3x3_reduce.weight->eco_2d.basic_conv.conv2_3x3_reduce.weight
module.base_model.conv2_3x3_reduce.bias->eco_2d.basic_conv.conv2_3x3_reduce.bias
module.base_model.conv2_3x3_reduce_bn.weight->eco_2d.basic_conv.conv2_3x3_reduce_bn.weight
module.base_model.conv2_3x3_reduce_bn.bias->eco_2d.basic_conv.conv2_3x3_reduce_bn.bias
module.base_model.conv2_3x3_red

<All keys matched successfully>

## 추론(동영상 데이터의 클래스 분류)

In [5]:
# Put the network in evaluation mode before running it.
net.eval()

# Take the first batch from the validation loader.
batch_iterator = iter(val_dataloader)
imgs_transformed, labels, label_ids, dir_path = next(batch_iterator)

# Forward pass with gradient tracking turned off.
with torch.no_grad():
    outputs = net(imgs_transformed)

print(outputs.shape)

torch.Size([8, 400])


In [6]:
def show_eco_inference_result(dir_path, outputs_input, id_label_dict, idx=0,
                              top_k=5):
    """Print the highest-scoring class labels for one sample of a batch.

    Args:
        dir_path: Sequence of per-sample paths; ``dir_path[idx]`` is printed.
        outputs_input: Score tensor indexed as ``outputs_input[idx]`` to get
            the 1-D class scores of one sample. It is not modified.
        id_label_dict: Mapping from integer class index to label name.
        idx: Index of the sample within the batch.
        top_k: How many predictions to print. Clamped to the number of
            classes available.
    """
    print("파일: ", dir_path[idx])

    # Only the selected row is needed; detach so the ranking never touches
    # the autograd graph.
    scores = outputs_input[idx].detach()

    # topk ranks directly instead of repeatedly taking the max and
    # overwriting the winner with a sentinel, which printed the same class
    # repeatedly whenever real scores were below that sentinel.
    k = max(0, min(top_k, scores.numel()))
    _, top_indices = torch.topk(scores, k)

    # tolist() yields plain ints and also works for tensors not on the CPU.
    for rank, class_idx in enumerate(top_indices.tolist(), start=1):
        print("예측 {}위: {}".format(rank, id_label_dict[class_idx]))

# Print the predictions for sample 0 of the batch.
idx = 0
show_eco_inference_result(dir_path, outputs, id_label_dict, idx)

파일:  ./data/kinetics_videos/bungee jumping/40c7413c-cda1-4e7c-bc53-3b5dc44db082
예측 1위: bungee jumping
예측 2위: swinging on something
예측 3위: trapezing
예측 4위: kitesurfing
예측 5위: parasailing


In [7]:
# Print the predictions for sample 4 of the same batch.
idx = 4
show_eco_inference_result(dir_path, outputs, id_label_dict, idx)

파일:  ./data/kinetics_videos/arm wrestling/98306750-9046-4363-8152-b133f9f1fa27
예측 1위: arm wrestling
예측 2위: shaking hands
예측 3위: slapping
예측 4위: punching person (boxing)
예측 5위: rock scissors paper
