In [7]:
import torch
import torch.nn as nn  # <== 여기가 중요
import torch.nn.functional as F


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import numpy as np
import modules.resnet as resnet
from modules import BiLSTMLayer, TemporalConv
from modules.criterions import SeqKD
import utils
import modules.resnet as resnet
# Identity Layer (ResNet의 Fully Connected 제거용)
class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x

# L2 정규화 선형 레이어
class NormLinear(nn.Module):
    def __init__(self, in_dim, out_dim):
        super(NormLinear, self).__init__()
        self.weight = nn.Parameter(torch.Tensor(in_dim, out_dim))
        nn.init.xavier_uniform_(self.weight, gain=nn.init.calculate_gain('relu'))

    def forward(self, x):
        outputs = torch.matmul(x, F.normalize(self.weight, dim=0))
        return outputs

# SLRModel (수어 인식 모델)
class SLRModel(nn.Module):
    def __init__(
            self, num_classes, c2d_type, conv_type, use_bn=False,
            hidden_size=1024, gloss_dict=None, loss_weights=None,
            weight_norm=True, share_classifier=True
    ):
        super(SLRModel, self).__init__()
        self.decoder = None
        self.loss = dict()
        self.criterion_init()
        self.num_classes = num_classes
        self.loss_weights = loss_weights
        self.conv2d = getattr(resnet, c2d_type)()  # ResNet 기반 2D CNN
        self.conv2d.fc = Identity()  # Fully Connected 제거

        # 1D CNN을 활용한 Temporal Encoding
        self.conv1d = TemporalConv(input_size=512,
                                   hidden_size=hidden_size,
                                   conv_type=conv_type,
                                   use_bn=use_bn,
                                   num_classes=num_classes)

        self.decoder = utils.Decode(gloss_dict, num_classes, 'beam')

        # BiLSTM 기반 Temporal Model
        self.temporal_model = BiLSTMLayer(rnn_type='LSTM', input_size=hidden_size, hidden_size=hidden_size,
                                          num_layers=2, bidirectional=True)

        # Classifier (NormLinear 사용 여부 결정)
        if weight_norm:
            self.classifier = NormLinear(hidden_size, self.num_classes)
            self.conv1d.fc = NormLinear(hidden_size, self.num_classes)
        else:
            self.classifier = nn.Linear(hidden_size, self.num_classes)
            self.conv1d.fc = nn.Linear(hidden_size, self.num_classes)

        # Classifier 공유 여부
        if share_classifier:
            self.conv1d.fc = self.classifier

    def forward(self, x, len_x, label=None, label_lgt=None):
        # CNN으로 Frame-wise Feature 추출
        if len(x.shape) == 5:
            batch, temp, channel, height, width = x.shape
            framewise = self.conv2d(x.permute(0,2,1,3,4)).view(batch, temp, -1).permute(0,2,1)  # btc -> bct
        else:
            framewise = x

        conv1d_outputs = self.conv1d(framewise, len_x)
        x = conv1d_outputs['visual_feat']
        lgt = conv1d_outputs['feat_len'].cpu()

        # BiLSTM을 활용한 Temporal Modeling
        tm_outputs = self.temporal_model(x, lgt)
        features_before_classifier = tm_outputs['predictions']  # ✨ 분류기 전 특징값 저장

        # 최종 Classifier 적용
        outputs = self.classifier(features_before_classifier)

        # Inference 모드에서 Decoding
        pred = None if self.training else self.decoder.decode(outputs, lgt, batch_first=False, probs=False)
        conv_pred = None if self.training else self.decoder.decode(conv1d_outputs['conv_logits'], lgt, batch_first=False, probs=False)

        return {
            "framewise_features": framewise,
            "visual_features": x,
            "temproal_features": tm_outputs['predictions'],
            "feat_len": lgt,
            "conv_logits": conv1d_outputs['conv_logits'],
            "sequence_logits": outputs,
            "features_before_classifier": features_before_classifier,  # ✨ 추가된 부분
            "conv_sents": conv_pred,
            "recognized_sents": pred,
        }

    def criterion_init(self):
        self.loss['CTCLoss'] = torch.nn.CTCLoss(reduction='none', zero_infinity=False)
        self.loss['distillation'] = SeqKD(T=8)
        return self.loss




In [6]:
import os
import numpy as np
import yaml

# 환경 변수 설정
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# ✅ config 파일 로드 (dataset 정보 포함)
config_path = "./configs/baseline.yaml"  # 혹은 원하는 설정 파일
with open(config_path, "r") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

# ✅ gloss_dict 로드
gloss_dict_path = config["dataset_info"]["dict_path"]  # `dict_path`는 YAML 파일에 정의됨
gloss_dict = np.load(gloss_dict_path, allow_pickle=True).item()

print("📌 Gloss Dictionary Loaded!")
print(f"Total Classes (Including Blank): {len(gloss_dict) + 1}")
print(f"Sample Gloss Mapping: {list(gloss_dict.items())[:5]}")  # 일부만 출력


KeyError: 'dataset_info'

In [5]:
import torch

# 모델 불러오기
model = SLRModel(
    num_classes=226, c2d_type="resnet18", conv_type=2,  # conv_type은 tconv.py에서 정의된 값 중 하나로 설정
    use_bn=True, hidden_size=1024, gloss_dict=None, loss_weights=None
)

# 저장된 가중치 로드
state_dict = torch.load("model.pt", map_location="cpu")

# state_dict가 딕셔너리인지 확인 후 모델에 로드
if isinstance(state_dict, dict):
    model.load_state_dict(state_dict)

# 모델을 평가 모드로 설정
model.eval()


AttributeError: 'NoneType' object has no attribute 'items'