# 🚀 GTM (Google Trends Transformer) - All-in-One Version

이 노트북은 모든 코드가 포함된 독립실행형 버전입니다.
- 외부 py 파일 없이 노트북 내에서 모든 클래스와 함수 정의
- Google Drive의 작은 데이터셋만 필요
- 빠른 테스트 및 실험 가능

## 1. 📦 패키지 설치 및 import

In [None]:
# Colab 환경에서 안정적인 패키지 설치
import subprocess
import sys

def install_package(package):
    """Install ``package`` with pip using the current Python interpreter.

    Runs ``<python> -m pip install <package> --quiet`` as a subprocess and
    prints a status line.

    Args:
        package: Requirement string handed to ``pip install``.

    Returns:
        True when pip exits successfully, False when it exits with a
        non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package, "--quiet"]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as e:
        print(f"❌ {package} 설치 실패: {e}")
        return False
    else:
        print(f"✅ {package} 설치 완료")
        return True

# tokenizers 문제 해결 - 미리 컴파일된 버전 사용
print("🔧 tokenizers 설치 중...")
!pip install tokenizers --no-build-isolation --quiet

# transformers 호환 버전 설치
print("🔧 transformers 설치 중...")
!pip install transformers==4.21.0 --quiet

# PyTorch Lightning 안정 버전
print("🔧 PyTorch Lightning 설치 중...")
!pip install pytorch-lightning==1.9.5 --quiet

# 기타 패키지
print("🔧 기타 패키지 설치 중...")
!pip install scikit-learn pillow --quiet

print("\n📦 패키지 설치 완료! import 시작...")

# 모든 import
import math
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import pandas as pd
import numpy as np
from tqdm import tqdm
from PIL import Image, ImageFile
from torch.utils.data import DataLoader, TensorDataset
from torchvision.transforms import Resize, ToTensor, Normalize, Compose
from torchvision import models
from sklearn.preprocessing import MinMaxScaler
from transformers import pipeline, Adafactor
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

ImageFile.LOAD_TRUNCATED_IMAGES = True

# Mount Google Drive so the dataset under /content/drive is reachable
# (Colab-only: google.colab is not available outside a Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

# Report library versions and whether a CUDA device is visible.
print(f"✅ PyTorch: {torch.__version__}")
print(f"✅ PyTorch Lightning: {pl.__version__}")
print(f"✅ CUDA 사용 가능: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"✅ GPU: {torch.cuda.get_device_name(0)}")

## 2. 🧠 모델 클래스 정의 (GTM.py 내용)

In [None]:
# 기본 모듈들 정의
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a sequence-first tensor.

    The table is precomputed for ``max_len`` positions and stored in the
    non-trainable buffer ``pe`` with shape ``(max_len, 1, d_model)`` so it
    broadcasts over the batch dimension.

    Args:
        d_model: Size of the feature dimension. Both even and odd sizes are
            supported.
        dropout: Dropout probability applied after adding the encoding.
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model, dropout=0.1, max_len=52):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine column than sine
        # columns, so trim the angles to the number of odd-indexed slots.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # (max_len, d_model) -> (max_len, 1, d_model)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:seq_len])``.

        Args:
            x: Tensor whose first dimension is the sequence position and
                whose last dimension is ``d_model``; the first dimension
                must not exceed ``max_len``.
        """
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class TimeDistributed(nn.Module):
    """Apply a wrapped module independently to every step of a sequence.

    Inputs with more than two dimensions are flattened to
    ``(-1, features)``, passed through ``module`` and folded back.

    Args:
        module: Module applied to the flattened ``(-1, features)`` view.
        batch_first: Selects how the flattened output is folded back; see
            ``forward``.
    """

    def __init__(self, module, batch_first=True):
        super().__init__()
        self.module = module
        self.batch_first = batch_first

    def forward(self, x):
        """Run ``module`` over ``x`` and restore the leading dimensions.

        Tensors with at most two dimensions go straight through ``module``.
        Otherwise the result is reshaped to ``(x.size(0), -1, out)`` when
        ``batch_first`` is true and to ``(-1, x.size(1), out)`` otherwise.
        """
        if x.dim() <= 2:
            return self.module(x)

        feature_size = x.size(-1)
        flat_out = self.module(x.contiguous().view(-1, feature_size))
        out_features = flat_out.size(-1)

        if not self.batch_first:
            return flat_out.view(-1, x.size(1), out_features)
        return flat_out.contiguous().view(x.size(0), -1, out_features)

print("✅ 기본 모듈 정의 완료")

In [None]:
# 인코더 클래스들 정의
class FusionNetwork(nn.Module):
    """Fuse image, text and temporal ("dummy") embeddings into one vector.

    The image feature map is average-pooled to one 2048-d vector per sample
    and projected to ``embedding_dim``. The enabled modalities are
    concatenated with the temporal embedding and passed through a small MLP
    that outputs ``hidden_dim`` features.

    Args:
        embedding_dim: Size of each per-modality embedding.
        hidden_dim: Size of the fused output.
        use_img: 1/True to include the image embedding, 0/False to skip it.
        use_text: 1/True to include the text embedding, 0/False to skip it.
        dropout: Dropout probability inside the fusion MLP.
    """

    def __init__(self, embedding_dim, hidden_dim, use_img, use_text, dropout=0.2):
        super().__init__()

        self.img_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.img_linear = nn.Linear(2048, embedding_dim)
        self.use_img = use_img
        self.use_text = use_text
        # The temporal embedding is always present; each enabled modality
        # adds another embedding_dim features.
        input_dim = embedding_dim + (embedding_dim*use_img) + (embedding_dim*use_text)
        self.feature_fusion = nn.Sequential(
            nn.BatchNorm1d(input_dim),
            nn.Linear(input_dim, input_dim, bias=False),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(input_dim, hidden_dim)
        )

    def forward(self, img_encoding, text_encoding, dummy_encoding):
        """Return the fused ``(batch, hidden_dim)`` representation.

        Args:
            img_encoding: ``(batch, 2048, H, W)`` feature map. Only read
                when ``use_img`` is enabled, so it may be ``None`` otherwise.
            text_encoding: ``(batch, embedding_dim)`` text embedding. Only
                read when ``use_text`` is enabled.
            dummy_encoding: ``(batch, embedding_dim)`` temporal embedding.
        """
        decoder_inputs = []
        if self.use_img == 1:
            # Pool and project only when the image modality is used, so a
            # disabled branch costs nothing and needs no valid image tensor.
            pooled_img = self.img_pool(img_encoding)
            decoder_inputs.append(self.img_linear(pooled_img.flatten(1)))
        if self.use_text == 1:
            decoder_inputs.append(text_encoding)
        decoder_inputs.append(dummy_encoding)
        concat_features = torch.cat(decoder_inputs, dim=1)

        return self.feature_fusion(concat_features)

class GTrendEmbedder(nn.Module):
    """Encode Google Trends series with a two-layer Transformer encoder.

    Args:
        forecast_horizon: Forecast length; used to size the attention blocks
            of the encoder mask.
        embedding_dim: Model width of the encoder (4 attention heads).
        use_mask: 1 to apply the block-diagonal encoder mask, anything else
            to run unmasked.
        trend_len: Maximum number of time steps for the positional encoding.
        num_trends: Number of parallel trend series per sample.
        gpu_num: Stored on the instance; not used by this class.
    """

    def __init__(self, forecast_horizon, embedding_dim, use_mask, trend_len, num_trends, gpu_num):
        super().__init__()
        self.forecast_horizon = forecast_horizon
        self.input_linear = TimeDistributed(nn.Linear(num_trends, embedding_dim))
        self.pos_embedding = PositionalEncoding(embedding_dim, max_len=trend_len)
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=4, dropout=0.2),
            num_layers=2,
        )
        self.use_mask = use_mask
        self.gpu_num = gpu_num

    def _generate_encoder_mask(self, size, forecast_horizon):
        """Build a ``(size, size)`` additive block-diagonal attention mask.

        Blocks have side ``gcd(size, forecast_horizon)``; entries inside a
        block are ``0.0`` and every other entry is ``-inf``.
        """
        block = math.gcd(size, forecast_horizon)
        mask = torch.full((size, size), float('-inf'))
        for start in range(0, size, block):
            stop = start + block
            mask[start:stop, start:stop] = 0.0
        return mask

    def forward(self, gtrends):
        """Encode ``gtrends`` of shape ``(batch, num_trends, time)``.

        Returns:
            Sequence-first encoder output of shape
            ``(time, batch, embedding_dim)``.
        """
        # (batch, num_trends, time) -> (batch, time, num_trends)
        per_step = gtrends.permute(0, 2, 1)
        embedded = self.input_linear(per_step)
        # The encoder expects sequence-first input.
        seq_first = self.pos_embedding(embedded.permute(1, 0, 2))
        input_mask = self._generate_encoder_mask(seq_first.shape[0], self.forecast_horizon).to(seq_first.device)
        if self.use_mask == 1:
            return self.encoder(seq_first, input_mask)
        return self.encoder(seq_first)

class TextEmbedder(nn.Module):
    """Embed "<color> <fabric> <category>" descriptions with BERT features.

    Label ids are mapped back to their names, joined into a short text and
    run through a Hugging Face ``feature-extraction`` pipeline
    (``bert-base-uncased``). Token features are mean-pooled and projected
    to ``embedding_dim`` by a trainable linear layer.

    Args:
        embedding_dim: Size of the returned embedding.
        cat_dict: Mapping from category name to integer id.
        col_dict: Mapping from color name to integer id.
        fab_dict: Mapping from fabric name to integer id.
        gpu_num: Stored on the instance; not used by this class.
    """

    def __init__(self, embedding_dim, cat_dict, col_dict, fab_dict, gpu_num):
        super().__init__()
        self.embedding_dim = embedding_dim
        # Invert name -> id into id -> name so batches of ids can be decoded.
        self.cat_dict = {v: k for k, v in cat_dict.items()}
        self.col_dict = {v: k for k, v in col_dict.items()}
        self.fab_dict = {v: k for k, v in fab_dict.items()}
        # The pipeline is not an nn.Module, so it is neither registered as a
        # submodule nor moved or trained together with this module.
        self.word_embedder = pipeline('feature-extraction', model='bert-base-uncased')
        self.fc = nn.Linear(768, embedding_dim)
        self.dropout = nn.Dropout(0.1)
        self.gpu_num = gpu_num

    def forward(self, category, color, fabric):
        """Return a ``(batch, embedding_dim)`` text embedding.

        Args:
            category: 1-D integer tensor of category ids.
            color: 1-D integer tensor of color ids.
            fabric: 1-D integer tensor of fabric ids.
        """
        # Convert each id tensor once; doing it per element repeats a
        # device-to-host copy for every sample in the batch.
        category_ids = category.detach().cpu().tolist()
        color_ids = color.detach().cpu().tolist()
        fabric_ids = fabric.detach().cpu().tolist()

        textual_description = [
            self.col_dict[col_id] + ' ' + self.fab_dict[fab_id] + ' ' + self.cat_dict[cat_id]
            for cat_id, col_id, fab_id in zip(category_ids, color_ids, fabric_ids)
        ]

        token_features = self.word_embedder(textual_description)
        pooled = []
        for sample in token_features:
            tokens = torch.tensor(sample[0], dtype=torch.float32)
            # Drop the first and last token before pooling when there is
            # anything left (presumably BERT's [CLS]/[SEP] markers).
            if tokens.shape[0] > 2:
                tokens = tokens[1:-1]
            pooled.append(tokens.mean(dim=0))
        word_embeddings = torch.stack(pooled).to(self.fc.weight.device).requires_grad_()

        return self.dropout(self.fc(word_embeddings))

class ImageEmbedder(nn.Module):
    """Extract spatial feature maps from images with a ResNet-50 backbone.

    The backbone is torchvision's pretrained ResNet-50 with its last two
    child modules removed, so the output is a feature map rather than class
    logits. Every backbone parameter is left trainable.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet50(pretrained=True)
        self.resnet = nn.Sequential(*list(backbone.children())[:-2])

        # Keep the whole backbone trainable (fine-tuning).
        for param in self.resnet.parameters():
            param.requires_grad_(True)

    def forward(self, images):
        """Return the backbone feature map for ``images`` as a contiguous tensor."""
        feature_map = self.resnet(images)
        full_shape = feature_map.size()
        # Flatten the trailing dimensions and restore them; the values and
        # the final shape are unchanged.
        flattened = feature_map.view(*full_shape[:2], -1)
        return flattened.view(*full_shape).contiguous()

class DummyEmbedder(nn.Module):
    """Embed four scalar temporal features (day, week, month, year).

    Each scalar gets its own ``Linear(1, embedding_dim)`` projection; the
    four projections are concatenated and fused back to ``embedding_dim``.

    Args:
        embedding_dim: Size of the per-feature and of the fused embedding.
    """

    def __init__(self, embedding_dim):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.day_embedding = nn.Linear(1, embedding_dim)
        self.week_embedding = nn.Linear(1, embedding_dim)
        self.month_embedding = nn.Linear(1, embedding_dim)
        self.year_embedding = nn.Linear(1, embedding_dim)
        self.dummy_fusion = nn.Linear(embedding_dim*4, embedding_dim)
        self.dropout = nn.Dropout(0.2)

    def forward(self, temporal_features):
        """Return a ``(batch, embedding_dim)`` temporal embedding.

        Args:
            temporal_features: ``(batch, >=4)`` tensor; columns 0..3 are
                read as day, week, month and year respectively.
        """
        projections = (
            self.day_embedding,
            self.week_embedding,
            self.month_embedding,
            self.year_embedding,
        )
        # Column i as a (batch, 1) tensor goes through projection i.
        parts = [
            project(temporal_features[:, i].unsqueeze(1))
            for i, project in enumerate(projections)
        ]
        fused = self.dummy_fusion(torch.cat(parts, dim=1))
        return self.dropout(fused)

class TransformerDecoderLayer(nn.Module):
    """Post-norm Transformer decoder layer that also returns attention weights.

    The layer runs self-attention, cross-attention over ``memory`` and a
    feed-forward block, each followed by a residual connection and
    LayerNorm. Unlike ``nn.TransformerDecoderLayer`` its ``forward`` returns
    the cross-attention weights as a second value.

    Args:
        d_model: Model width.
        nhead: Number of attention heads.
        dim_feedforward: Hidden size of the feed-forward block.
        dropout: Dropout probability used throughout the layer.
        activation: ``"relu"``, ``"gelu"`` or a callable applied inside the
            feed-forward block.

    Raises:
        ValueError: If ``activation`` is an unsupported string.
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"):
        super().__init__()

        # self_attn keeps the attribute layout expected from a decoder layer.
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)
        self.dropout3 = nn.Dropout(dropout)

        # Honor the requested activation instead of always using ReLU.
        self.activation = self._resolve_activation(activation)

    @staticmethod
    def _resolve_activation(activation):
        """Map an activation name (or callable) to the function to apply."""
        if callable(activation):
            return activation
        if activation == "relu":
            return F.relu
        if activation == "gelu":
            return F.gelu
        raise ValueError(f"activation should be 'relu', 'gelu' or a callable, not {activation!r}")

    def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None,
            memory_key_padding_mask=None, tgt_is_causal=None, memory_is_causal=None):
        """Decode ``tgt`` against ``memory``.

        Args:
            tgt: Sequence-first target tensor ``(T, batch, d_model)``.
            memory: Sequence-first encoder output ``(S, batch, d_model)``.
            tgt_mask: Optional attention mask for self-attention.
            memory_mask: Optional attention mask for cross-attention.
            tgt_key_padding_mask: Optional padding mask for ``tgt``.
            memory_key_padding_mask: Optional padding mask for ``memory``.
            tgt_is_causal: Accepted for signature compatibility; ignored.
            memory_is_causal: Accepted for signature compatibility; ignored.

        Returns:
            Tuple ``(output, attn_weights)`` where ``output`` has the shape
            of ``tgt`` and ``attn_weights`` are the cross-attention weights
            returned by ``nn.MultiheadAttention``.
        """
        # Self-attention block
        tgt2 = self.self_attn(tgt, tgt, tgt, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout1(tgt2)
        tgt = self.norm1(tgt)

        # Cross-attention block
        tgt2, attn_weights = self.multihead_attn(tgt, memory, memory, attn_mask=memory_mask,
                                                  key_padding_mask=memory_key_padding_mask)
        tgt = tgt + self.dropout2(tgt2)
        tgt = self.norm2(tgt)

        # Feedforward block
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout3(tgt2)
        tgt = self.norm3(tgt)

        return tgt, attn_weights

print("✅ 모든 모델 컴포넌트 정의 완료 (PyTorch 2.x 호환, gradient 문제 해결)")

## 3. 🎯 GTM 메인 모델 클래스

In [None]:
class GTM(pl.LightningModule):
    """Google Trends Transformer: multimodal sales forecaster.

    Static product information (image, text attributes, release-date
    features) is fused into one vector that queries a Transformer-encoded
    Google Trends signal through a single decoder layer.

    Args:
        embedding_dim: Size of the per-modality static embeddings.
        hidden_dim: Model width of the trend encoder and the decoder.
        output_dim: Number of future steps to forecast.
        num_heads: Attention heads of the decoder layer.
        num_layers: Accepted for interface compatibility; a single decoder
            layer is always used.
        use_text: 1/True to feed the text embedding into the fusion network.
        use_img: 1/True to feed the image embedding into the fusion network.
        cat_dict: Mapping from category name to id.
        col_dict: Mapping from color name to id.
        fab_dict: Mapping from fabric name to id.
        trend_len: Number of time steps in each Google Trends series.
        num_trends: Number of Google Trends series per product.
        gpu_num: Stored and forwarded to sub-modules; unused by them.
        use_encoder_mask: 1 to apply the block-diagonal trend encoder mask.
        autoregressive: Use the masked multi-step decoder input instead of
            the single-query (non-autoregressive) one.
    """

    # Factor that maps normalized sales back to the original scale; only
    # used to report the validation MAE.
    SALES_SCALE = 1065

    def __init__(self, embedding_dim, hidden_dim, output_dim, num_heads, num_layers, use_text, use_img, \
                cat_dict, col_dict, fab_dict, trend_len, num_trends, gpu_num, use_encoder_mask=1, autoregressive=False):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding_dim = embedding_dim
        self.output_len = output_dim
        self.use_encoder_mask = use_encoder_mask
        self.autoregressive = autoregressive
        self.gpu_num = gpu_num
        self.save_hyperparameters()
        # (targets, forecasts) pairs gathered by validation_step.
        self.validation_step_outputs = []

        # Encoder
        self.dummy_encoder = DummyEmbedder(embedding_dim)
        self.image_encoder = ImageEmbedder()
        self.text_encoder = TextEmbedder(embedding_dim, cat_dict, col_dict, fab_dict, gpu_num)
        self.gtrend_encoder = GTrendEmbedder(output_dim, hidden_dim, use_encoder_mask, trend_len, num_trends, gpu_num)
        self.static_feature_encoder = FusionNetwork(embedding_dim, hidden_dim, use_img, use_text)

        # Decoder - a single layer only
        self.decoder_layer = TransformerDecoderLayer(d_model=self.hidden_dim, nhead=num_heads,
                                                    dim_feedforward=self.hidden_dim * 4, dropout=0.1)

        if self.autoregressive:
            # The decoder input has output_dim positions, so the table must
            # cover exactly that many (a fixed 12 breaks longer horizons).
            self.pos_encoder = PositionalEncoding(hidden_dim, max_len=output_dim)

        self.decoder_fc = nn.Sequential(
            nn.Linear(hidden_dim, self.output_len if not self.autoregressive else 1),
            nn.Dropout(0.2)
        )

    def _generate_square_subsequent_mask(self, size):
        """Return a ``(size, size)`` causal mask: 0.0 on/below the diagonal, -inf above."""
        mask = (torch.triu(torch.ones(size, size)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def forward(self, category, color, fabric, temporal_features, gtrends, images):
        """Forecast sales for a batch of products.

        Args:
            category: 1-D tensor of category ids.
            color: 1-D tensor of color ids.
            fabric: 1-D tensor of fabric ids.
            temporal_features: ``(batch, 4)`` release-date features.
            gtrends: ``(batch, num_trends, trend_len)`` trend series.
            images: Batch of image tensors for the ResNet backbone.

        Returns:
            Tuple ``(forecast, attn_weights)`` where ``forecast`` has shape
            ``(batch, output_len)`` and ``attn_weights`` are the decoder's
            cross-attention weights over the trend encoding.
        """
        # Encode features and get inputs
        img_encoding = self.image_encoder(images)
        dummy_encoding = self.dummy_encoder(temporal_features)
        text_encoding = self.text_encoder(category, color, fabric)
        gtrend_encoding = self.gtrend_encoder(gtrends)

        # Fuse static features together
        static_feature_fusion = self.static_feature_encoder(img_encoding, text_encoding, dummy_encoding)

        memory = gtrend_encoding
        if self.autoregressive == 1:
            # Decoder input: the fused static features at position 0 and
            # zeros elsewhere, with a causal mask.
            tgt = torch.zeros(self.output_len, gtrend_encoding.shape[1], gtrend_encoding.shape[-1]).to(gtrend_encoding.device)
            tgt[0] = static_feature_fusion
            tgt = self.pos_encoder(tgt)
            tgt_mask = self._generate_square_subsequent_mask(self.output_len).to(tgt.device)

            decoder_out, attn_weights = self.decoder_layer(tgt, memory, tgt_mask)
            # decoder_fc yields (output_len, batch, 1). Move the batch axis
            # first before flattening; a plain view(-1, output_len) on the
            # sequence-first tensor would mix time steps of different samples.
            forecast = self.decoder_fc(decoder_out).squeeze(-1).transpose(0, 1)
        else:
            # Decode all steps at once from a single query
            # (generative / non-autoregressive).
            tgt = static_feature_fusion.unsqueeze(0)

            decoder_out, attn_weights = self.decoder_layer(tgt, memory)
            # (1, batch, output_len)
            forecast = self.decoder_fc(decoder_out)

        return forecast.reshape(-1, self.output_len), attn_weights

    def configure_optimizers(self):
        """Use Adafactor with its internal relative-step learning-rate schedule."""
        optimizer = Adafactor(self.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None)
        return [optimizer]

    def training_step(self, train_batch, batch_idx):
        """Compute and log the MSE between forecasts and normalized sales."""
        item_sales, category, color, fabric, temporal_features, gtrends, images = train_batch

        # Enable gradients on the float inputs.
        # NOTE(review): every sub-module has trainable parameters, so this
        # looks unnecessary for parameter gradients - confirm before removing.
        temporal_features = temporal_features.requires_grad_(True)
        gtrends = gtrends.requires_grad_(True)
        images = images.requires_grad_(True)

        forecasted_sales, _ = self.forward(category, color, fabric, temporal_features, gtrends, images)
        # Compare in (batch, output_len) form; squeezing the forecast would
        # drop the batch axis for a batch of one and rely on broadcasting.
        loss = F.mse_loss(item_sales, forecasted_sales.reshape(item_sales.shape))
        self.log('train_loss', loss)
        return loss

    def validation_step(self, test_batch, batch_idx):
        """Forecast one validation batch and keep it for the epoch-end metrics."""
        item_sales, category, color, fabric, temporal_features, gtrends, images = test_batch
        forecasted_sales, _ = self.forward(category, color, fabric, temporal_features, gtrends, images)

        if not hasattr(self, 'validation_step_outputs'):
            self.validation_step_outputs = []
        # Store 2-D (batch, output_len) tensors so batches of any size can be
        # concatenated at epoch end.
        self.validation_step_outputs.append(
            (item_sales.reshape(-1, self.output_len), forecasted_sales.reshape(-1, self.output_len))
        )

        return item_sales.squeeze(), forecasted_sales.squeeze()

    def on_validation_epoch_end(self):
        """Log ``val_loss`` (normalized MSE) and ``val_mae`` (rescaled MAE)."""
        outputs = getattr(self, 'validation_step_outputs', None)
        if not outputs:
            return

        item_sales = torch.cat([target for target, _ in outputs])
        forecasted_sales = torch.cat([forecast for _, forecast in outputs])
        rescaled_item_sales = item_sales * self.SALES_SCALE
        rescaled_forecasted_sales = forecasted_sales * self.SALES_SCALE
        loss = F.mse_loss(item_sales, forecasted_sales)
        mae = F.l1_loss(rescaled_item_sales, rescaled_forecasted_sales)
        self.log('val_mae', mae)
        self.log('val_loss', loss)

        print('Validation MAE:', mae.detach().cpu().numpy(), 'LR:', self.optimizers().param_groups[0]['lr'])
        outputs.clear()

print("✅ GTM 모델 클래스 정의 완료 (decoder layer 수정)")

## 4. 📊 데이터셋 클래스 (data_multitrends.py 내용)

In [None]:
class ZeroShotDataset():
    """Build model-ready tensors from the product table, trends and images.

    Args:
        data_df: Product DataFrame; must provide the ``category``, ``color``,
            ``fabric``, ``release_date``, ``image_path``, ``external_code``
            and ``season`` columns.
        img_root: Directory that ``image_path`` values are relative to.
        gtrends: Date-indexed DataFrame with one column per category, color
            and fabric name.
        cat_dict: Mapping from category name to id.
        col_dict: Mapping from color name to id.
        fab_dict: Mapping from fabric name to id.
        trend_len: Number of trend steps kept per series.
    """

    def __init__(self, data_df, img_root, gtrends, cat_dict, col_dict, fab_dict, trend_len):
        self.data_df = data_df
        self.gtrends = gtrends
        self.cat_dict = cat_dict
        self.col_dict = col_dict
        self.fab_dict = fab_dict
        self.trend_len = trend_len
        self.img_root = img_root

    def __len__(self):
        return len(self.data_df)

    def __getitem__(self, idx):
        """Return the raw DataFrame row at position ``idx``."""
        return self.data_df.iloc[idx, :]

    def _scaled_trend(self, window, key):
        """Return the min-max scaled trend column ``key`` from ``window``."""
        values = window[key][-52:].values[:self.trend_len]
        return MinMaxScaler().fit_transform(values.reshape(-1, 1)).flatten()

    def preprocess_data(self):
        """Load every trend window and image and pack them in a TensorDataset.

        Returns:
            ``TensorDataset(item_sales, categories, colors, fabrics,
            temporal_features, gtrends, images)``.
        """
        data = self.data_df

        gtrends, image_features = [], []
        img_transforms = Compose([Resize((256, 256)), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

        for _, row in tqdm(data.iterrows(), total=len(data), ascii=True, desc="데이터 전처리"):
            cat, col, fab = row['category'], row['color'], row['fabric']
            start_date, img_path = row['release_date'], row['image_path']

            # Trend signal for the 52 weeks before the release date; slice
            # the window once and reuse it for all three series.
            gtrend_start = start_date - pd.DateOffset(weeks=52)
            window = self.gtrends.loc[gtrend_start:start_date]
            multitrends = np.vstack([self._scaled_trend(window, key) for key in (cat, col, fab)])

            # Read the image; convert() returns an independent copy, so the
            # file handle can be released right away.
            with Image.open(os.path.join(self.img_root, img_path)) as raw_img:
                img = raw_img.convert('RGB')

            gtrends.append(multitrends)
            image_features.append(img_transforms(img))

        gtrends = np.array(gtrends)

        # Remove non-numerical information (on a copy, to keep the caller's
        # DataFrame intact).
        data = data.copy()
        data.drop(['external_code', 'season', 'release_date', 'image_path'], axis=1, inplace=True)

        # Create tensors for each part of the input/output.
        # NOTE(review): columns are selected by position - the first 12 are
        # taken as sales and columns 13..16 as temporal features; confirm
        # against the CSV layout.
        item_sales, temporal_features = torch.FloatTensor(data.iloc[:, :12].values), torch.FloatTensor(
            data.iloc[:, 13:17].values)
        categories = [self.cat_dict[val] for val in data.category.values]
        colors = [self.col_dict[val] for val in data.color.values]
        fabrics = [self.fab_dict[val] for val in data.fabric.values]

        categories, colors, fabrics = torch.LongTensor(categories), torch.LongTensor(colors), torch.LongTensor(fabrics)
        gtrends = torch.FloatTensor(gtrends)
        images = torch.stack(image_features)

        return TensorDataset(item_sales, categories, colors, fabrics, temporal_features, gtrends, images)

    def get_loader(self, batch_size, train=True):
        """Preprocess the data and wrap it in a DataLoader.

        Args:
            batch_size: Batch size of the training loader.
            train: When true, shuffle and use ``batch_size``; otherwise
                return an unshuffled loader with a fixed batch size of 1.
        """
        print('데이터셋 생성 시작...')
        data_with_gtrends = self.preprocess_data()
        if train:
            data_loader = DataLoader(data_with_gtrends, batch_size=batch_size, shuffle=True, num_workers=2)
        else:
            data_loader = DataLoader(data_with_gtrends, batch_size=1, shuffle=False, num_workers=2)
        print('데이터셋 생성 완료.')
        return data_loader

print("✅ 데이터셋 클래스 정의 완료")

## 5. 📂 데이터 로딩 및 설정

In [None]:
# Location of the dataset on the mounted Google Drive
dataset_path = Path('/content/drive/MyDrive/GTM-dataset-small/')

# Report which of the required files exist, with their size in KB.
# Missing files are only reported here; nothing is raised.
required_files = ['train.csv', 'test.csv', 'gtrends.csv', 'category_labels.pt', 'color_labels.pt', 'fabric_labels.pt']
print("📂 데이터 파일 확인:")
for file in required_files:
    file_path = dataset_path / file
    if file_path.exists():
        size = file_path.stat().st_size
        print(f"  ✅ {file}: {size/1024:.1f} KB")
    else:
        print(f"  ❌ {file}: 파일 없음!")

# Count the .png/.jpg images in each direct sub-folder of images/
image_path = dataset_path / 'images'
if image_path.exists():
    total_images = 0
    print(f"📁 이미지 폴더 구조:")
    for subdir in sorted(image_path.iterdir()):
        if subdir.is_dir():
            subdir_images = list(subdir.glob('*.png')) + list(subdir.glob('*.jpg'))
            print(f"  📂 {subdir.name}: {len(subdir_images)}개")
            total_images += len(subdir_images)
    print(f"  🖼️ 총 이미지: {total_images}개")
else:
    print(f"  ❌ images/ 폴더 없음")

In [None]:
# Load the train/test tables and the Google Trends table.
# release_date is parsed as a datetime; gtrends uses its first column as a
# parsed date index.
print("📊 데이터 로딩 중...")
train_df = pd.read_csv(dataset_path / 'train.csv', parse_dates=['release_date'])
test_df = pd.read_csv(dataset_path / 'test.csv', parse_dates=['release_date'])
gtrends = pd.read_csv(dataset_path / 'gtrends.csv', index_col=[0], parse_dates=True)

print(f"  - 훈련 데이터: {len(train_df):,}개")
print(f"  - 테스트 데이터: {len(test_df):,}개")
print(f"  - Google Trends: {len(gtrends):,}개 시점")

# Load the label dictionaries.
# weights_only=False unpickles arbitrary Python objects, so only load .pt
# files from a trusted source.
print("📋 라벨 딕셔너리 로딩 중...")
cat_dict = torch.load(dataset_path / 'category_labels.pt', weights_only=False)
col_dict = torch.load(dataset_path / 'color_labels.pt', weights_only=False)
fab_dict = torch.load(dataset_path / 'fabric_labels.pt', weights_only=False)

print(f"  - 카테고리: {len(cat_dict)}개")
print(f"  - 색상: {len(col_dict)}개")
print(f"  - 소재: {len(fab_dict)}개")

print("\n✅ 모든 데이터 로딩 완료!")

## 6. 🔧 데이터셋 및 DataLoader 생성

In [None]:
# Build the dataset wrappers (no preprocessing happens until get_loader)
print("🔄 훈련 데이터셋 생성 중...")
train_dataset = ZeroShotDataset(
    train_df, 
    dataset_path / 'images',
    gtrends, 
    cat_dict, 
    col_dict, 
    fab_dict, 
    trend_len=52
)

print("🔄 테스트 데이터셋 생성 중...")
test_dataset = ZeroShotDataset(
    test_df, 
    dataset_path / 'images',
    gtrends, 
    cat_dict, 
    col_dict, 
    fab_dict, 
    trend_len=52
)

# Create the DataLoaders; use a smaller batch when no GPU is available
BATCH_SIZE = 8 if torch.cuda.is_available() else 4

print(f"🔄 DataLoader 생성 중... (배치 크기: {BATCH_SIZE})")
# get_loader runs the full preprocessing (trend windows + image loading)
train_loader = train_dataset.get_loader(batch_size=BATCH_SIZE, train=True)
# With train=False the loader always uses a batch size of 1
test_loader = test_dataset.get_loader(batch_size=1, train=False)

print(f"✅ 데이터 준비 완료!")
print(f"  - 훈련 배치 수: {len(train_loader)}")
print(f"  - 테스트 배치 수: {len(test_loader)}")

## 7. 🤖 GTM 모델 생성 및 설정

In [None]:
# Create the GTM model
print("🎯 GTM 모델 생성 중...")

# trend_len and num_trends must match what ZeroShotDataset produces
# (52 steps, 3 series: category / color / fabric).
model = GTM(
    embedding_dim=32,
    hidden_dim=64,
    output_dim=12,
    num_heads=4,
    num_layers=1,
    use_text=True,
    use_img=True,
    cat_dict=cat_dict,
    col_dict=col_dict,
    fab_dict=fab_dict,
    trend_len=52,
    num_trends=3,
    gpu_num=0,
    use_encoder_mask=1,
    autoregressive=False
)

print(f"✅ GTM 모델 생성 완료!")
print(f"📊 모델 파라미터: {sum(p.numel() for p in model.parameters()):,}")

# Smoke test: run one forward pass on the first training batch
print("\n🔬 포워드 패스 테스트...")
try:
    sample_batch = next(iter(train_loader))
    item_sales, category, color, fabric, temporal_features, gtrends_batch, images = sample_batch
    
    print(f"  입력 shape:")
    print(f"    - item_sales: {item_sales.shape}")
    print(f"    - images: {images.shape}")
    print(f"    - gtrends: {gtrends_batch.shape}")
    
    # eval + no_grad: no dropout and no autograd graph for the check
    model.eval()
    with torch.no_grad():
        output, attn = model(category, color, fabric, temporal_features, gtrends_batch, images)
        
    print(f"  출력 shape: {output.shape}")
    print(f"  ✅ 포워드 패스 성공!")
except Exception as e:
    # Report the failure but let the notebook continue
    print(f"❌ 포워드 패스 실패: {e}")
    import traceback
    traceback.print_exc()

## 8. ⚡ PyTorch Lightning Trainer 설정 및 훈련

In [None]:
# Trainer configuration
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger

EPOCHS = 5  # kept small for quick experiments
ACCELERATOR = 'gpu' if torch.cuda.is_available() else 'cpu'

# Checkpoint callback: keep the two checkpoints with the lowest val_mae
checkpoint_callback = ModelCheckpoint(
    dirpath='./checkpoints/',
    filename='gtm-all-in-one-{epoch:02d}-{val_mae:.2f}',
    monitor='val_mae',
    mode='min',
    save_top_k=2,
    verbose=True
)

# CSV logger: metrics are written under ./logs/gtm_all_in_one/version_N/
csv_logger = CSVLogger(
    save_dir='./logs/',
    name='gtm_all_in_one'
)

# Create the Trainer (validation runs after every epoch)
trainer = pl.Trainer(
    devices=1,
    accelerator=ACCELERATOR,
    max_epochs=EPOCHS,
    check_val_every_n_epoch=1,
    logger=csv_logger,
    callbacks=[checkpoint_callback],
    enable_progress_bar=True,
    log_every_n_steps=20
)

print(f"🚀 Trainer 설정 완료!")
print(f"  - 가속기: {ACCELERATOR}")
print(f"  - 에포크: {EPOCHS}")
print(f"  - 배치 크기: {BATCH_SIZE}")

In [None]:
# Run the training
print("🚀 GTM 모델 훈련 시작!")
print("=" * 50)

try:
    # The test loader doubles as the validation loader here
    trainer.fit(
        model, 
        train_dataloaders=train_loader,
        val_dataloaders=test_loader
    )
    
    print("\n🎉 훈련 완료!")
    print(f"💾 최고 모델: {checkpoint_callback.best_model_path}")
    
except Exception as e:
    # Print the failure and the traceback, but do not stop the notebook
    print(f"\n❌ 훈련 실패: {e}")
    import traceback
    traceback.print_exc()

## 9. 📊 결과 시각화

In [None]:
import matplotlib.pyplot as plt

# Plot the training metrics written by the CSV logger
log_dir = './logs/gtm_all_in_one/'
# os.listdir raises when the directory is missing (e.g. training never
# started), so check first and fall through to the warning branch instead.
if os.path.isdir(log_dir):
    version_dirs = [
        d for d in os.listdir(log_dir)
        if d.startswith('version_') and d.split('_')[1].isdigit()
    ]
else:
    version_dirs = []

if version_dirs:
    # Use the run with the highest version number
    latest_version = max(version_dirs, key=lambda x: int(x.split('_')[1]))
    metrics_path = os.path.join(log_dir, latest_version, 'metrics.csv')
    
    if os.path.exists(metrics_path):
        metrics_df = pd.read_csv(metrics_path)
        
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))
        
        # Loss plot: a metric column only exists once it has been logged, so
        # fall back to an empty frame when it is absent.
        train_loss = metrics_df.dropna(subset=['train_loss']) if 'train_loss' in metrics_df else metrics_df.iloc[0:0]
        val_loss = metrics_df.dropna(subset=['val_loss']) if 'val_loss' in metrics_df else metrics_df.iloc[0:0]
        
        if len(train_loss) > 0:
            axes[0].plot(train_loss['step'], train_loss['train_loss'], label='Training Loss', alpha=0.7)
        if len(val_loss) > 0:
            axes[0].plot(val_loss['step'], val_loss['val_loss'], label='Validation Loss', marker='o')
        axes[0].set_title('📉 Training/Validation Loss')
        axes[0].set_xlabel('Steps')
        axes[0].set_ylabel('Loss')
        if len(train_loss) > 0 or len(val_loss) > 0:
            axes[0].legend()
        axes[0].grid(True, alpha=0.3)
        
        # MAE plot
        val_mae = metrics_df.dropna(subset=['val_mae']) if 'val_mae' in metrics_df else metrics_df.iloc[0:0]
        if len(val_mae) > 0:
            axes[1].plot(val_mae['step'], val_mae['val_mae'], label='Validation MAE', marker='s', color='red')
            final_mae = val_mae['val_mae'].iloc[-1]
            print(f"🎯 최종 Validation MAE: {final_mae:.2f}")
        
        axes[1].set_title('📊 Validation MAE')
        axes[1].set_xlabel('Steps')
        axes[1].set_ylabel('MAE')
        if len(val_mae) > 0:
            axes[1].legend()
        axes[1].grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.show()
    else:
        print("⚠️ 메트릭 파일을 찾을 수 없습니다.")
else:
    print("⚠️ 로그 디렉토리를 찾을 수 없습니다.")

## 10. 🔮 예측 테스트

In [None]:
# Prediction demo on the first test batch
model.eval()

with torch.no_grad():
    sample_batch = next(iter(test_loader))
    item_sales, category, color, fabric, temporal_features, gtrends_batch, images = sample_batch
    
    # Run the prediction
    predictions, attention_weights = model(category, color, fabric, temporal_features, gtrends_batch, images)
    
    # Undo the normalization (1065 is the normalization factor)
    actual_sales = item_sales * 1065
    predicted_sales = predictions * 1065

# Plot predictions against the actual values.
# NOTE(review): the 12 forecast steps are labelled Jan..Dec here - confirm
# that one step really corresponds to one calendar month.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

# NOTE(review): test_loader is built with batch size 1, so this loop draws a
# single sample and the remaining three axes stay empty.
for i in range(min(4, len(predictions))):
    actual = actual_sales[i].cpu().numpy()
    predicted = predicted_sales[i].cpu().numpy()
    
    axes[i].plot(months, actual, label='실제 매출', marker='o', linewidth=2)
    axes[i].plot(months, predicted, label='예측 매출', marker='s', linewidth=2, alpha=0.8)
    axes[i].set_title(f'All-in-One 예측 결과 {i+1}')
    axes[i].legend()
    axes[i].grid(True, alpha=0.3)
    axes[i].tick_params(axis='x', rotation=45)
    
    # Per-sample MAE shown in the top-left corner of the subplot
    mae = np.mean(np.abs(actual - predicted))
    axes[i].text(0.02, 0.98, f'MAE: {mae:.1f}', transform=axes[i].transAxes, 
                bbox=dict(boxstyle="round,pad=0.3", facecolor="yellow", alpha=0.7),
                verticalalignment='top')

plt.tight_layout()
plt.show()

# MAE over the whole sampled batch
overall_mae = np.mean(np.abs(actual_sales.cpu().numpy() - predicted_sales.cpu().numpy()))
print(f"🔮 All-in-One 전체 예측 MAE: {overall_mae:.2f}")

## 11. 💾 결과 저장

In [None]:
# Save the results to Google Drive.
# shutil is imported at cell level: both copy steps below need it, and the
# metrics copy must still work when no best checkpoint was produced.
import shutil

drive_save_path = '/content/drive/MyDrive/GTM-Results-All-In-One/'
os.makedirs(drive_save_path, exist_ok=True)

# Copy the best checkpoint to Google Drive under a timestamped name
if checkpoint_callback.best_model_path:
    best_model_name = f"gtm_all_in_one_best_{pd.Timestamp.now().strftime('%Y%m%d_%H%M')}.ckpt"
    shutil.copy2(checkpoint_callback.best_model_path, drive_save_path + best_model_name)
    print(f"💾 최고 모델 저장: {drive_save_path + best_model_name}")

# Copy the metrics CSV as well; metrics_path only exists when the metrics
# plotting cell found a log directory.
if 'metrics_path' in globals() and os.path.exists(metrics_path):
    shutil.copy2(metrics_path, drive_save_path + 'training_metrics.csv')
    print(f"📊 훈련 메트릭 저장: {drive_save_path}training_metrics.csv")

print("\n✅ 모든 결과가 Google Drive에 저장되었습니다!")
print(f"📂 저장 위치: {drive_save_path}")

print("\n🎉 All-in-One GTM 모델 훈련 및 테스트 완료!")
print("📝 이 노트북은 외부 파일 없이 독립적으로 실행됩니다.")

## 📋 사용 가이드

### ✅ 실행 전 준비사항
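1. Google Drive의 `MyDrive/`에 `GTM-dataset-small` 폴더 업로드 (`train.csv`, `test.csv`, `gtrends.csv`, `category_labels.pt`, `color_labels.pt`, `fabric_labels.pt`, `images/` 포함)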
2. GPU 런타임 설정 (런타임 → 런타임 유형 변경 → GPU)

### 🚀 실행 방법
1. 모든 셀을 순서대로 실행
2. 첫 번째 셀에서 Google Drive 마운트 허용
3. 자동으로 모든 과정 진행

### 📊 예상 실행 시간
- 데이터 로딩: ~2-3분
- 모델 훈련 (5 에포크): ~5-10분
- 전체: ~10-15분

### 💡 특징
- ✅ 외부 .py 파일 불필요
- ✅ 모든 코드가 노트북 내 포함
- ✅ Gradient 문제 해결됨
- ✅ TransformerDecoder 호환성 해결
- ✅ 작은 데이터셋으로 빠른 실험

### 🔗 GitHub 저장소
https://github.com/LeeSaeBom/GTM-Transformer-Jupyter