In [None]:
# =====================================================================
# ①  업로드 & 저장  (3개 merged_*.csv 선택)
# =====================================================================
from google.colab import files
import pathlib, glob, os, warnings, re, collections
warnings.filterwarnings("ignore")

DATA_DIR = "/content/merged_csvs"
pathlib.Path(DATA_DIR).mkdir(exist_ok=True)

uploaded = files.upload()                                  # ★ 3개 파일만 업로드
for n, d in uploaded.items():
    with open(f"{DATA_DIR}/{n}", "wb") as f:
        f.write(d)
print("📝 Saved:", glob.glob(f"{DATA_DIR}/*.csv"))

# =====================================================================
# ②  중복 이름 정리 + PPIACO→PPI 통일 + 필수 열 검증 (3개 파일)
# =====================================================================
import pandas as pd

raw_paths = glob.glob(os.path.join(DATA_DIR, "*.csv"))
dedup = collections.OrderedDict()
for p in sorted(raw_paths):                                # 알파벳순 → 사본(1) 뒤로
    key = re.sub(r"\s*\(\d+\)", "", os.path.basename(p))
    if key not in dedup:                                   # 원본 1개만 채택
        dedup[key] = p
paths = list(dedup.values())

# PPIACO → PPI
for p in paths:
    df_tmp = pd.read_csv(p)
    if "PPIACO" in df_tmp.columns and "PPI" not in df_tmp.columns:
        df_tmp.rename(columns={"PPIACO": "PPI"}, inplace=True)
        df_tmp.to_csv(p, index=False)

# 필수 열(GDP 제거)
need_cols  = ["Date","CPI","PPI","FEDFUNDS","DGS10","DJIA","SP500_PE"]
macro_cols = ["CPI","PPI","FEDFUNDS","DGS10"]
dow_cols   = ["DJIA","SP500_PE"]

assert len(paths) == 3, f"❗ 현재 {len(paths)}개 CSV — 정확히 3개 필요!"
for p in paths:
    miss = set(need_cols) - set(pd.read_csv(p, nrows=1).columns)
    assert not miss, f"{os.path.basename(p)} 누락 열: {miss}"
print("✅ 3개 파일·필수 열 확인 완료:", [os.path.basename(p) for p in paths])

# =====================================================================
# ③  라이브러리 & 데이터 전처리
# =====================================================================
!pip -q install tsaug
import numpy as np, torch, torch.nn as nn, torch.optim as optim, random
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tsaug import TimeWarp, Drift, AddNoise

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
WINDOW = 24                         ## 24개월(2년) 윈도우

# CSV 합치기
df_list = []
for p in paths:
    tmp = pd.read_csv(p, parse_dates=["Date"])
    proto = os.path.basename(p).replace(".csv", "")
    tmp["Prototype"] = proto
    df_list.append(tmp)
df = (pd.concat(df_list, ignore_index=True)
        .sort_values("Date")
        .dropna(subset=need_cols)
        .reset_index(drop=True))

# 스케일링
sc_macro = StandardScaler().fit(df[macro_cols])
sc_dow   = StandardScaler().fit(df[dow_cols])
Xm_all   = sc_macro.transform(df[macro_cols]).astype("float32")
Xd_all   = sc_dow.transform(df[dow_cols]).astype("float32")
prot_codes = pd.Categorical(df["Prototype"]).codes

print(f"📊 Training prototypes (3개): {df['Prototype'].unique()}")
print(f"📊 Total data points: {len(df)}")

# =====================================================================
# ④  Contrastive Dataset & DataLoader
# =====================================================================
aug = TimeWarp(n_speed_change=3, max_speed_ratio=2.0) + \
      Drift(max_drift=(0,0.1)) + \
      AddNoise(scale=0.01)

class ContrastiveDataset(Dataset):
    def __init__(self, Xm, Xd, codes, win=24):  ## Default window to 24
        self.Xm, self.Xd, self.codes, self.win = Xm, Xd, codes, win
        self.starts = [i for b in np.unique(codes)
                       for i in np.where(codes==b)[0][:-win]
                       if i+win in np.where(codes==b)[0]]
    def __len__(self): return len(self.starts)
    def __getitem__(self, idx):
        s = self.starts[idx]
        anc = np.hstack([self.Xm[s:s+self.win], self.Xd[s:s+self.win]])
        alt = np.hstack([aug.augment(self.Xm[s:s+self.win]),
                         aug.augment(self.Xd[s:s+self.win])])
        return torch.tensor(anc), torch.tensor(alt)

train_ds   = ContrastiveDataset(Xm_all, Xd_all, prot_codes, WINDOW)
batch_size = min(64, len(train_ds))          # 데이터보다 크지 않게
loader     = DataLoader(train_ds, batch_size=batch_size,
                        shuffle=True, drop_last=False)
print(f"train_ds={len(train_ds)}, batch_size={batch_size}, len(loader)={len(loader)}")

# =====================================================================
# ⑤  [REVISED] Encoder + Bubble Classifier Architecture
# =====================================================================
class Encoder(nn.Module):
    def __init__(self, in_dim, emb=128):
        super().__init__()
        self.lstm = nn.LSTM(in_dim, emb, 2, bidirectional=True, batch_first=True)
        self.fc   = nn.Linear(emb*2, emb)
    def forward(self, x):
        _, (h, _) = self.lstm(x)
        h = torch.cat([h[-2], h[-1]], 1)
        return nn.functional.normalize(self.fc(h), dim=1)

class BubbleDetector(nn.Module):
    """Combined model with encoder and probability classifier"""
    def __init__(self, in_dim, emb=128):
        super().__init__()
        self.encoder = Encoder(in_dim, emb)
        # Classification head: outputs probability score
        self.classifier = nn.Sequential(
            nn.Linear(emb, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1),
            nn.Sigmoid()  # Ensures output is in [0, 1]
        )

    def forward(self, x):
        z = self.encoder(x)
        prob = self.classifier(z)
        return z, prob.squeeze()

    def get_probability(self, x):
        """Just return the probability score"""
        with torch.no_grad():
            _, prob = self.forward(x)
        return prob

# Initialize model
model = BubbleDetector(in_dim=len(macro_cols)+len(dow_cols), emb=128).to(DEVICE)

# =====================================================================
# ⑥  [REVISED] Training with Contrastive + Classification Loss
# =====================================================================
temperature = 0.05
def ntxent(z1, z2):
    z1 = nn.functional.normalize(z1, dim=1)
    z2 = nn.functional.normalize(z2, dim=1)
    sim = torch.mm(z1, z2.t()) / temperature
    labels = torch.arange(z1.size(0), device=z1.device)
    return nn.CrossEntropyLoss()(sim, labels)

# Binary cross-entropy for classification
bce_loss = nn.BCELoss()

opt = optim.Adam(model.parameters(), lr=3e-4)
EPOCHS = 300

print(f"\n🚀 Starting training with bubble detection classifier")
for ep in range(1, EPOCHS+1):
    model.train()
    tot_loss = 0

    for anc, alt in loader:
        anc, alt = anc.to(DEVICE), alt.to(DEVICE)

        # Forward pass
        z1, prob1 = model(anc)
        z2, prob2 = model(alt)

        # Contrastive loss (for representation learning)
        cont_loss = ntxent(z1, z2)

        # Classification loss
        # Since all training data are bubble periods, label them as 1 (bubble)
        bubble_labels = torch.ones(anc.size(0), device=DEVICE)
        class_loss = bce_loss(prob1, bubble_labels) + bce_loss(prob2, bubble_labels)

        # Combined loss
        loss = cont_loss + 0.5 * class_loss  # Weight the classification loss

        opt.zero_grad()
        loss.backward()
        opt.step()
        tot_loss += loss.item()

    if ep % 10 == 0 or ep == 1:
        print(f"Epoch {ep:03d} | loss {tot_loss/len(loader):.4f}")

Saving Merged_Subprime_Bubble.csv to Merged_Subprime_Bubble.csv
Saving Merged_Black_Monday.csv to Merged_Black_Monday.csv
Saving Merged_Nifty_Fifty.csv to Merged_Nifty_Fifty.csv
📝 Saved: ['/content/merged_csvs/Merged_Black_Monday.csv', '/content/merged_csvs/Merged_Nifty_Fifty.csv', '/content/merged_csvs/Merged_Subprime_Bubble.csv']
✅ 3개 파일·필수 열 확인 완료: ['Merged_Black_Monday.csv', 'Merged_Nifty_Fifty.csv', 'Merged_Subprime_Bubble.csv']
📊 Training prototypes (3개): ['Merged_Nifty_Fifty' 'Merged_Black_Monday' 'Merged_Subprime_Bubble']
📊 Total data points: 120
train_ds=48, batch_size=48, len(loader)=1

🚀 Starting training with bubble detection classifier
Epoch 001 | loss 3.8496
Epoch 010 | loss 2.6379
Epoch 020 | loss 2.3320
Epoch 030 | loss 2.0593
Epoch 040 | loss 1.8107
Epoch 050 | loss 1.5816
Epoch 060 | loss 1.4309
Epoch 070 | loss 1.3234
Epoch 080 | loss 1.2461
Epoch 090 | loss 1.1566
Epoch 100 | loss 1.0648
Epoch 110 | loss 0.9360
Epoch 120 | loss 0.8100
Epoch 130 | loss 0.7136
Epoch 1

In [1]:
# --- Add this to your training script after training is done ---

# 1. Consolidate necessary info
package = {
    'model_config': {
        'in_dim': len(macro_cols) + len(dow_cols),
        'emb': 128,
        'window': WINDOW
    },
    'model_state_dict': model.state_dict(),
    'scalers': {
        'sc_macro': sc_macro,
        'sc_dow': sc_dow,
        'need_cols': need_cols,
        'macro_cols': macro_cols,
        'dow_cols': dow_cols
    }
}

# 2. Save to a single file
save_path = 'bubble_model_package.pth'
torch.save(package, save_path)

print(f"✅ Model and scalers saved to '{save_path}'")

# 3. (Optional, for Colab) Download the file
try:
  from google.colab import files
  files.download(save_path)
except ImportError:
  pass

NameError: name 'macro_cols' is not defined