In [None]:
# =====================================================================
# ①  Upload & Save (Two-stage: Non-bubble then Bubble files)
# =====================================================================
from google.colab import files
import pathlib, glob, os, warnings, re, collections
warnings.filterwarnings("ignore")

DATA_DIR = "/content/economic_indicators"
pathlib.Path(DATA_DIR).mkdir(parents=True, exist_ok=True)


def _save_stage(uploaded, kind, expected, reserved=()):
    """Validate one upload stage and write its files into DATA_DIR.

    Files are stored under their name with any " (n)" copy marker removed
    (the same pattern the deduplication step uses), so re-running this cell
    overwrites the earlier copy instead of leaving several near-identical
    files behind, and the returned names always match what is on disk.

    Args:
        uploaded: mapping of file name -> raw bytes, as returned by
            files.upload().
        kind: stage name used in messages ("non-bubble" or "bubble").
        expected: number of files this stage must contain.
        reserved: names already written by an earlier stage; reusing one of
            them is rejected so a file cannot end up in both classes.

    Returns:
        List of file names (relative to DATA_DIR) written by this stage.

    Raises:
        AssertionError: on a wrong file count or a file-name collision.
    """
    assert len(uploaded) == expected, f"❗ Expected {expected} {kind} files, got {len(uploaded)}"
    saved = []
    for raw_name, payload in uploaded.items():
        name = re.sub(r"\s*\(\d+\)", "", raw_name)
        assert name not in saved and name not in reserved, (
            f"❗ '{raw_name}' collides with another uploaded file named '{name}'")
        with open(os.path.join(DATA_DIR, name), "wb") as fh:
            fh.write(payload)
        saved.append(name)
    print(f"✅ Saved {len(saved)} {kind} files")
    return saved


# Stage 1: Upload non-bubble files
print("📤 STAGE 1: Please upload 4 NON-BUBBLE era CSV files")
print("   These should be economic indicators from normal/stable periods")
uploaded_non_bubble = files.upload()
non_bubble_files = _save_stage(uploaded_non_bubble, "non-bubble", 4)

# Stage 2: Upload bubble files (names used in stage 1 are off limits)
print("\n📤 STAGE 2: Please upload 3 BUBBLE era CSV files")
print("   These should be economic indicators from bubble periods")
uploaded_bubble = files.upload()
bubble_files = _save_stage(uploaded_bubble, "bubble", 3, reserved=non_bubble_files)

# Summary
print(f"\n📝 Total files saved: {len(glob.glob(f'{DATA_DIR}/*.csv'))}")
print(f"✅ Non-bubble files (4): {non_bubble_files}")
print(f"✅ Bubble files (3): {bubble_files}")

# =====================================================================
# ②  Deduplication + PPIACO→PPI + Column Validation
# =====================================================================
import pandas as pd

# Gather every CSV in DATA_DIR. Names that differ only by a " (n)" marker map
# to the same key; the first path in sorted order wins for each key.
raw_paths = glob.glob(os.path.join(DATA_DIR, "*.csv"))
dedup = collections.OrderedDict()
for csv_path in sorted(raw_paths):
    base_name = re.sub(r"\s*\(\d+\)", "", os.path.basename(csv_path))
    dedup.setdefault(base_name, csv_path)
paths = [*dedup.values()]

# Rewrite files that carry the PPIACO column name so they expose PPI instead.
# Files that already have a PPI column are left untouched.
for csv_path in paths:
    frame = pd.read_csv(csv_path)
    has_ppiaco = "PPIACO" in frame.columns
    if has_ppiaco and "PPI" not in frame.columns:
        frame.rename(columns={"PPIACO": "PPI"}).to_csv(csv_path, index=False)

# Column groups used by the rest of the notebook (GDP removed as in original)
need_cols  = ["Date", "CPI", "PPI", "FEDFUNDS", "DGS10", "DJIA", "SP500_PE"]
macro_cols = ["CPI", "PPI", "FEDFUNDS", "DGS10"]
dow_cols   = ["DJIA", "SP500_PE"]

# Exactly seven files must remain, and each must expose every required column.
assert len(paths) == 7, f"❗ Found {len(paths)} CSV files — need exactly 7!"
for csv_path in paths:
    header = pd.read_csv(csv_path, nrows=1).columns
    miss = set(need_cols) - set(header)
    assert not miss, f"{os.path.basename(csv_path)} missing columns: {miss}"
print("✅ 7 files validated with all required columns")

# =====================================================================
# ③  Libraries & Data Preprocessing
# =====================================================================
!pip -q install tsaug
import numpy as np, torch, torch.nn as nn, torch.optim as optim, random
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from tsaug import TimeWarp, Drift, AddNoise

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
WINDOW = 24  # 24 months (2 years) window

# Seed every RNG the notebook can reach so a fresh "Restart & Run All" gives
# the same shuffling, weight initialisation and dropout masks.
# NOTE(review): tsaug presumably draws from NumPy's global RNG, in which case
# np.random.seed also pins the augmentations — confirm.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Combine all CSVs with bubble/non-bubble labels.
# Each file becomes one "prototype" (its file name without the extension) and
# every row inherits the class of the upload stage the file came from.
df_list = []
for p in paths:
    fname = os.path.basename(p)

    # Resolve the label first so an unclassified file fails before it is parsed.
    if fname in bubble_files:
        is_bubble = 1
    elif fname in non_bubble_files:
        is_bubble = 0
    else:
        raise ValueError(f"File {fname} not classified as bubble or non-bubble!")

    tmp = pd.read_csv(p, parse_dates=["Date"])
    # splitext removes only the trailing extension; str.replace would also
    # drop a ".csv" that happens to appear in the middle of the name.
    tmp["Prototype"] = os.path.splitext(fname)[0]
    tmp["IsBubble"] = is_bubble
    df_list.append(tmp)

# Merge and clean.
# Sort by prototype first, then by date: the windowing step slices consecutive
# rows, so each prototype's rows must sit next to each other in time order.
# Sorting on Date alone interleaves prototypes whose date ranges overlap.
df = (pd.concat(df_list, ignore_index=True)
        .sort_values(["Prototype", "Date"])
        .dropna(subset=need_cols)  # Retain original missing value handling
        .reset_index(drop=True))

# Standardise the two feature groups independently. Each scaler is fitted on
# the full merged frame and kept around so it can be reused later.
macro_frame = df[macro_cols]
dow_frame   = df[dow_cols]
sc_macro = StandardScaler().fit(macro_frame)
sc_dow   = StandardScaler().fit(dow_frame)
Xm_all   = sc_macro.transform(macro_frame).astype(np.float32)
Xd_all   = sc_dow.transform(dow_frame).astype(np.float32)

# Integer code per row identifying its source file, plus the 0/1 class label.
prot_codes = pd.Categorical(df["Prototype"]).codes
labels_all = df["IsBubble"].to_numpy().astype(np.float32)

# Quick sanity summary of what goes into training.
n_bubble     = (labels_all == 1).sum()
n_non_bubble = (labels_all == 0).sum()
print(f"\n📊 Training prototypes (7 files): {df['Prototype'].unique()}")
print(f"📊 Total data points: {len(df)}")
print(f"📊 Bubble data points: {n_bubble}")
print(f"📊 Non-bubble data points: {n_non_bubble}")

# =====================================================================
# ④  Contrastive Dataset & DataLoader (ORIGINAL LOGIC PRESERVED)
# =====================================================================
# Augmentation pipeline: three tsaug augmenters chained with `+`, applied in
# the order written whenever aug.augment(...) is called.
aug = (
    TimeWarp(n_speed_change=3, max_speed_ratio=2.0)
    + Drift(max_drift=(0, 0.1))
    + AddNoise(scale=0.01)
)

class ContrastiveDataset(Dataset):
    """Sliding-window dataset yielding (anchor, augmented view, label) triples.

    Args:
        Xm: 2-D array of macro features, one row per time step.
        Xd: 2-D array of market features, row-aligned with ``Xm``.
        codes: per-row integer code identifying the source prototype/file.
        labels: per-row class label; the label of a window is the label of
            its first row.
        win: window length in rows.
        augmenter: optional object exposing ``augment(X)``. When omitted, the
            module-level ``aug`` pipeline is used (looked up at call time).

    Each item is ``(anchor, alt, label)`` where ``anchor`` and ``alt`` are
    float32 tensors of shape ``(win, Xm.shape[1] + Xd.shape[1])``.
    """

    def __init__(self, Xm, Xd, codes, labels, win=24, augmenter=None):
        self.Xm, self.Xd, self.codes, self.labels, self.win = Xm, Xd, codes, labels, win
        self.augmenter = augmenter

        # A start index is valid only when the whole slice [i, i + win) lies
        # inside the data AND every row in it belongs to the same prototype.
        # This keeps the last full window of each prototype and rejects
        # windows that would straddle rows from another file.
        code_arr = np.asarray(codes)
        n_rows = len(code_arr)
        self.starts = []
        if win >= 1:
            for b in np.unique(code_arr):
                for i in np.flatnonzero(code_arr == b):
                    stop = i + win
                    if stop <= n_rows and np.all(code_arr[i:stop] == b):
                        self.starts.append(int(i))

    def __len__(self):
        return len(self.starts)

    def _augment(self, block):
        """Augment one ``(win, channels)`` window and return the same shape."""
        augmenter = self.augmenter if self.augmenter is not None else aug
        # tsaug reads a 2-D array as (n_series, time). Adding a leading axis
        # makes the window a single series of shape (1, time, channels), so
        # the augmentation runs along the time axis instead of across features.
        out = augmenter.augment(block[np.newaxis, ...])
        return np.asarray(out)[0]

    def __getitem__(self, idx):
        s = self.starts[idx]
        e = s + self.win
        # Original anchor: macro and market features side by side.
        anc = np.hstack([self.Xm[s:e], self.Xd[s:e]])
        # Augmented alternative view of the same window.
        alt = np.hstack([self._augment(self.Xm[s:e]),
                         self._augment(self.Xd[s:e])])
        # Label of the window is taken from its first row.
        label = self.labels[s]
        # Cast explicitly: augmenters may hand back float64, which would not
        # match the float32 model weights.
        return (torch.tensor(anc, dtype=torch.float32),
                torch.tensor(alt, dtype=torch.float32),
                torch.tensor(label, dtype=torch.float32))

# Build the training set of sliding windows and wrap it in a shuffling loader.
train_ds   = ContrastiveDataset(Xm_all, Xd_all, prot_codes, labels_all, WINDOW)
if len(train_ds) == 0:
    # Without this guard batch_size would be 0 and DataLoader would raise an
    # error that says nothing about the real cause (too little data).
    raise ValueError(
        f"No training windows of length {WINDOW} could be built from the uploaded data")
# Never ask for a batch larger than the dataset itself.
batch_size = min(64, len(train_ds))
loader     = DataLoader(train_ds, batch_size=batch_size,
                        shuffle=True, drop_last=False)
print(f"train_ds={len(train_ds)}, batch_size={batch_size}, len(loader)={len(loader)}")

# =====================================================================
# ⑤  [ORIGINAL] Encoder + Bubble Classifier Architecture
# =====================================================================
class Encoder(nn.Module):
    """Two-layer bidirectional LSTM that turns a window into a unit-norm vector.

    Args:
        in_dim: number of features per time step.
        emb: LSTM hidden size and size of the returned embedding.
    """

    def __init__(self, in_dim, emb=128):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=in_dim,
            hidden_size=emb,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )
        self.fc = nn.Linear(2 * emb, emb)

    def forward(self, x):
        """Encode ``x`` (batch-first sequence) into L2-normalised embeddings."""
        _, (hidden, _) = self.lstm(x)
        # Join the last two slices of the final hidden state along the feature
        # axis (presumably the top layer's two directions — see nn.LSTM docs).
        second_last, last = hidden[-2], hidden[-1]
        joined = torch.cat((second_last, last), dim=1)
        projected = self.fc(joined)
        return nn.functional.normalize(projected, dim=1)

class BubbleDetector(nn.Module):
    """Combined model with encoder and probability classifier.

    Args:
        in_dim: number of features per time step, forwarded to the encoder.
        emb: embedding size produced by the encoder and consumed by the head.
    """

    def __init__(self, in_dim, emb=128):
        super().__init__()
        self.encoder = Encoder(in_dim, emb)
        # Classification head: outputs probability score
        self.classifier = nn.Sequential(
            nn.Linear(emb, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1),
            nn.Sigmoid()  # Ensures output is in [0, 1]
        )

    def forward(self, x):
        """Return ``(embedding, probability)`` for a batch of windows.

        The probability tensor always has shape ``(batch,)``.
        """
        z = self.encoder(x)
        prob = self.classifier(z)
        # Drop only the trailing unit dimension. A bare squeeze() would also
        # remove the batch dimension of a single-sample batch, and the loss
        # would then see mismatched input/target shapes.
        return z, prob.squeeze(-1)

    def get_probability(self, x):
        """Return the probability score without tracking gradients.

        Dropout is switched off for the duration of the call so repeated
        calls on the same input give the same score; the previous
        train/eval mode is restored afterwards.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                _, prob = self.forward(x)
        finally:
            self.train(was_training)
        return prob

# Initialize model
model = BubbleDetector(in_dim=len(macro_cols)+len(dow_cols), emb=128).to(DEVICE)

# =====================================================================
# ⑥  [ORIGINAL] Training with Contrastive + Classification Loss
# =====================================================================
temperature = 0.05
def ntxent(z1, z2):
    """Contrastive loss pairing row i of ``z1`` with row i of ``z2``.

    Both inputs are L2-normalised, their pairwise dot products are divided by
    the module-level ``temperature``, and cross-entropy is taken with the
    matching row index as the target class.
    """
    unit_a = nn.functional.normalize(z1, dim=1)
    unit_b = nn.functional.normalize(z2, dim=1)
    logits = (unit_a @ unit_b.t()) / temperature
    targets = torch.arange(unit_a.size(0), device=unit_a.device)
    return nn.functional.cross_entropy(logits, targets)

# Classification criterion applied to the detector's sigmoid outputs
bce_loss = nn.BCELoss()

opt = optim.Adam(model.parameters(), lr=3e-4)
EPOCHS = 300

print(f"\n🚀 Starting training with bubble detection classifier")
print(f"   - Non-bubble labels → 0")
print(f"   - Bubble labels → 1")

for ep in range(1, EPOCHS+1):
    model.train()
    tot_loss = 0

    for batch in loader:
        # Move anchor, augmented view and labels to the training device
        anc, alt, labels = (t.to(DEVICE) for t in batch)

        opt.zero_grad()

        # Embed and score both views of every window
        z1, prob1 = model(anc)
        z2, prob2 = model(alt)

        # Representation term: pull the two views of a window together
        cont_loss = ntxent(z1, z2)

        # Supervised term: both views must predict the window's 0/1 label
        class_loss = bce_loss(prob1, labels) + bce_loss(prob2, labels)

        # Total objective; the classification term carries half weight
        loss = cont_loss + 0.5 * class_loss

        loss.backward()
        opt.step()
        tot_loss += loss.item()

    # Report on the first epoch and on every tenth one
    if ep == 1 or ep % 10 == 0:
        mean_loss = tot_loss / len(loader)
        print(f"Epoch {ep:03d} | loss {mean_loss:.4f}")

📤 STAGE 1: Please upload 4 NON-BUBBLE era CSV files
   These should be economic indicators from normal/stable periods


TypeError: 'NoneType' object is not subscriptable

In [None]:
# =====================================================================
# ⑨  Save and Download Model Package
# =====================================================================
import torch
from google.colab import files

# Create a dictionary to hold everything needed for inference.
# Weights are copied to the CPU first so the package can be loaded on a
# machine without a GPU without needing map_location.
cpu_state_dict = {name: tensor.detach().cpu()
                  for name, tensor in model.state_dict().items()}

model_package = {
    'model_state_dict': cpu_state_dict,
    'model_config': {
        'in_dim': len(macro_cols) + len(dow_cols),
        # Read the embedding size from the trained model rather than
        # repeating the literal, so the config cannot drift from the weights.
        'emb': model.encoder.fc.out_features
    },
    'scalers': {
        'sc_macro': sc_macro,
        'sc_dow': sc_dow
    },
    'training_info': {
        'need_cols': need_cols,
        'macro_cols': macro_cols,
        'dow_cols': dow_cols,
        'window': WINDOW
    }
}

PACKAGE_SAVE_PATH = "bubble_vs_nonbubble_model.pth"

# Save the entire package to a single file.
# NOTE(review): the package contains pickled scikit-learn scalers, so recent
# PyTorch versions will presumably need torch.load(..., weights_only=False)
# to read it back — only do that with files you trust.
torch.save(model_package, PACKAGE_SAVE_PATH)

print(f"\n✅ Model package saved to: {PACKAGE_SAVE_PATH}")
print("📦 Package contains: Model Weights, Config, Scalers, and Training Info.")

# Trigger the download in your browser
files.download(PACKAGE_SAVE_PATH)


✅ Model package saved to: bubble_vs_nonbubble_model.pth
📦 Package contains: Model Weights, Config, Scalers, and Training Info.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>