In [None]:
# =====================================================================
# Approach 1 (bubble → non-bubble): INFERENCE on a non-bubble CSV
# - Loads saved package (works for bilstm/transformer).
# - Prints mean/latest prob and simple trend.
# =====================================================================

import torch, torch.nn as nn, numpy as np, pandas as pd, warnings, os, time, math
warnings.filterwarnings("ignore")

# Detect Google Colab by attempting the import: only an ImportError means
# "not running in Colab".
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# ---- Encoder classes (must match training)
class EncoderBiLSTM(nn.Module):
    """Two-layer bidirectional LSTM encoder that emits an L2-normalised vector.

    The attribute names ``lstm`` and ``fc`` become state-dict keys, so they
    must stay as they are for saved weights to load.
    """

    def __init__(self, in_dim, emb=128):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=in_dim,
            hidden_size=emb,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )
        # Two hidden states of width ``emb`` are concatenated before the
        # projection, hence the 2 * emb input features.
        self.fc = nn.Linear(2 * emb, emb)

    def forward(self, x):
        """Encode a batch-first sequence ``x`` into one unit-norm row per sample."""
        _, (hidden, _) = self.lstm(x)
        # Take the last two entries of the final hidden-state stack and join
        # them along the feature axis.
        second_last, last = hidden[-2], hidden[-1]
        joined = torch.cat((second_last, last), dim=1)
        projected = self.fc(joined)
        return nn.functional.normalize(projected, dim=1)

class SinusoidalPE(nn.Module):
    """Fixed sinusoidal positional encoding added to a batch-first sequence.

    Even feature columns hold ``sin(pos * div)`` and odd columns hold
    ``cos(pos * div)``. A table for ``max_len`` positions is precomputed and
    registered as the buffer ``pe`` with shape ``(1, max_len, d_model)``; the
    buffer name and shape are kept so existing state dicts still load.

    Args:
        d_model: Width of the feature axis. Odd values are supported (the
            last sine column simply has no cosine partner).
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model, max_len=512):
        super().__init__()
        self.register_buffer("pe", self._table(max_len, d_model).unsqueeze(0))

    @staticmethod
    def _table(length, d_model):
        """Return the ``(length, d_model)`` sin/cos table for positions 0..length-1."""
        pe = torch.zeros(length, d_model)
        pos = torch.arange(0, length).unsqueeze(1)
        div = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = pos * div
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer cosine column than sine
        # columns, so trim the angles to the number of odd-indexed slots.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        return pe

    def forward(self, x):
        """Return ``x`` plus the encoding for its first ``x.size(1)`` positions."""
        seq_len = x.size(1)
        if seq_len <= self.pe.size(1):
            return x + self.pe[:, :seq_len]
        # The sequence is longer than the precomputed table: build the
        # encoding on the fly instead of failing on a shape mismatch.
        extended = self._table(seq_len, self.pe.size(2)).unsqueeze(0)
        return x + extended.to(device=x.device, dtype=self.pe.dtype)

class EncoderTransformer(nn.Module):
    """Transformer encoder that pools a sequence into one L2-normalised vector.

    Inputs are projected to ``emb`` features, given sinusoidal positions, run
    through a pre-norm ``nn.TransformerEncoder`` and pooled according to
    ``pool``:

    * ``"cls"``  - a learned token is prepended and its output is returned;
    * ``"mean"`` - the mean over the sequence axis is returned;
    * anything else - the output at the last position is returned.

    Attribute names (``input``, ``enc``, ``pos``, ``cls``) are state-dict keys
    and must not be renamed.
    """

    def __init__(self, in_dim, emb=128, nhead=4, num_layers=2, dropout=0.1, pool="last"):
        super().__init__()
        self.input = nn.Linear(in_dim, emb)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=emb,
            nhead=nhead,
            dropout=dropout,
            batch_first=True,
            norm_first=True,
        )
        self.enc = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.pos = SinusoidalPE(emb)
        self.pool = pool
        # The learned CLS token only exists when CLS pooling is requested.
        if pool == "cls":
            self.cls = nn.Parameter(torch.zeros(1, 1, emb))
        else:
            self.cls = None

    def forward(self, x):
        """Encode batch-first ``x`` and return one unit-norm row per sample."""
        tokens = self.input(x)
        if self.cls is not None:
            # Broadcast the single CLS token across the batch and prepend it.
            batch_size = x.size(0)
            tokens = torch.cat([self.cls.expand(batch_size, -1, -1), tokens], dim=1)
        encoded = self.enc(self.pos(tokens))
        if self.pool == "cls":
            pooled = encoded[:, 0, :]
        elif self.pool == "mean":
            pooled = encoded.mean(dim=1)
        else:
            pooled = encoded[:, -1, :]
        return nn.functional.normalize(pooled, dim=1)

class BubbleDetector(nn.Module):
    """Sequence encoder followed by an MLP head that outputs a probability.

    Args:
        in_dim: Number of input features per time step.
        emb: Embedding width produced by the encoder.
        kind: ``"bilstm"`` or ``"transformer"``; anything else raises
            ``ValueError``.
        pool, nhead, num_layers, dropout: Forwarded to ``EncoderTransformer``;
            unused by the BiLSTM encoder.

    The attribute names ``encoder`` and ``classifier`` (and the layer order
    inside ``classifier``) determine state-dict keys and must not change.
    """

    def __init__(self, in_dim, emb=128, kind="bilstm", pool="last", nhead=4, num_layers=2, dropout=0.1):
        super().__init__()
        if kind not in ("bilstm", "transformer"):
            raise ValueError(f"Unknown encoder: {kind}")
        if kind == "bilstm":
            self.encoder = EncoderBiLSTM(in_dim, emb)
        else:
            self.encoder = EncoderTransformer(
                in_dim,
                emb=emb,
                nhead=nhead,
                num_layers=num_layers,
                dropout=dropout,
                pool=pool,
            )
        head_layers = [
            nn.Linear(emb, 64), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(64, 32), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(32, 1), nn.Sigmoid(),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return ``(embedding, probability)`` for the batch ``x``.

        ``squeeze()`` removes every size-1 dimension, so a batch of one
        yields a 0-dim probability tensor rather than shape ``(1,)``.
        """
        embedding = self.encoder(x)
        probability = self.classifier(embedding).squeeze()
        return embedding, probability

    @torch.no_grad()
    def get_probability(self, x):
        """Return only the probability, computed without gradient tracking.

        This does not switch the module to eval mode; callers are expected to
        have called ``eval()`` if dropout should be disabled.
        """
        return self.forward(x)[1]

def get_risk_interpretation(prob):
    """Translate a probability into a colour-coded risk label.

    Bands are checked from highest to lowest and each lower bound is
    inclusive: >= 0.8, >= 0.6, >= 0.4, >= 0.2, otherwise "Very Low Risk".
    """
    bands = (
        (0.8, "🔴 Very High Risk"),
        (0.6, "🟠 High Risk"),
        (0.4, "🟡 Moderate Risk"),
        (0.2, "🟢 Low Risk"),
    )
    for lower_bound, label in bands:
        if prob >= lower_bound:
            return label
    return "🔵 Very Low Risk"

def calculate_all_probabilities(csv_path, model, scalers, info):
    """Score every sliding window of a CSV and return the probabilities.

    Args:
        csv_path: Path to a CSV containing a ``Date`` column plus the feature
            columns named in ``scalers``. A ``PPIACO`` column is renamed to
            ``PPI`` when no ``PPI`` column exists.
        model: Object exposing ``get_probability(batch)``; it is called once
            per window with a tensor of shape ``(1, window, n_features)``.
        scalers: Mapping with ``need_cols``, ``macro_cols``, ``dow_cols``
            (column-name lists) and ``sc_macro`` / ``sc_dow`` (objects with a
            ``transform`` method).
        info: Mapping; the window length is ``info.get("window", 24)``.

    Returns:
        A list of Python floats, one per window, in file row order. The list
        is empty when no row has all of ``need_cols`` present.

    NOTE(review): windows follow the row order of the file; this presumably
    expects the CSV to already be sorted chronologically - confirm.
    """
    df = pd.read_csv(csv_path, parse_dates=["Date"])
    if "PPIACO" in df.columns and "PPI" not in df.columns:
        df = df.rename(columns={"PPIACO": "PPI"})
    need_cols = scalers["need_cols"]
    macro_cols = scalers["macro_cols"]
    dow_cols = scalers["dow_cols"]

    df_clean = df.dropna(subset=need_cols).reset_index(drop=True)
    window = info.get("window", 24)
    if len(df_clean) < window:
        print(f"❗ Warning: only {len(df_clean)} rows (need {window}). Using all rows.")
        window = len(df_clean)
        if window == 0:
            return []

    # Feed the model tensors on the device its weights actually live on; fall
    # back to the module-level DEVICE when the model exposes no parameters.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = DEVICE

    macro = scalers["sc_macro"].transform(df_clean[macro_cols]).astype("float32")
    dow = scalers["sc_dow"].transform(df_clean[dow_cols]).astype("float32")
    # Build the full feature matrix and move it to the device once; every
    # window below is then just a row slice of it (macro columns first).
    features = torch.tensor(np.hstack([macro, dow])).to(device)

    n_windows = len(df_clean) - window + 1
    return [
        model.get_probability(features[start:start + window].unsqueeze(0)).cpu().item()
        for start in range(n_windows)
    ]

# ---- Load package
if IN_COLAB:
    from google.colab import files
    print("📦 Upload 'bubble_model_package.pth' (trained on 4 bubbles)")
    up = files.upload()
    # Fail with a clear message instead of a bare StopIteration when the
    # upload result is empty (e.g. the dialog was cancelled).
    if not up:
        raise RuntimeError("No file was uploaded; expected 'bubble_model_package.pth'.")
    package_path = next(iter(up))
else:
    package_path = "bubble_model_package.pth"

# SECURITY: weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code. It is kept because the package also carries
# non-tensor objects (the scalers), so only load packages from a trusted source.
package = torch.load(package_path, map_location=DEVICE, weights_only=False)
cfg     = package["model_config"]
scalers = package["scalers"]

# "pool" is optional when the model is built (default "last"), so read it the
# same way here rather than raising KeyError in a log line.
print(f"✅ Loaded package | encoder={cfg['encoder_kind']} | emb={cfg['emb']} | pool={cfg.get('pool', 'last')} | trained_on={cfg.get('train_prototypes', [])}")

# Build the detector from the saved configuration, then restore its weights.
# Transformer-specific settings live in an optional sub-dict of the config.
_transformer_cfg = cfg.get("transformer", {})
model = BubbleDetector(
    in_dim=cfg.get("in_dim", 6),
    emb=cfg["emb"],
    kind=cfg["encoder_kind"],
    pool=cfg.get("pool", "last"),
    nhead=_transformer_cfg.get("nhead", 4),
    num_layers=_transformer_cfg.get("num_layers", 2),
    dropout=_transformer_cfg.get("dropout", 0.1),
)
model = model.to(DEVICE)
model.load_state_dict(package["model_state_dict"])
# Inference only: switch dropout layers off.
model.eval()

# ---- Upload ONE non-bubble CSV to analyze
if IN_COLAB:
    print("\n📂 Upload ONE non-bubble CSV to evaluate (session k of 4):")
    up2 = files.upload()
    # Fail with a clear message instead of a bare StopIteration when the
    # upload result is empty (e.g. the dialog was cancelled).
    if not up2:
        raise RuntimeError("No CSV was uploaded; nothing to evaluate.")
    csv_path = next(iter(up2))
else:
    # Placeholder path: point this at the CSV to evaluate outside Colab.
    csv_path = "YOUR_NON_BUBBLE.csv"

# perf_counter is monotonic, so the elapsed time cannot be skewed by
# wall-clock adjustments the way time.time() differences can.
# NOTE(review): the model config is passed as `info`; the window length is
# taken from cfg["window"] if present, otherwise the default of 24 is used.
t0 = time.perf_counter()
probs = calculate_all_probabilities(csv_path, model, scalers, cfg)
dt = (time.perf_counter() - t0)*1000

print("\n" + "="*70)
print(f"🔎 File: {os.path.basename(csv_path)}")
if not probs:
    print("❌ Not enough clean rows to analyze.")
else:
    mean_prob = float(np.mean(probs))
    latest    = float(probs[-1])
    trend = "N/A"
    if len(probs) > 1:
        # Slope of a least-squares line through (window index, probability);
        # changes smaller than 0.01 per window are reported as stable.
        slope = np.polyfit(range(len(probs)), probs, 1)[0]
        trend = "Increasing ↗️" if slope > 0.01 else ("Decreasing ↘️" if slope < -0.01 else "Stable ↔️")
    print(f"📈 Mean Bubble Probability:   {mean_prob:.4f}")
    print(f"📉 Latest Bubble Probability: {latest:.4f}")
    print(f"💬 Interpretation:            {get_risk_interpretation(latest)}")
    print(f"📊 Probability Trend:         {trend}")
    print(f"⏱️ Processing Time:           {dt:.2f} ms")
print("="*70)

📦 Upload 'bubble_model_package.pth' (trained on 4 bubbles)


Saving bubble_model_package.pth to bubble_model_package (2).pth
✅ Loaded package | encoder=transformer | emb=128 | pool=last | trained_on=['Merged_Nifty_Fifty', 'Merged_Black_Monday', 'Merged_Dot_Com', 'Merged_Subprime_Bubble']

📂 Upload ONE non-bubble CSV to evaluate (session k of 4):
