In [None]:
# =====================================================================
# INFERENCE SCRIPT FOR BUBBLE VS. NON-BUBBLE MODEL
# =====================================================================
import torch, torch.nn as nn, numpy as np, pandas as pd
from google.colab import files
import warnings
warnings.filterwarnings("ignore")

# --- 1. Define the exact same model architecture ---
# This is required to reconstruct the model from the saved weights.
class Encoder(nn.Module):
    """Sequence encoder that maps a window of features to a unit-length embedding.

    A 2-layer bidirectional LSTM reads the input (batch-first layout, i.e.
    ``(batch, seq_len, in_dim)``); the final hidden states of the top layer's
    two directions are concatenated, projected down to ``emb`` dimensions and
    L2-normalised.

    Args:
        in_dim: Number of features per time step.
        emb: Hidden size of the LSTM and size of the returned embedding.
    """

    def __init__(self, in_dim, emb=128):
        super().__init__()
        # Attribute names are part of the saved state dict; keep them stable.
        self.lstm = nn.LSTM(
            input_size=in_dim,
            hidden_size=emb,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )
        self.fc = nn.Linear(2 * emb, emb)

    def forward(self, x):
        """Return the normalised embedding of shape ``(batch, emb)`` for ``x``."""
        _, (hidden, _cell) = self.lstm(x)
        # The last two slices of the hidden-state stack belong to the top
        # layer: forward direction first, then reverse.
        top_forward, top_backward = hidden[-2], hidden[-1]
        merged = torch.cat((top_forward, top_backward), dim=1)
        projected = self.fc(merged)
        return nn.functional.normalize(projected, dim=1)

class BubbleDetector(nn.Module):
    """Encoder plus a small MLP head that outputs a probability per sequence.

    The encoder turns each input sequence into an embedding of size ``emb``;
    the classifier maps that embedding through two hidden layers (64 and 32
    units, ReLU, dropout 0.2) to a single sigmoid output.

    Args:
        in_dim: Number of features per time step, forwarded to ``Encoder``.
        emb: Embedding size, forwarded to ``Encoder``.
    """

    def __init__(self, in_dim, emb=128):
        super().__init__()
        self.encoder = Encoder(in_dim, emb)
        # Layer order/indices are part of the saved state dict; do not reorder.
        self.classifier = nn.Sequential(
            nn.Linear(emb, 64), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(64, 32), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(32, 1), nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``(embedding, probability)`` for a batch of sequences.

        The probability tensor always has shape ``(batch,)``.
        """
        z = self.encoder(x)
        prob = self.classifier(z)
        # Squeeze only the trailing singleton produced by the last Linear
        # layer. A bare squeeze() would also drop the batch axis when the
        # batch holds a single sequence, giving a 0-dim tensor whose rank
        # differs from every other batch size.
        return z, prob.squeeze(-1)

    def get_probability(self, x):
        """Return only the probabilities for ``x``, computed without gradients."""
        with torch.no_grad():
            _, prob = self.forward(x)
        return prob

# --- 2. Upload and load the trained model package ---
print("üìÇ Please upload the 'bubble_vs_nonbubble_model.pth' file:")
uploaded = files.upload()
# An empty upload (for example, the dialog was dismissed) would otherwise
# surface as a bare StopIteration from next(); fail with a clear message.
package_path = next(iter(uploaded), None)
if package_path is None:
    raise FileNotFoundError(
        "No model file was uploaded; re-run this cell and select the .pth package."
    )

# Load the entire package
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Use weights_only=False because the package contains scikit-learn scaler objects.
# SECURITY NOTE: weights_only=False unpickles arbitrary Python objects, so a
# malicious file can execute code on load. Only upload packages you created
# yourself or obtained from a trusted source.
package = torch.load(package_path, map_location=DEVICE, weights_only=False)

# --- 3. Re-create the model and load the saved state ---
# Re-create the model with the saved configuration
model_config = package['model_config']
model = BubbleDetector(in_dim=model_config['in_dim'], emb=model_config['emb']).to(DEVICE)

# Load the trained weights
model.load_state_dict(package['model_state_dict'])
model.eval()  # Evaluation mode: disables the dropout layers for inference

# Extract the scalers and other info
scalers = package['scalers']
training_info = package['training_info']
print("‚úÖ Model and scalers loaded successfully!")


# --- 4. Define the prediction function ---
def analyze_single_file(csv_path, model, scalers, info, aggregation='mean'):
    """Score one CSV file with the trained model and aggregate the window scores.

    The file is read, gaps in the required columns are forward- then
    backward-filled, both feature groups are scaled with the fitted scalers,
    and the model is evaluated on every contiguous window of ``info['window']``
    rows. The per-window probabilities are then reduced to one number.

    Args:
        csv_path: Path of the CSV file; it must contain a ``Date`` column and
            the columns listed in ``info``.
        model: Model exposing ``get_probability(batch)`` and ``parameters()``
            (e.g. ``BubbleDetector``).
        scalers: Mapping with fitted ``'sc_macro'`` and ``'sc_dow'`` objects
            that provide ``transform``.
        info: Mapping with ``'window'``, ``'need_cols'``, ``'macro_cols'`` and
            ``'dow_cols'``.
        aggregation: ``'mean'``, ``'max'`` or ``'last'``.

    Returns:
        The aggregated probability, or ``None`` (after printing a message) when
        the file cannot be read/preprocessed or has fewer rows than the window.

    Raises:
        ValueError: If ``aggregation`` is not one of the supported names. This
            is checked before any file I/O or inference is done.
    """
    reducers = {
        'mean': np.mean,
        'max': np.max,
        'last': lambda values: values[-1],
    }
    # Fail fast: a typo in the aggregation name should not cost a full
    # inference pass (or be hidden behind an unrelated file error).
    if aggregation not in reducers:
        raise ValueError(f"Unknown aggregation method: {aggregation}")

    window = info['window']

    # Read and preprocess the new data
    try:
        new = pd.read_csv(csv_path, parse_dates=["Date"])
        # Use forward-fill then backward-fill to handle NaNs without deleting rows
        new[info['need_cols']] = new[info['need_cols']].ffill().bfill()
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller see None.
        print(f"‚ùå Error reading or processing CSV file: {e}")
        return None

    if len(new) < window:
        print(f"‚ùó Error: Input data must have at least {window} months. The provided file has only {len(new)}.")
        return None

    # Use the loaded scalers to transform new data
    Xm = scalers['sc_macro'].transform(new[info['macro_cols']]).astype("float32")
    Xd = scalers['sc_dow'].transform(new[info['dow_cols']]).astype("float32")
    # Stack the two feature groups once; each window is then a plain row slice.
    features = np.hstack([Xm, Xd])

    # Run on whatever device the given model lives on instead of relying on a
    # module-level global that may not match the model passed in.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    probabilities = []
    with torch.no_grad():
        # The length guard above guarantees at least one window here.
        for t in range(len(new) - window + 1):
            seq = torch.tensor(features[t:t + window]).unsqueeze(0).to(device)
            probabilities.append(model.get_probability(seq).cpu().item())

    return reducers[aggregation](probabilities)


# --- 5. Analyze the new file and display results ---
print("\nüìÇ Please upload the NEW economic CSV file you want to analyze:")
uploaded_data = files.upload()
# An empty upload would otherwise surface as a bare StopIteration from next();
# stop with an explicit message instead.
new_csv_path = next(iter(uploaded_data), None)
if new_csv_path is None:
    raise FileNotFoundError(
        "No CSV file was uploaded; re-run this cell and select the file to analyze."
    )

# Calculate and print the result (None means the file could not be scored)
probability = analyze_single_file(new_csv_path, model, scalers, training_info, aggregation='mean')

print("\n" + "="*60)
print("‚úÖ Analysis Complete")
print(f"File Analyzed: '{new_csv_path}'")

if probability is not None:
    print(f"üìä Calculated Bubble Probability Score: {probability:.4f}")
else:
    print("   Could not generate a result due to errors.")
print("="*60)

üìÇ Please upload the 'bubble_vs_nonbubble_model.pth' file:


Saving bubble_vs_nonbubble_model.pth to bubble_vs_nonbubble_model.pth
‚úÖ Model and scalers loaded successfully!

üìÇ Please upload the NEW economic CSV file you want to analyze:


Saving Merged_Subprime_Bubble_Last_24_months.csv to Merged_Subprime_Bubble_Last_24_months.csv

‚úÖ Analysis Complete
File Analyzed: 'Merged_Subprime_Bubble_Last_24_months.csv'
üìä Calculated Bubble Probability Score: 0.7763
