# 🧪 Meme Stock Deep Learning - Local Test Version

## 📋 Overview
- **Purpose**: Test on MacBook with small data subset
- **Models**: Lightweight versions of MLP, LSTM, Transformer
- **Data**: 100 synthetic samples (60 train / 20 val / 20 test) for quick testing
- **Time**: ~2-5 minutes total

## 1️⃣ Setup

In [6]:
#!/usr/bin/env python3
"""
🧪 Local Test Version - Small data for MacBook testing
"""

import pandas as pd
import numpy as np
import json
import warnings
from datetime import datetime
import time
warnings.filterwarnings('ignore')

# ML libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from scipy.stats import spearmanr

print("✅ Libraries imported")
print(f"PyTorch version: {torch.__version__}")

# Choose the compute backend once: Apple Metal (MPS) when PyTorch reports it
# as available, otherwise plain CPU. Every later cell reads `device`.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

print(f"Device: {'MPS' if device.type == 'mps' else 'CPU'}")
print("🎯 Using Apple Metal GPU" if device.type == "mps" else "💻 Using CPU")

✅ Libraries imported
PyTorch version: 2.2.2
Device: CPU
💻 Using CPU


## 2️⃣ Create Small Test Data

In [9]:
def create_test_data(n_samples=100, n_features=47, seq_length=20, seq_features=49):
    """Generate a small synthetic dataset, pre-split into train/val/test.

    The NumPy global RNG is seeded with 42 on every call, so repeated calls
    with the same arguments return identical arrays.

    Args:
        n_samples: Total number of samples to generate.
        n_features: Number of columns in the tabular matrix (the target uses
            columns 0 and 1).
        seq_length: Number of time steps in each sequence sample.
        seq_features: Number of features per time step.

    Returns:
        A 3-tuple ``(tabular, sequence, targets)``; each element is itself a
        ``(train, val, test)`` tuple of float32 arrays. The first 60% of the
        samples form the train split, the next 20% the validation split and
        the remainder the test split.
    """
    print(f"🔧 Creating test data: {n_samples} samples")

    # Fixed seed for reproducibility. The draw order below (tabular,
    # sequences, noise) determines the generated values, so keep it.
    np.random.seed(42)
    X_tabular = np.random.randn(n_samples, n_features).astype(np.float32)
    X_sequence = np.random.randn(n_samples, seq_length, seq_features).astype(np.float32)
    noise = np.random.randn(n_samples)

    # Target: linear signal from the first two tabular columns plus small noise.
    y = (
        0.1 * X_tabular[:, 0] +
        0.05 * X_tabular[:, 1] +
        0.02 * noise
    ).astype(np.float32)

    # Split boundaries: [0, train_end) / [train_end, val_end) / [val_end, n).
    train_end = int(0.6 * n_samples)
    val_end = train_end + int(0.2 * n_samples)

    def _three_way(array):
        """Slice ``array`` along axis 0 into (train, val, test) pieces."""
        return array[:train_end], array[train_end:val_end], array[val_end:]

    tabular_splits = _three_way(X_tabular)
    sequence_splits = _three_way(X_sequence)
    target_splits = _three_way(y)

    print(f"✅ Test data created:")
    for label, split in zip(("Train", "Val", "Test"), target_splits):
        print(f"   {label}: {len(split)} samples")

    return tabular_splits, sequence_splits, target_splits

# Build the 100-sample synthetic dataset, then unpack the train/val/test
# splits of each group (tabular features, sequences, targets) in one step.
tabular_data, sequence_data, targets = create_test_data(n_samples=100)
(
    (X_train_tab, X_val_tab, X_test_tab),
    (X_train_seq, X_val_seq, X_test_seq),
    (y_train, y_val, y_test),
) = tabular_data, sequence_data, targets

🔧 Creating test data: 100 samples
✅ Test data created:
   Train: 60 samples
   Val: 20 samples
   Test: 20 samples


## 3️⃣ Lightweight Model Definitions

In [10]:
class SimpleMLP(nn.Module):
    """Lightweight fully-connected regressor for testing.

    The network is a stack of ``Linear -> ReLU -> Dropout`` groups, one per
    entry of ``hidden_dims``, followed by a final ``Linear`` layer with a
    single output unit.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dims: Widths of the hidden layers, in order. Any iterable of
            ints works; an empty one yields a single linear layer.
        dropout: Dropout probability applied after every hidden activation.
    """

    # The default is an immutable tuple so it can never be shared and mutated
    # across instances (the classic mutable-default-argument pitfall).
    def __init__(self, input_dim, hidden_dims=(64, 32), dropout=0.2):
        super().__init__()

        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout)
            ])
            prev_dim = hidden_dim

        # Regression head: one scalar per sample.
        layers.append(nn.Linear(prev_dim, 1))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return predictions with a trailing dimension of size 1."""
        return self.network(x)


class SimpleLSTM(nn.Module):
    """Small LSTM regressor for testing.

    A batch-first LSTM reads the sequence; the output at the final time step
    is passed through a two-layer head (``Linear -> ReLU -> Linear``) that
    produces one value per sample.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size=32, num_layers=1):
        super().__init__()

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

        head_layers = [nn.Linear(hidden_size, 16), nn.ReLU(), nn.Linear(16, 1)]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a (batch, time, feature) input to a (batch, 1) prediction."""
        sequence_out, _state = self.lstm(x)
        # Only the representation at the last time step feeds the head.
        final_step = sequence_out[:, -1]
        return self.fc(final_step)


class SimpleTransformer(nn.Module):
    """Small Transformer-encoder regressor for testing.

    Inputs are linearly projected to ``d_model``, passed through a stack of
    batch-first encoder layers, averaged over the time axis and mapped to a
    single output value.

    Args:
        input_size: Number of features per time step.
        d_model: Embedding width used inside the encoder.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of encoder layers.
    """

    def __init__(self, input_size, d_model=64, nhead=4, num_layers=2):
        super().__init__()

        self.input_projection = nn.Linear(input_size, d_model)

        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=2 * d_model,
                dropout=0.1,
                batch_first=True,
            ),
            num_layers,
        )

        self.fc = nn.Linear(d_model, 1)

    def forward(self, x):
        """Map a (batch, time, feature) input to a (batch, 1) prediction."""
        embedded = self.input_projection(x)
        encoded = self.transformer(embedded)
        # Global pooling: average the encoded sequence over the time axis.
        pooled = encoded.mean(dim=1)
        return self.fc(pooled)

# Status marker: reaching this line means all three model classes above were defined.
print("✅ Model architectures defined")

✅ Model architectures defined


## 4️⃣ Quick Training Function

In [11]:
def quick_train(model, X_train, y_train, X_val, y_val, 
                epochs=20, batch_size=16, lr=0.001, model_name="Model"):
    """Train ``model`` briefly with Adam/MSE and report validation metrics.

    The model is moved to the module-level ``device``, trained for ``epochs``
    passes over shuffled mini-batches, and then scored once on the
    validation data.

    Args:
        model: ``nn.Module`` whose forward pass yields one value per sample
            (e.g. shape ``(batch, 1)``).
        X_train: Training inputs, in a form accepted by ``torch.FloatTensor``.
        y_train: Training targets, one value per training sample.
        X_val: Validation inputs, same layout as ``X_train``.
        y_val: Validation targets, one value per validation sample.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size used by the training ``DataLoader``.
        lr: Learning rate for the Adam optimizer.
        model_name: Label used in the progress messages.

    Returns:
        Tuple ``(model, train_losses, val_corr)``: the trained model, the
        list of per-epoch mean batch losses, and the Spearman correlation
        between validation targets and predictions.
    """

    print(f"\n🎯 Training {model_name}...")
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()

    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Create data loaders
    train_dataset = TensorDataset(
        torch.FloatTensor(X_train),
        torch.FloatTensor(y_train)
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Training loop
    train_losses = []

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            # Drop only the trailing output dimension. A bare squeeze() would
            # also remove the batch dimension when the final mini-batch holds
            # a single sample, leaving a 0-dim prediction that no longer
            # matches the (1,)-shaped target.
            outputs = model(batch_X).squeeze(-1)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        # Mean of the per-batch losses for this epoch.
        avg_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_loss)

        if epoch % 5 == 0:
            print(f"  Epoch {epoch:2d}: Loss={avg_loss:.4f}")

    # Validation: a single full-batch forward pass in eval mode, no gradients.
    model.eval()
    with torch.no_grad():
        X_val_tensor = torch.FloatTensor(X_val).to(device)
        val_pred = model(X_val_tensor).cpu().numpy().flatten()

    # Calculate metrics
    val_corr, _ = spearmanr(y_val, val_pred)
    val_rmse = np.sqrt(mean_squared_error(y_val, val_pred))

    elapsed = time.perf_counter() - start_time
    print(f"✅ {model_name} completed in {elapsed:.1f}s")
    print(f"   Val Correlation: {val_corr:.4f}")
    print(f"   Val RMSE: {val_rmse:.4f}")

    return model, train_losses, val_corr

## 5️⃣ Train Models

In [12]:
# Train the MLP on the tabular features; its input width is the number of
# columns in the training matrix.
mlp_model, mlp_losses, mlp_corr = quick_train(
    model=SimpleMLP(X_train_tab.shape[1]),
    X_train=X_train_tab,
    y_train=y_train,
    X_val=X_val_tab,
    y_val=y_val,
    epochs=20,
    model_name="MLP",
)


🎯 Training MLP...
  Epoch  0: Loss=0.0328
  Epoch  5: Loss=0.0085
  Epoch 10: Loss=0.0048
  Epoch 15: Loss=0.0037
✅ MLP completed in 6.9s
   Val Correlation: 0.6617
   Val RMSE: 0.0800


In [13]:
# Train the LSTM on the sequence data; its input size is the per-step
# feature count (last axis of the sequence array).
lstm_model, lstm_losses, lstm_corr = quick_train(
    model=SimpleLSTM(X_train_seq.shape[2]),
    X_train=X_train_seq,
    y_train=y_train,
    X_val=X_val_seq,
    y_val=y_val,
    epochs=20,
    model_name="LSTM",
)


🎯 Training LSTM...
  Epoch  0: Loss=0.0373
  Epoch  5: Loss=0.0081
  Epoch 10: Loss=0.0027
  Epoch 15: Loss=0.0006
✅ LSTM completed in 0.5s
   Val Correlation: 0.1308
   Val RMSE: 0.1381


In [14]:
# Train the Transformer on the same sequence data the LSTM used; its input
# size is the per-step feature count (last axis of the sequence array).
transformer_model, trans_losses, trans_corr = quick_train(
    model=SimpleTransformer(X_train_seq.shape[2]),
    X_train=X_train_seq,
    y_train=y_train,
    X_val=X_val_seq,
    y_val=y_val,
    epochs=20,
    model_name="Transformer",
)


🎯 Training Transformer...
  Epoch  0: Loss=0.0592
  Epoch  5: Loss=0.0050
  Epoch 10: Loss=0.0020
  Epoch 15: Loss=0.0007
✅ Transformer completed in 4.0s
   Val Correlation: 0.1835
   Val RMSE: 0.1278


## 6️⃣ Test Evaluation

In [15]:
def evaluate_model(model, X_test, y_test, model_name):
    """Score a trained model on held-out data.

    Args:
        model: Trained ``nn.Module``; it is switched to eval mode here.
        X_test: Test inputs, in a form accepted by ``torch.FloatTensor``.
        y_test: Test targets, one value per test sample.
        model_name: Label stored under the ``'model'`` key of the result.

    Returns:
        Dict with keys ``'model'``, ``'correlation'`` (Spearman correlation
        between targets and predictions) and ``'rmse'``.
    """
    model.eval()

    inputs = torch.FloatTensor(X_test).to(device)
    with torch.no_grad():
        raw_predictions = model(inputs)
    # Back to a flat NumPy vector so it lines up with y_test.
    y_pred = raw_predictions.cpu().numpy().flatten()

    rank_corr, _p_value = spearmanr(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)

    return {
        'model': model_name,
        'correlation': rank_corr,
        'rmse': np.sqrt(mse),
    }

# Score each trained model on the held-out test split. The MLP takes the
# tabular features; the LSTM and Transformer take the sequence data.
mlp_results = evaluate_model(mlp_model, X_test_tab, y_test, 'MLP')
lstm_results = evaluate_model(lstm_model, X_test_seq, y_test, 'LSTM')
trans_results = evaluate_model(transformer_model, X_test_seq, y_test, 'Transformer')

# Collected in training order; the summary cell stores this list in its JSON output.
results = [mlp_results, lstm_results, trans_results]

# Display results as a small fixed-width table
print("\n" + "="*50)
print("📊 TEST RESULTS")
print("="*50)
for result in results:
    print(f"{result['model']:12s}: Corr={result['correlation']:.4f}, RMSE={result['rmse']:.4f}")
print("="*50)


📊 TEST RESULTS
MLP         : Corr=0.6226, RMSE=0.0880
LSTM        : Corr=0.3489, RMSE=0.1302
Transformer : Corr=0.3789, RMSE=0.1009


## 7️⃣ Load Real Data Test

In [16]:
def test_real_data_loading():
    """Check that the exported dataset files can be found and opened.

    Looks for a ``*metadata*.json`` file under ``../data/colab_datasets``,
    reads its ``'timestamp'`` entry, then loads the first 50 rows of the
    matching ``tabular_train_<timestamp>.csv`` and opens the matching
    ``sequences_<timestamp>.npz`` archive.

    Returns:
        ``True`` if all three files loaded, ``False`` if no metadata file
        exists or any step raised. Failures are printed, never raised.
    """
    # Function-scope import: `glob` is only needed by this check.
    import glob

    print("\n🔍 Testing real data loading...")

    data_dir = '../data/colab_datasets'

    # Deliberately broad handler: this is a best-effort availability probe,
    # so any failure is reported and turned into False rather than raised.
    try:
        # glob returns matches in arbitrary order; sort so the choice is
        # deterministic. NOTE(review): taking the last entry presumably picks
        # the newest export if file names embed the timestamp — confirm.
        metadata_files = sorted(glob.glob(f'{data_dir}/*metadata*.json'))

        if not metadata_files:
            print("⚠️ No metadata file found")
            return False

        with open(metadata_files[-1], 'r', encoding='utf-8') as f:
            metadata = json.load(f)

        timestamp = metadata['timestamp']
        print(f"✅ Found data with timestamp: {timestamp}")

        # Try loading CSVs (only a few rows; this is a smoke test)
        train_df = pd.read_csv(f'{data_dir}/tabular_train_{timestamp}.csv', nrows=50)
        print(f"✅ Loaded train data: {train_df.shape}")

        # Try loading NPZ. The archive object keeps the file open until it is
        # closed, so use it as a context manager.
        with np.load(f'{data_dir}/sequences_{timestamp}.npz') as sequences:
            print(f"✅ Loaded sequences: {list(sequences.keys())[:3]}...")

        return True

    except Exception as e:
        print(f"❌ Error loading real data: {e}")
        return False

# Run the real-data check once; the boolean result is reused by the summary
# cell (console report and saved JSON).
real_data_ok = test_real_data_loading()


🔍 Testing real data loading...
✅ Found data with timestamp: 20250814_031335
✅ Loaded train data: (50, 47)
✅ Loaded sequences: ['AMC_sequences', 'AMC_targets_1d', 'AMC_targets_5d']...


## 8️⃣ Summary

In [17]:
# ---- Console report ------------------------------------------------------
print("\n" + "="*50)
print("🎯 LOCAL TEST SUMMARY")
print("="*50)

print("\n✅ Models Tested:")
print("   - MLP: Working")
print("   - LSTM: Working")
print("   - Transformer: Working")

print("\n✅ Device:")
print(f"   - Using: {device}")
if device.type == "mps":
    print("   - Apple Metal GPU acceleration enabled")

print("\n✅ Data:")
print("   - Synthetic test data: Working")
print(
    "   - Real data loading: Working"
    if real_data_ok
    else "   - Real data loading: Not available (expected on local)"
)

print("\n" + "="*50)
print("🚀 Ready for Colab A100 deployment!")
print("="*50)

# ---- Persist the run to JSON --------------------------------------------
# Save test results
test_summary = {
    'timestamp': datetime.now().strftime('%Y%m%d_%H%M%S'),
    'device': str(device),
    'models_tested': ['MLP', 'LSTM', 'Transformer'],
    'test_results': results,
    'real_data_available': real_data_ok,
    'status': 'PASSED'
}

# default=str stringifies any value the JSON encoder cannot handle natively.
with open('local_test_results.json', 'w') as f:
    f.write(json.dumps(test_summary, indent=2, default=str))

print("\n✅ Test results saved to local_test_results.json")


🎯 LOCAL TEST SUMMARY

✅ Models Tested:
   - MLP: Working
   - LSTM: Working
   - Transformer: Working

✅ Device:
   - Using: cpu

✅ Data:
   - Synthetic test data: Working
   - Real data loading: Working

🚀 Ready for Colab A100 deployment!

✅ Test results saved to local_test_results.json
