# Model Training

Multi-branch neural model with influence-controlled news embedding.

**Table of Contents:**
1. Load and split data
2. Dataset and DataLoader
3. Model architecture
4. Training loop
5. Evaluation: Rank IC and Basket Returns
   - 5.1 Long-Short Analysis
   - 5.2 Data Quality Check
   - 5.3 Clipped Returns Evaluation
6. News Ablation Analysis

**Architecture** (per README section 9):
- Fundamentals encoder → h_f (32 dims)
- Price encoder → h_p (16 dims)
- News encoder → h_n (32 dims)
- Fusion: h = concat([h_f, h_p, α * h_n]), α=0.3
- Output: scalar score for ranking

**Training objective** (per README section 10):
- Pairwise ranking loss within each day

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [2]:
from dataclasses import dataclass

@dataclass(frozen=True)
class ModelConfig:
    # Feature dimensions (set after loading data)
    n_fundamental_features: int = 19
    n_price_features: int = 9
    n_embedding_dim: int = 768
    
    # Encoder latent dimensions
    fundamental_latent: int = 32
    price_latent: int = 16
    news_latent: int = 32
    
    # Dropout
    fundamental_dropout: float = 0.2
    price_dropout: float = 0.2
    news_dropout: float = 0.3
    
    # News influence cap
    news_alpha: float = 0.8
    
    # Training
    batch_size: int = 512
    learning_rate: float = 1e-3
    weight_decay: float = 1e-3
    n_epochs: int = 20
    pairs_per_day: int = 1000

    #hard fraction over-sampling
    hard_fraction = 0.0    

## 1. Load and split data

In [3]:
df = pd.read_parquet("data/ml_dataset.pqt")
df["feature_date"] = pd.to_datetime(df["feature_date"])

print(f"Dataset: {len(df):,} rows")
print(f"Date range: {df['feature_date'].min().date()} to {df['feature_date'].max().date()}")
print(f"Symbols: {df['symbol'].nunique():,}")

Dataset: 2,092,929 rows
Date range: 2021-01-13 to 2025-12-18
Symbols: 3,506


In [4]:
# Identify feature columns
price_feat_cols = [
    "overnight_gap_z", "intraday_ret_z",
    "ret_1d_z", "ret_2d_z", "ret_3d_z", "ret_5d_z",
    "vol_5d_z", "dist_from_high_5d_z", "dist_from_low_5d_z"
]

# Fundamental feature columns (normalized)
fund_feat_cols = [c for c in df.columns if c.endswith("_z") and c not in price_feat_cols and c != "news_count_z"]

# Embedding columns
emb_cols = [c for c in df.columns if c.startswith("emb_")]

print(f"Price features: {len(price_feat_cols)}")
print(f"Fundamental features: {len(fund_feat_cols)}")
print(f"Embedding dims: {len(emb_cols)}")

Price features: 9
Fundamental features: 19
Embedding dims: 768


In [5]:
# Time-based split (per README section 13)
# Use last 20% of dates as test
dates = sorted(df["feature_date"].unique())
n_dates = len(dates)
train_end_idx = int(n_dates * 0.7)
val_end_idx = int(n_dates * 0.85)

train_dates = set(dates[:train_end_idx])
val_dates = set(dates[train_end_idx:val_end_idx])
test_dates = set(dates[val_end_idx:])

train_df = df[df["feature_date"].isin(train_dates)].copy()
val_df = df[df["feature_date"].isin(val_dates)].copy()
test_df = df[df["feature_date"].isin(test_dates)].copy()

print(f"Train: {len(train_df):,} rows, {len(train_dates)} days")
print(f"Val: {len(val_df):,} rows, {len(val_dates)} days")
print(f"Test: {len(test_df):,} rows, {len(test_dates)} days")

Train: 1,418,494 rows, 830 days
Val: 322,222 rows, 178 days
Test: 352,213 rows, 178 days


## 2. Dataset and DataLoader

In [6]:
class PairwiseRankingDataset(Dataset):
    """Dataset that generates hard pairs from same day for ranking loss.

    Only uses rows with news, and samples from top vs bottom quartiles
    for harder training signal.

    Call resample_pairs() at the start of each epoch to generate fresh pairs.
    This prevents overfitting to specific pair combinations.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        price_cols: list[str],
        fund_cols: list[str],
        emb_cols: list[str],
        pairs_per_day: int = 100,
        hard_fraction: float = 0.7,  # Mix of hard vs random pairs
    ):
        # Filter to rows with news only
        has_news = (df[emb_cols].abs().sum(axis=1) > 0)
        df_news = df[has_news].copy()
        print(f"Filtered to news-only: {len(df_news):,} / {len(df):,} rows ({len(df_news)/len(df)*100:.1f}%)")

        self.df = df_news.reset_index(drop=True)
        self.price_cols = price_cols
        self.fund_cols = fund_cols
        self.emb_cols = emb_cols
        self.pairs_per_day = pairs_per_day
        self.hard_fraction = hard_fraction

        # Group by date and precompute quartile indices
        self.date_groups = {}
        for date, group in df_news.groupby("feature_date"):
            indices = group.index.tolist()
            if len(indices) < 4:  # Need at least 4 for quartiles
                continue

            # Sort by target return and get quartile indices
            sorted_idx = group["target_return"].sort_values().index.tolist()
            q_size = len(sorted_idx) // 4
            if q_size < 1:
                continue

            bottom_q = sorted_idx[:q_size]  # Losers
            top_q = sorted_idx[-q_size:]    # Winners

            self.date_groups[date] = {
                "all": np.array(indices),
                "top": np.array(top_q),
                "bottom": np.array(bottom_q),
            }

        self.dates = list(self.date_groups.keys())
        print(f"Days with sufficient news coverage: {len(self.dates)}")

        # Precompute arrays for speed
        self.price_arr = df_news[price_cols].values.astype(np.float32)
        self.fund_arr = df_news[fund_cols].values.astype(np.float32)
        self.emb_arr = df_news[emb_cols].values.astype(np.float32)
        self.target_arr = df_news["target_return"].values.astype(np.float32)

        # Map original index to position in filtered df
        self.idx_map = {old_idx: new_idx for new_idx, old_idx in enumerate(df_news.index)}

        # Pre-generate pairs for first epoch
        self.pairs = []
        self.resample_pairs()

    def resample_pairs(self):
        """Generate fresh pairs for a new epoch (vectorized for speed)."""
        pairs = []

        for date in self.dates:
            groups = self.date_groups[date]
            n_hard = int(self.pairs_per_day * self.hard_fraction)
            n_random = self.pairs_per_day - n_hard

            # Hard pairs: top quartile vs bottom quartile (vectorized)
            if n_hard > 0:
                winners = np.random.choice(groups["top"], size=n_hard, replace=True)
                losers = np.random.choice(groups["bottom"], size=n_hard, replace=True)
                for w, l in zip(winners, losers):
                    pairs.append((self.idx_map[w], self.idx_map[l]))

            # Random pairs from all stocks that day
            if n_random > 0 and len(groups["all"]) >= 2:
                all_idx = groups["all"]
                idx1 = np.random.choice(all_idx, size=n_random, replace=True)
                idx2 = np.random.choice(all_idx, size=n_random, replace=True)
                for i1, i2 in zip(idx1, idx2):
                    if i1 != i2:
                        pairs.append((self.idx_map[i1], self.idx_map[i2]))

        self.pairs = pairs
        # Shuffle pairs so batches mix different days
        np.random.shuffle(self.pairs)

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        i, j = self.pairs[idx]

        # Get features
        price_i = self.price_arr[i]
        price_j = self.price_arr[j]
        fund_i = self.fund_arr[i]
        fund_j = self.fund_arr[j]
        emb_i = self.emb_arr[i]
        emb_j = self.emb_arr[j]

        # Label based on actual returns
        actual_label = 1.0 if self.target_arr[i] > self.target_arr[j] else 0.0

        # Randomly swap to balance labels
        if np.random.random() < 0.5:
            price_i, price_j = price_j, price_i
            fund_i, fund_j = fund_j, fund_i
            emb_i, emb_j = emb_j, emb_i
            label = 1.0 - actual_label
        else:
            label = actual_label

        return {
            "price_i": torch.tensor(price_i),
            "price_j": torch.tensor(price_j),
            "fund_i": torch.tensor(fund_i),
            "fund_j": torch.tensor(fund_j),
            "emb_i": torch.tensor(emb_i),
            "emb_j": torch.tensor(emb_j),
            "label": torch.tensor(label),
        }

In [7]:
class PointwiseDataset(Dataset):
    """Dataset for inference - one sample per stock-day."""
    
    def __init__(
        self,
        df: pd.DataFrame,
        price_cols: list[str],
        fund_cols: list[str],
        emb_cols: list[str],
    ):
        self.df = df.reset_index(drop=True)
        self.price_arr = df[price_cols].values.astype(np.float32)
        self.fund_arr = df[fund_cols].values.astype(np.float32)
        self.emb_arr = df[emb_cols].values.astype(np.float32)
        self.target_arr = df["target_return"].values.astype(np.float32)
    
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        return {
            "price": torch.tensor(self.price_arr[idx]),
            "fund": torch.tensor(self.fund_arr[idx]),
            "emb": torch.tensor(self.emb_arr[idx]),
            "target": torch.tensor(self.target_arr[idx]),
        }

In [8]:
# Create config with actual dimensions
config = ModelConfig(
    n_fundamental_features=len(fund_feat_cols),
    n_price_features=len(price_feat_cols),
    n_embedding_dim=len(emb_cols),
)

print(f"Config: {config}")

Config: ModelConfig(n_fundamental_features=19, n_price_features=9, n_embedding_dim=768, fundamental_latent=32, price_latent=16, news_latent=32, fundamental_dropout=0.2, price_dropout=0.2, news_dropout=0.3, news_alpha=0.8, batch_size=512, learning_rate=0.001, weight_decay=0.001, n_epochs=20, pairs_per_day=1000)


In [9]:
# Create datasets (both use same hard pair ratio for consistent distribution)
train_dataset = PairwiseRankingDataset(
    train_df, price_feat_cols, fund_feat_cols, emb_cols,
    pairs_per_day=config.pairs_per_day,
    hard_fraction=config.hard_fraction,
)
val_dataset = PairwiseRankingDataset(
    val_df, price_feat_cols, fund_feat_cols, emb_cols,
    pairs_per_day=config.pairs_per_day,
    hard_fraction=config.hard_fraction,  # Same distribution as training
)

# For evaluation (pointwise)
val_pointwise = PointwiseDataset(val_df, price_feat_cols, fund_feat_cols, emb_cols)
test_pointwise = PointwiseDataset(test_df, price_feat_cols, fund_feat_cols, emb_cols)

print(f"Train pairs: {len(train_dataset):,}")
print(f"Val pairs: {len(val_dataset):,}")

Filtered to news-only: 339,872 / 1,418,494 rows (24.0%)
Days with sufficient news coverage: 830
Filtered to news-only: 88,579 / 322,222 rows (27.5%)
Days with sufficient news coverage: 178
Train pairs: 828,008
Val pairs: 177,668


In [10]:
# Create dataloaders
# num_workers=0 for Jupyter compatibility (avoids multiprocessing issues)
train_loader = DataLoader(
    train_dataset, 
    batch_size=config.batch_size, 
    shuffle=True,
    num_workers=0,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=config.batch_size,
    shuffle=False,
    num_workers=0,
)

## 3. Model architecture

In [11]:
class MultiBranchRanker(nn.Module):
    """Multi-branch model with influence-controlled news embedding."""
    
    def __init__(self, config: ModelConfig):
        super().__init__()
        self.config = config
        
        # Fundamentals encoder
        self.fund_encoder = nn.Sequential(
            nn.Linear(config.n_fundamental_features, 64),
            nn.ReLU(),
            nn.Dropout(config.fundamental_dropout),
            nn.Linear(64, config.fundamental_latent),
            nn.ReLU(),
        )
        
        # Price encoder
        self.price_encoder = nn.Sequential(
            nn.Linear(config.n_price_features, 32),
            nn.ReLU(),
            nn.Dropout(config.price_dropout),
            nn.Linear(32, config.price_latent),
            nn.ReLU(),
        )
        
        # News encoder
        self.news_encoder = nn.Sequential(
            nn.Linear(config.n_embedding_dim, 128),
            nn.ReLU(),
            nn.Dropout(config.news_dropout),
            nn.Linear(128, config.news_latent),
            nn.ReLU(),
        )
        
        # Output head
        fused_dim = config.fundamental_latent + config.price_latent + config.news_latent
        self.output_head = nn.Sequential(
            nn.Linear(fused_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(32, 1),
        )
    
    def encode(self, price: torch.Tensor, fund: torch.Tensor, emb: torch.Tensor) -> torch.Tensor:
        """Encode features and return fused representation."""
        h_f = self.fund_encoder(fund)
        h_p = self.price_encoder(price)
        h_n = self.news_encoder(emb)
        
        # Apply news influence cap (α * h_n)
        h_n_scaled = self.config.news_alpha * h_n
        
        # Fuse
        h = torch.cat([h_f, h_p, h_n_scaled], dim=-1)
        return h
    
    def forward(self, price: torch.Tensor, fund: torch.Tensor, emb: torch.Tensor) -> torch.Tensor:
        """Forward pass returning score."""
        h = self.encode(price, fund, emb)
        score = self.output_head(h)
        return score.squeeze(-1)
    
    def forward_pair(
        self,
        price_i: torch.Tensor, fund_i: torch.Tensor, emb_i: torch.Tensor,
        price_j: torch.Tensor, fund_j: torch.Tensor, emb_j: torch.Tensor,
    ) -> torch.Tensor:
        """Forward pass for pair, returning P(i > j)."""
        score_i = self.forward(price_i, fund_i, emb_i)
        score_j = self.forward(price_j, fund_j, emb_j)
        return torch.sigmoid(score_i - score_j)

In [12]:
model = MultiBranchRanker(config).to(device)

# Count parameters
n_params = sum(p.numel() for p in model.parameters())
print(f"Model parameters: {n_params:,}")
print(model)

Model parameters: 109,393
MultiBranchRanker(
  (fund_encoder): Sequential(
    (0): Linear(in_features=19, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=64, out_features=32, bias=True)
    (4): ReLU()
  )
  (price_encoder): Sequential(
    (0): Linear(in_features=9, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=32, out_features=16, bias=True)
    (4): ReLU()
  )
  (news_encoder): Sequential(
    (0): Linear(in_features=768, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=32, bias=True)
    (4): ReLU()
  )
  (output_head): Sequential(
    (0): Linear(in_features=80, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=32, out_features=1, bias=True)
  )
)


## 4. Training loop

In [13]:
def pairwise_ranking_loss(pred_prob: torch.Tensor, label: torch.Tensor, smoothing: float = 0.2) -> torch.Tensor:
    """Binary cross-entropy for pairwise ranking with label smoothing."""
    # Smooth labels: 0 -> smoothing/2, 1 -> 1 - smoothing/2
    smoothed_label = label * (1 - smoothing) + 0.5 * smoothing
    return F.binary_cross_entropy(pred_prob, smoothed_label)

In [14]:
def train_epoch(model, loader, optimizer, device):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    
    for batch in tqdm(loader, desc="Training", leave=False):
        price_i = batch["price_i"].to(device)
        price_j = batch["price_j"].to(device)
        fund_i = batch["fund_i"].to(device)
        fund_j = batch["fund_j"].to(device)
        emb_i = batch["emb_i"].to(device)
        emb_j = batch["emb_j"].to(device)
        label = batch["label"].to(device)
        
        optimizer.zero_grad()
        pred_prob = model.forward_pair(price_i, fund_i, emb_i, price_j, fund_j, emb_j)
        loss = pairwise_ranking_loss(pred_prob, label)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item() * len(label)
        total_correct += ((pred_prob > 0.5) == (label > 0.5)).sum().item()
        total_samples += len(label)
    
    return total_loss / total_samples, total_correct / total_samples

In [15]:
@torch.no_grad()
def eval_epoch(model, loader, device):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    
    for batch in tqdm(loader, desc="Evaluating", leave=False):
        price_i = batch["price_i"].to(device)
        price_j = batch["price_j"].to(device)
        fund_i = batch["fund_i"].to(device)
        fund_j = batch["fund_j"].to(device)
        emb_i = batch["emb_i"].to(device)
        emb_j = batch["emb_j"].to(device)
        label = batch["label"].to(device)
        
        pred_prob = model.forward_pair(price_i, fund_i, emb_i, price_j, fund_j, emb_j)
        loss = pairwise_ranking_loss(pred_prob, label)
        
        total_loss += loss.item() * len(label)
        total_correct += ((pred_prob > 0.5) == (label > 0.5)).sum().item()
        total_samples += len(label)
    
    return total_loss / total_samples, total_correct / total_samples

In [16]:
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config.learning_rate,
    weight_decay=config.weight_decay,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=config.n_epochs,
)

In [17]:
# Training
best_val_acc = 0
history = []

for epoch in range(config.n_epochs):
    # Resample pairs each epoch for fresh training data
    train_dataset.resample_pairs()
    val_dataset.resample_pairs()
    
    train_loss, train_acc = train_epoch(model, train_loader, optimizer, device)
    val_loss, val_acc = eval_epoch(model, val_loader, device)
    scheduler.step()
    
    history.append({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_acc": train_acc,
        "val_loss": val_loss,
        "val_acc": val_acc,
    })
    
    print(f"Epoch {epoch+1}/{config.n_epochs}: "
          f"train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, "
          f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")
    
    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "data/model_best.pt")
        print(f"  -> New best model saved")

Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 1/20: train_loss=0.6926, train_acc=0.5131, val_loss=0.6930, val_acc=0.5043
  -> New best model saved


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 2/20: train_loss=0.6925, train_acc=0.5160, val_loss=0.6927, val_acc=0.5100
  -> New best model saved


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 3/20: train_loss=0.6922, train_acc=0.5178, val_loss=0.6929, val_acc=0.5068


Training:   0%|          | 0/1617 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 4/20: train_loss=0.6919, train_acc=0.5199, val_loss=0.6929, val_acc=0.5111
  -> New best model saved


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 5/20: train_loss=0.6918, train_acc=0.5216, val_loss=0.6933, val_acc=0.5065


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 6/20: train_loss=0.6915, train_acc=0.5240, val_loss=0.6931, val_acc=0.5054


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 7/20: train_loss=0.6911, train_acc=0.5269, val_loss=0.6934, val_acc=0.5064


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 8/20: train_loss=0.6906, train_acc=0.5284, val_loss=0.6937, val_acc=0.5033


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 9/20: train_loss=0.6901, train_acc=0.5319, val_loss=0.6937, val_acc=0.5045


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 10/20: train_loss=0.6896, train_acc=0.5345, val_loss=0.6939, val_acc=0.5045


Training:   0%|          | 0/1617 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 11/20: train_loss=0.6891, train_acc=0.5369, val_loss=0.6938, val_acc=0.5063


Training:   0%|          | 0/1617 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 12/20: train_loss=0.6885, train_acc=0.5393, val_loss=0.6946, val_acc=0.5032


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 13/20: train_loss=0.6882, train_acc=0.5412, val_loss=0.6941, val_acc=0.5046


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 14/20: train_loss=0.6877, train_acc=0.5438, val_loss=0.6948, val_acc=0.5046


Training:   0%|          | 0/1617 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 15/20: train_loss=0.6874, train_acc=0.5452, val_loss=0.6947, val_acc=0.5034


Training:   0%|          | 0/1617 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 16/20: train_loss=0.6871, train_acc=0.5465, val_loss=0.6949, val_acc=0.5043


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 17/20: train_loss=0.6869, train_acc=0.5465, val_loss=0.6952, val_acc=0.5028


Training:   0%|          | 0/1617 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 18/20: train_loss=0.6868, train_acc=0.5476, val_loss=0.6949, val_acc=0.5033


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 19/20: train_loss=0.6866, train_acc=0.5478, val_loss=0.6949, val_acc=0.5032


Training:   0%|          | 0/1618 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/347 [00:00<?, ?it/s]

Epoch 20/20: train_loss=0.6864, train_acc=0.5483, val_loss=0.6952, val_acc=0.5014


## 5. Evaluation: Rank IC and Basket Returns

In [18]:
# Load best model
model.load_state_dict(torch.load("data/model_best.pt", weights_only=True))
model.eval()

MultiBranchRanker(
  (fund_encoder): Sequential(
    (0): Linear(in_features=19, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=64, out_features=32, bias=True)
    (4): ReLU()
  )
  (price_encoder): Sequential(
    (0): Linear(in_features=9, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=32, out_features=16, bias=True)
    (4): ReLU()
  )
  (news_encoder): Sequential(
    (0): Linear(in_features=768, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=32, bias=True)
    (4): ReLU()
  )
  (output_head): Sequential(
    (0): Linear(in_features=80, out_features=32, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [19]:
@torch.no_grad()
def get_scores(model, df, price_cols, fund_cols, emb_cols, device, batch_size=1024):
    """Get model scores for all rows."""
    model.eval()
    dataset = PointwiseDataset(df, price_cols, fund_cols, emb_cols)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    scores = []
    for batch in tqdm(loader, desc="Scoring", leave=False):
        price = batch["price"].to(device)
        fund = batch["fund"].to(device)
        emb = batch["emb"].to(device)
        
        score = model(price, fund, emb)
        scores.append(score.cpu().numpy())
    
    return np.concatenate(scores)

In [20]:
# Score test set
test_df = test_df.copy()
test_df["score"] = get_scores(model, test_df, price_feat_cols, fund_feat_cols, emb_cols, device)

print(f"Scored {len(test_df):,} test samples")

Scoring:   0%|          | 0/344 [00:00<?, ?it/s]

Scored 352,213 test samples


In [21]:
from scipy.stats import spearmanr

def compute_daily_ic(df):
    """Compute Spearman rank IC per day."""
    ics = []
    for date, group in df.groupby("feature_date"):
        if len(group) < 10:
            continue
        ic, _ = spearmanr(group["score"], group["target_return"])
        if not np.isnan(ic):
            ics.append({"date": date, "ic": ic})
    return pd.DataFrame(ics)

In [22]:
ic_df = compute_daily_ic(test_df)

print(f"Daily Rank IC (Spearman):")
print(f"  Mean: {ic_df['ic'].mean():.4f}")
print(f"  Std: {ic_df['ic'].std():.4f}")
print(f"  Sharpe (IC): {ic_df['ic'].mean() / ic_df['ic'].std() * np.sqrt(252):.2f}")
print(f"  % Positive: {(ic_df['ic'] > 0).mean()*100:.1f}%")

Daily Rank IC (Spearman):
  Mean: 0.0251
  Std: 0.0963
  Sharpe (IC): 4.13
  % Positive: 59.6%


In [23]:
def compute_basket_returns(df, top_k=20):
    """Compute daily returns of top-K basket (equal weight)."""
    returns = []
    for date, group in df.groupby("feature_date"):
        if len(group) < top_k:
            continue
        # Select top-K by score
        top = group.nlargest(top_k, "score")
        # Equal weight return
        ret = top["target_return"].mean()
        # Market return (all stocks)
        mkt_ret = group["target_return"].mean()
        returns.append({
            "date": date,
            "basket_return": ret,
            "market_return": mkt_ret,
            "excess_return": ret - mkt_ret,
        })
    return pd.DataFrame(returns)

In [24]:
basket_df = compute_basket_returns(test_df, top_k=20)

print(f"\nTop-20 Basket Performance (Test Set):")
print(f"  Daily mean return: {basket_df['basket_return'].mean()*100:.3f}%")
print(f"  Daily mean excess: {basket_df['excess_return'].mean()*100:.3f}%")
print(f"  Sharpe (basket): {basket_df['basket_return'].mean() / basket_df['basket_return'].std() * np.sqrt(252):.2f}")
print(f"  Sharpe (excess): {basket_df['excess_return'].mean() / basket_df['excess_return'].std() * np.sqrt(252):.2f}")


Top-20 Basket Performance (Test Set):
  Daily mean return: -0.137%
  Daily mean excess: 0.007%
  Sharpe (basket): -1.68
  Sharpe (excess): 0.20


In [25]:
# Cumulative returns
basket_df["cum_basket"] = (1 + basket_df["basket_return"]).cumprod()
basket_df["cum_market"] = (1 + basket_df["market_return"]).cumprod()
basket_df["cum_excess"] = (1 + basket_df["excess_return"]).cumprod()

print(f"\nCumulative Returns:")
print(f"  Basket: {(basket_df['cum_basket'].iloc[-1] - 1)*100:.1f}%")
print(f"  Market: {(basket_df['cum_market'].iloc[-1] - 1)*100:.1f}%")
print(f"  Excess: {(basket_df['cum_excess'].iloc[-1] - 1)*100:.1f}%")


Cumulative Returns:
  Basket: -22.8%
  Market: -23.7%
  Excess: 1.0%


In [26]:
# Long-short analysis: long top-K, short bottom-K
def compute_long_short_returns(df, top_k=20):
    """Compute daily returns of long-short portfolio."""
    returns = []
    for date, group in df.groupby("feature_date"):
        if len(group) < top_k * 2:
            continue
        # Long top-K
        top = group.nlargest(top_k, "score")
        long_ret = top["target_return"].mean()
        # Short bottom-K
        bottom = group.nsmallest(top_k, "score")
        short_ret = bottom["target_return"].mean()
        # Long-short return (long winners, short losers)
        ls_ret = long_ret - short_ret
        # Market
        mkt_ret = group["target_return"].mean()
        returns.append({
            "date": date,
            "long_return": long_ret,
            "short_return": short_ret,
            "long_short_return": ls_ret,
            "market_return": mkt_ret,
        })
    return pd.DataFrame(returns)

ls_df = compute_long_short_returns(test_df, top_k=20)

print("Long-Short Portfolio Performance (Test Set):")
print(f"\n  Long (top-20):")
print(f"    Daily mean: {ls_df['long_return'].mean()*100:.3f}%")
print(f"    Cumulative: {((1 + ls_df['long_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

print(f"\n  Short (bottom-20):")
print(f"    Daily mean: {ls_df['short_return'].mean()*100:.3f}%")
print(f"    Cumulative: {((1 + ls_df['short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")
print(f"    Short P&L (inverted): {((1 - ls_df['short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

print(f"\n  Long-Short (market neutral):")
print(f"    Daily mean: {ls_df['long_short_return'].mean()*100:.3f}%")
print(f"    Std: {ls_df['long_short_return'].std()*100:.3f}%")
print(f"    Sharpe: {ls_df['long_short_return'].mean() / ls_df['long_short_return'].std() * np.sqrt(252):.2f}")
print(f"    Cumulative: {((1 + ls_df['long_short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

print(f"\n  Market:")
print(f"    Cumulative: {((1 + ls_df['market_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

Long-Short Portfolio Performance (Test Set):

  Long (top-20):
    Daily mean: -0.137%
    Cumulative: -22.8%

  Short (bottom-20):
    Daily mean: -2.412%
    Cumulative: -99.0%
    Short P&L (inverted): 5449.1%

  Long-Short (market neutral):
    Daily mean: 2.275%
    Std: 4.761%
    Sharpe: 7.59
    Cumulative: 4430.5%

  Market:
    Cumulative: -23.7%


In [27]:
# Investigate the short basket - are these real shortable stocks or distressed junk?
print("Bottom-20 basket analysis:")
print("=" * 50)

# Get all bottom-20 selections
bottom_picks = []
for date, group in test_df.groupby("feature_date"):
    if len(group) < 40:
        continue
    bottom = group.nsmallest(20, "score")
    bottom_picks.append(bottom[["symbol", "feature_date", "target_return", "score"]])

bottom_df = pd.concat(bottom_picks)

print(f"\nMost frequent bottom-20 stocks:")
freq = bottom_df["symbol"].value_counts().head(20)
print(freq)

print(f"\n\nWorst single-day returns in bottom basket:")
print(bottom_df.nsmallest(20, "target_return")[["feature_date", "symbol", "target_return"]])

print(f"\n\nReturn distribution of bottom-20 picks:")
print(bottom_df["target_return"].describe())

# Check for extreme outliers driving the result
print(f"\n\nDays with extreme short returns (< -10%):")
extreme_days = ls_df[ls_df["short_return"] < -0.10]
print(f"Number of extreme days: {len(extreme_days)}")
if len(extreme_days) > 0:
    print(extreme_days[["date", "short_return", "long_return", "long_short_return"]].head(10))

Bottom-20 basket analysis:

Most frequent bottom-20 stocks:
symbol
SBET    63
FIG     61
RGC     56
DFDV    36
TLRY    36
CHAI    35
MLGO    33
NEGG    31
RUN     31
QUBT    30
UPXI    29
ABTC    28
ACON    27
MRNA    27
NCNA    26
NUKK    26
CHYM    25
SPRB    25
SMX     25
WBUY    23
Name: count, dtype: int64


Worst single-day returns in bottom basket:
        feature_date symbol  target_return
2050954   2025-11-05   MGNI      -3.540930
2092018   2025-12-18   DFLI      -2.164435
1793247   2025-03-31   GDHG      -2.161439
1922868   2025-08-06    SMX      -2.019265
2088179   2025-12-16   MENS      -1.662784
1764290   2025-03-05   ACON      -1.474477
1799271   2025-04-04   SUNE      -1.463255
1743903   2025-02-12   PPCB      -1.287354
1863749   2025-06-12   SBET      -1.261873
1921292   2025-08-05   ELWS      -0.895174
2079979   2025-12-05    SMX      -0.893744
1973584   2025-09-11   NXTT      -0.853514
1825836   2025-05-12   DEVS      -0.832146
1882126   2025-06-27    OST      -0.7916

In [28]:
# Check for data quality issues - returns < -100% are impossible
print("Data quality check:")
print("=" * 50)
impossible_returns = test_df[test_df["target_return"] < -1.0]
print(f"\nRows with impossible returns (< -100%): {len(impossible_returns)}")
if len(impossible_returns) > 0:
    print(impossible_returns[["symbol", "feature_date", "target_return"]].head(20))

# Also check extreme positive returns
extreme_positive = test_df[test_df["target_return"] > 1.0]
print(f"\nRows with extreme positive returns (> 100%): {len(extreme_positive)}")
if len(extreme_positive) > 0:
    print(extreme_positive[["symbol", "feature_date", "target_return"]].head(10))

Data quality check:

Rows with impossible returns (< -100%): 28
        symbol feature_date  target_return
1743903   PPCB   2025-02-12      -1.287354
1746731   INLF   2025-02-14      -1.572174
1764290   ACON   2025-03-05      -1.474477
1767070    HIT   2025-03-11      -1.731924
1793247   GDHG   2025-03-31      -2.161439
1799271   SUNE   2025-04-04      -1.463255
1863749   SBET   2025-06-12      -1.261873
1877320    OST   2025-06-25      -2.797281
1888515   SKBL   2025-07-03      -2.085914
1904441   PTNM   2025-07-17      -1.132462
1906158    YHC   2025-07-17      -1.015413
1910322   PTHL   2025-07-28      -2.931921
1922868    SMX   2025-08-06      -2.019265
1935021   FLYE   2025-08-14      -2.048982
1951089   TRIB   2025-08-26      -1.602658
1970120   EPSM   2025-09-09      -1.228176
1993179    SDM   2025-09-25      -1.995619
1995860   MLTX   2025-09-26      -2.294392
1999569    UFG   2025-10-01      -1.245843
2006898   YYAI   2025-10-06      -2.635081

Rows with extreme positive retur

In [29]:
# Re-evaluate with winsorized returns (clip extreme values)
def compute_long_short_returns_clipped(df, top_k=20, clip_pct=0.50):
    """Compute long-short returns with clipped extreme returns."""
    df = df.copy()
    df["target_return_clipped"] = df["target_return"].clip(-clip_pct, clip_pct)
    
    returns = []
    for date, group in df.groupby("feature_date"):
        if len(group) < top_k * 2:
            continue
        # Use original score for ranking, clipped returns for P&L
        top = group.nlargest(top_k, "score")
        long_ret = top["target_return_clipped"].mean()
        
        bottom = group.nsmallest(top_k, "score")
        short_ret = bottom["target_return_clipped"].mean()
        
        ls_ret = long_ret - short_ret
        mkt_ret = group["target_return_clipped"].mean()
        
        returns.append({
            "date": date,
            "long_return": long_ret,
            "short_return": short_ret,
            "long_short_return": ls_ret,
            "market_return": mkt_ret,
        })
    return pd.DataFrame(returns)

ls_clipped = compute_long_short_returns_clipped(test_df, top_k=20, clip_pct=0.50)

print("Long-Short with clipped returns (max ±50%):")
print("=" * 50)
print(f"\n  Long (top-20):")
print(f"    Cumulative: {((1 + ls_clipped['long_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

print(f"\n  Short (bottom-20):")
print(f"    Cumulative: {((1 + ls_clipped['short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")
print(f"    Short P&L: {((1 - ls_clipped['short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

print(f"\n  Long-Short:")
print(f"    Daily mean: {ls_clipped['long_short_return'].mean()*100:.3f}%")
print(f"    Sharpe: {ls_clipped['long_short_return'].mean() / ls_clipped['long_short_return'].std() * np.sqrt(252):.2f}")
print(f"    Cumulative: {((1 + ls_clipped['long_short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

Long-Short with clipped returns (max ±50%):

  Long (top-20):
    Cumulative: -22.8%

  Short (bottom-20):
    Cumulative: -98.2%
    Short P&L: 3765.4%

  Long-Short:
    Daily mean: 2.020%
    Sharpe: 8.86
    Cumulative: 3042.7%


### 5.4 Shortability Analysis

Filter to stocks that are practically shortable:
- Market cap > $500M (institutional threshold)
- Exclude penny stocks and micro-caps

In [30]:
# Load fundamentals to get market cap
key_metrics = pd.read_parquet("data/key_metrics.pqt")

# Get latest market cap per symbol (most recent quarter)
key_metrics["date"] = pd.to_datetime(key_metrics["date"])
latest_mcap = (
    key_metrics[["symbol", "date", "marketCap"]]
    .sort_values("date")
    .groupby("symbol")
    .last()
    .reset_index()
)

print(f"Market cap data for {len(latest_mcap):,} symbols")
print(f"\nMarket cap distribution:")
print(latest_mcap["marketCap"].describe())

# Define market cap tiers
def mcap_tier(mcap):
    if pd.isna(mcap) or mcap <= 0:
        return "Unknown"
    elif mcap < 300_000_000:
        return "Micro (<$300M)"
    elif mcap < 2_000_000_000:
        return "Small ($300M-$2B)"
    elif mcap < 10_000_000_000:
        return "Mid ($2B-$10B)"
    else:
        return "Large (>$10B)"

latest_mcap["tier"] = latest_mcap["marketCap"].apply(mcap_tier)
print(f"\nMarket cap tiers:")
print(latest_mcap["tier"].value_counts())

Market cap data for 5,564 symbols

Market cap distribution:
count    5.564000e+03
mean     2.053745e+11
std      5.247073e+12
min      0.000000e+00
25%      8.948057e+07
50%      5.611468e+08
75%      4.123179e+09
max      3.111042e+14
Name: marketCap, dtype: float64

Market cap tiers:
tier
Micro (<$300M)       1941
Small ($300M-$2B)    1414
Mid ($2B-$10B)        979
Large (>$10B)         879
Unknown               351
Name: count, dtype: int64


In [31]:
# Merge market cap into test_df
test_df_mcap = test_df.merge(
    latest_mcap[["symbol", "marketCap"]], 
    on="symbol", 
    how="left"
)

# Check how many of our bottom-20 picks have market cap data
bottom_with_mcap = bottom_df.merge(latest_mcap[["symbol", "marketCap"]], on="symbol", how="left")
print("Bottom-20 picks by market cap tier:")
bottom_with_mcap["tier"] = bottom_with_mcap["marketCap"].apply(mcap_tier)
print(bottom_with_mcap["tier"].value_counts())

print("\n\nMost frequent bottom-20 stocks with their market caps:")
freq_symbols = bottom_df["symbol"].value_counts().head(20).index.tolist()
for sym in freq_symbols:
    mcap = latest_mcap[latest_mcap["symbol"] == sym]["marketCap"].values
    mcap_str = f"${mcap[0]/1e9:.1f}B" if len(mcap) > 0 and mcap[0] > 0 else "N/A"
    count = bottom_df[bottom_df["symbol"] == sym].shape[0]
    print(f"  {sym}: {count} days, mcap={mcap_str}")

Bottom-20 picks by market cap tier:
tier
Micro (<$300M)       1488
Small ($300M-$2B)     780
Mid ($2B-$10B)        745
Large (>$10B)         319
Unknown               228
Name: count, dtype: int64


Most frequent bottom-20 stocks with their market caps:
  SBET: 63 days, mcap=$2.5B
  FIG: 61 days, mcap=$47.4B
  RGC: 56 days, mcap=$8.4B
  DFDV: 36 days, mcap=$0.4B
  TLRY: 36 days, mcap=$1.5B
  CHAI: 35 days, mcap=$0.0B
  MLGO: 33 days, mcap=$0.0B
  NEGG: 31 days, mcap=$0.3B
  RUN: 31 days, mcap=$4.0B
  QUBT: 30 days, mcap=$3.0B
  UPXI: 29 days, mcap=$0.3B
  ABTC: 28 days, mcap=$4.0B
  ACON: 27 days, mcap=$0.0B
  MRNA: 27 days, mcap=$10.1B
  NCNA: 26 days, mcap=$0.0B
  NUKK: 26 days, mcap=$0.1B
  CHYM: 25 days, mcap=$7.5B
  SPRB: 25 days, mcap=$0.0B
  SMX: 25 days, mcap=N/A
  WBUY: 23 days, mcap=$0.0B


In [32]:
# Re-evaluate long-short with market cap filter
MIN_MCAP = 500_000_000  # $500M minimum

# Filter to shortable universe
shortable_symbols = set(latest_mcap[latest_mcap["marketCap"] >= MIN_MCAP]["symbol"])
test_shortable = test_df_mcap[test_df_mcap["symbol"].isin(shortable_symbols)].copy()

print(f"Shortable universe (mcap >= ${MIN_MCAP/1e6:.0f}M): {len(shortable_symbols):,} symbols")
print(f"Test rows: {len(test_df_mcap):,} -> {len(test_shortable):,} ({len(test_shortable)/len(test_df_mcap)*100:.1f}%)")

# Re-run long-short on shortable universe
def compute_long_short_returns_filtered(df, top_k=20, clip_pct=0.50):
    """Long-short with clipped returns on filtered universe."""
    df = df.copy()
    df["target_return_clipped"] = df["target_return"].clip(-clip_pct, clip_pct)
    
    returns = []
    for date, group in df.groupby("feature_date"):
        if len(group) < top_k * 2:
            continue
        
        top = group.nlargest(top_k, "score")
        long_ret = top["target_return_clipped"].mean()
        
        bottom = group.nsmallest(top_k, "score")
        short_ret = bottom["target_return_clipped"].mean()
        
        ls_ret = long_ret - short_ret
        mkt_ret = group["target_return_clipped"].mean()
        
        returns.append({
            "date": date,
            "long_return": long_ret,
            "short_return": short_ret,
            "long_short_return": ls_ret,
            "market_return": mkt_ret,
        })
    return pd.DataFrame(returns)

ls_shortable = compute_long_short_returns_filtered(test_shortable, top_k=20, clip_pct=0.50)

print(f"\nLong-Short on Shortable Universe (mcap >= $500M):")
print("=" * 50)
print(f"\n  Trading days: {len(ls_shortable)}")

print(f"\n  Long (top-20):")
print(f"    Cumulative: {((1 + ls_shortable['long_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

print(f"\n  Short (bottom-20):")
print(f"    Cumulative: {((1 + ls_shortable['short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")
print(f"    Short P&L: {((1 - ls_shortable['short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")

print(f"\n  Long-Short:")
print(f"    Daily mean: {ls_shortable['long_short_return'].mean()*100:.3f}%")
print(f"    Sharpe: {ls_shortable['long_short_return'].mean() / ls_shortable['long_short_return'].std() * np.sqrt(252):.2f}")
print(f"    Cumulative: {((1 + ls_shortable['long_short_return']).cumprod().iloc[-1] - 1)*100:.1f}%")
print(f"    Win rate: {(ls_shortable['long_short_return'] > 0).mean()*100:.1f}%")

Shortable universe (mcap >= $500M): 2,861 symbols
Test rows: 352,213 -> 334,455 (95.0%)

Long-Short on Shortable Universe (mcap >= $500M):

  Trading days: 178

  Long (top-20):
    Cumulative: -20.6%

  Short (bottom-20):
    Cumulative: -49.4%
    Short P&L: 72.2%

  Long-Short:
    Daily mean: 0.222%
    Sharpe: 1.62
    Cumulative: 42.4%
    Win rate: 53.4%


In [33]:
# What stocks end up in the shortable bottom-20?
bottom_shortable = []
for date, group in test_shortable.groupby("feature_date"):
    if len(group) < 40:
        continue
    bottom = group.nsmallest(20, "score")
    bottom_shortable.append(bottom[["symbol", "feature_date", "target_return", "score", "marketCap"]])

bottom_shortable_df = pd.concat(bottom_shortable)

print("Most frequent bottom-20 stocks (shortable universe):")
freq = bottom_shortable_df["symbol"].value_counts().head(20)
for sym, count in freq.items():
    mcap = bottom_shortable_df[bottom_shortable_df["symbol"] == sym]["marketCap"].iloc[0]
    print(f"  {sym}: {count} days, mcap=${mcap/1e9:.1f}B")

print(f"\n\nReturn distribution of shortable bottom-20:")
print(bottom_shortable_df["target_return"].describe())

Most frequent bottom-20 stocks (shortable universe):
  SBET: 81 days, mcap=$2.5B
  FIG: 61 days, mcap=$47.4B
  RGC: 56 days, mcap=$8.4B
  TLRY: 53 days, mcap=$1.5B
  QUBT: 49 days, mcap=$3.0B
  AEVA: 43 days, mcap=$0.8B
  RUN: 40 days, mcap=$4.0B
  MRNA: 38 days, mcap=$10.1B
  ABTC: 38 days, mcap=$4.0B
  ARQQ: 36 days, mcap=$0.6B
  BEPC: 35 days, mcap=$5.9B
  PTON: 32 days, mcap=$3.7B
  PSKY: 31 days, mcap=$20.8B
  FTRE: 30 days, mcap=$0.8B
  ALAB: 30 days, mcap=$32.8B
  GRAL: 29 days, mcap=$2.1B
  AMN: 28 days, mcap=$0.7B
  PL: 27 days, mcap=$1.9B
  ASTS: 26 days, mcap=$16.6B
  CHYM: 26 days, mcap=$7.5B


Return distribution of shortable bottom-20:
count    3560.000000
mean       -0.004362
std         0.102958
min        -3.540930
25%        -0.031859
50%        -0.003293
75%         0.024884
max         1.343235
Name: target_return, dtype: float64


In [34]:
# Save model and config
torch.save({
    "model_state_dict": model.state_dict(),
    "config": config,
    "price_cols": price_feat_cols,
    "fund_cols": fund_feat_cols,
    "emb_cols": emb_cols,
}, "data/model_final.pt")

print("Model saved to data/model_final.pt")

Model saved to data/model_final.pt


In [35]:
# Training history
history_df = pd.DataFrame(history)
history_df

Unnamed: 0,epoch,train_loss,train_acc,val_loss,val_acc
0,1,0.692637,0.513108,0.692965,0.504324
1,2,0.692459,0.51603,0.69273,0.509986
2,3,0.692179,0.517811,0.692919,0.50677
3,4,0.691949,0.519894,0.692892,0.511132
4,5,0.691778,0.521649,0.693288,0.506512
5,6,0.691481,0.523973,0.693141,0.505377
6,7,0.691127,0.526862,0.693427,0.506365
7,8,0.690606,0.528435,0.693693,0.503291
8,9,0.690073,0.531941,0.693685,0.504475
9,10,0.689611,0.534544,0.693882,0.504464
