# Neural Network Volatility Prediction

This notebook implements a neural network approach for volatility prediction using PyTorch.

## 1. Import Libraries and Setup

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully")

Libraries imported successfully


## 2. Load and Prepare Data

In [2]:
# Load data
print("Loading data...")
train = pd.read_parquet('train_data.parquet')
test = pd.read_parquet('test_data.parquet')
sample_sub = pd.read_csv('sample_submission.csv')

print(f"\nTrain data shape: {train.shape}")
print(f"Test data shape: {test.shape}")
print(f"Sample submission shape: {sample_sub.shape}")

# Get all IV columns from TEST data
iv_columns = [col for col in test.columns if col.startswith(('call_iv_', 'put_iv_'))]
print(f"\nNumber of IV columns: {len(iv_columns)}")

# Create strike dictionary from TEST columns
strike_dict = {}
for col in iv_columns:
    strike = col.split('_')[-1]
    if strike not in strike_dict:
        strike_dict[strike] = {'call': None, 'put': None}
    
    if col.startswith('call_iv_'):
        strike_dict[strike]['call'] = col
    else:
        strike_dict[strike]['put'] = col

print(f"\nNumber of unique strikes: {len(strike_dict)}")

Loading data...

Train data shape: (178340, 97)
Test data shape: (12065, 96)
Sample submission shape: (12065, 53)

Number of IV columns: 52

Number of unique strikes: 36


## 3. Define Neural Network Model

In [3]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_dim):
        super(NeuralNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        return self.model(x)

print("Neural Network model defined successfully")

Neural Network model defined successfully


## 4. Feature Engineering

In [10]:
def create_features(df):
    """Create features for neural network"""
    print("Creating features...")
    features = df.copy()
    
    # Convert timestamps to datetime and calculate time to expiry in days
    features['timestamp'] = pd.to_datetime(features['timestamp'])
    features['expiry'] = pd.to_datetime(features['expiry'])
    features['time_to_expiry'] = (features['expiry'] - features['timestamp']).dt.total_seconds() / (24 * 3600)  # Convert to days
    
    # Basic features
    features['log_underlying'] = np.log(features['underlying'])
    features['underlying_returns'] = features['underlying'].pct_change()
    features['underlying_volatility'] = features['underlying_returns'].rolling(window=20).std()
    
    # Technical indicators
    windows = [5, 10, 20]
    for window in windows:
        features[f'underlying_ma_{window}'] = features['underlying'].rolling(window=window).mean()
        features[f'underlying_std_{window}'] = features['underlying'].rolling(window=window).std()
        features[f'underlying_skew_{window}'] = features['underlying_returns'].rolling(window=window).skew()
        features[f'underlying_kurt_{window}'] = features['underlying_returns'].rolling(window=window).kurt()
    
    # Moneyness features
    for strike in strike_dict.keys():
        strike_price = float(strike)
        features[f'moneyness_{strike}'] = features['underlying'] / strike_price
        features[f'log_moneyness_{strike}'] = np.log(features['underlying'] / strike_price)
    
    # Time features
    features['time_to_expiry_squared'] = features['time_to_expiry'] ** 2
    features['time_to_expiry_log'] = np.log1p(features['time_to_expiry'])
    
    # Drop datetime columns
    features = features.drop(['timestamp', 'expiry'], axis=1)
    
    # Fill missing values
    features = features.fillna(method='ffill').fillna(method='bfill').fillna(0)
    
    print(f"Created {len(features.columns)} features")
    return features

# Test feature creation on a small sample
sample_features = create_features(train.head(100))
print(f"\nSample features shape: {sample_features.shape}")
print("\nFeature columns:")
print(sample_features.columns.tolist())

Creating features...
Created 185 features

Sample features shape: (100, 185)

Feature columns:
['underlying', 'call_iv_23500', 'call_iv_23600', 'call_iv_23700', 'call_iv_23800', 'call_iv_23900', 'call_iv_24000', 'call_iv_24100', 'call_iv_24200', 'call_iv_24300', 'call_iv_24400', 'call_iv_24500', 'call_iv_24600', 'call_iv_24700', 'call_iv_24800', 'call_iv_24900', 'call_iv_25000', 'call_iv_25100', 'call_iv_25200', 'call_iv_25300', 'call_iv_25400', 'call_iv_25500', 'call_iv_25600', 'call_iv_25700', 'call_iv_25800', 'call_iv_25900', 'call_iv_26000', 'put_iv_22500', 'put_iv_22600', 'put_iv_22700', 'put_iv_22800', 'put_iv_22900', 'put_iv_23000', 'put_iv_23100', 'put_iv_23200', 'put_iv_23300', 'put_iv_23400', 'put_iv_23500', 'put_iv_23600', 'put_iv_23700', 'put_iv_23800', 'put_iv_23900', 'put_iv_24000', 'put_iv_24100', 'put_iv_24200', 'put_iv_24300', 'put_iv_24400', 'put_iv_24500', 'put_iv_24600', 'put_iv_24700', 'put_iv_24800', 'put_iv_24900', 'put_iv_25000', 'X0', 'X1', 'X2', 'X3', 'X4', 'X

## 5. Model Training Function

In [11]:
def train_model(model, train_loader, val_loader, num_epochs=100, patience=10):
    """Train the model with early stopping"""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    
    best_val_loss = float('inf')
    patience_counter = 0
    best_model_state = None
    
    print("\nStarting training...")
    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += criterion(outputs, batch_y).item()
        
        # Print progress
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss/len(train_loader):.6f} - Val Loss: {val_loss/len(val_loader):.6f}")
        
        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            best_model_state = model.state_dict().copy()
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"\nEarly stopping at epoch {epoch+1}")
                break
    
    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    return model

## 6. Prediction Function

In [12]:
def predict_iv(data):
    print("Starting IV prediction...")
    data = data.copy()
    
    # Phase 1: Put-call parity
    print("\nPhase 1: Applying put-call parity...")
    for strike, cols in strike_dict.items():
        call_col = cols['call']
        put_col = cols['put']
        
        if call_col in data.columns and put_col in data.columns:
            call_mask = data[call_col].isna() & data[put_col].notna()
            data.loc[call_mask, call_col] = data.loc[call_mask, put_col]
            
            put_mask = data[put_col].isna() & data[call_col].notna()
            data.loc[put_mask, put_col] = data.loc[put_mask, call_col]
    
    # Phase 2: Neural network prediction
    print("\nPhase 2: Creating features and training models...")
    features = create_features(data)
    feature_cols = [col for col in features.columns if col not in ['timestamp'] + iv_columns]
    
    scaler = StandardScaler()
    X = scaler.fit_transform(features[feature_cols])
    X_tensor = torch.FloatTensor(X)
    
    models = {}
    for col in iv_columns:
        print(f"\nTraining model for {col}...")
        y = data[col].values
        y_tensor = torch.FloatTensor(y).reshape(-1, 1)
        
        train_size = int(0.8 * len(X_tensor))
        train_dataset = TensorDataset(X_tensor[:train_size], y_tensor[:train_size])
        val_dataset = TensorDataset(X_tensor[train_size:], y_tensor[train_size:])
        
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32)
        
        model = NeuralNetwork(X.shape[1])
        model = train_model(model, train_loader, val_loader)
        models[col] = model
    
    # Phase 3: Make predictions
    print("\nPhase 3: Making predictions...")
    for col in iv_columns:
        mask = data[col].isna()
        if mask.any():
            X_pred = scaler.transform(features.loc[mask, feature_cols])
            X_pred_tensor = torch.FloatTensor(X_pred)
            
            models[col].eval()
            with torch.no_grad():
                predictions = models[col](X_pred_tensor).numpy()
            
            data.loc[mask, col] = predictions.flatten()
    
    # Phase 4: Smoothing and consistency
    print("\nPhase 4: Applying smoothing and consistency checks...")
    for idx, row in data.iterrows():
        for strike, cols in strike_dict.items():
            call_col = cols['call']
            put_col = cols['put']
            
            if call_col in data.columns and put_col in data.columns:
                avg_iv = (data.at[idx, call_col] + data.at[idx, put_col]) / 2
                data.at[idx, call_col] = 0.9 * data.at[idx, call_col] + 0.1 * avg_iv
                data.at[idx, put_col] = 0.9 * data.at[idx, put_col] + 0.1 * avg_iv
    
    # Ensure all values are within reasonable bounds
    for col in iv_columns:
        if col in data.columns:
            data[col] = np.clip(data[col], 0.01, 1.0)
    
    print("\nPrediction completed successfully")
    return data

## 7. Validation and Testing

In [None]:
# Create validation split
print("Creating validation split...")
train_df, val_df = train_test_split(train, test_size=0.2, random_state=42)
print(f"Training set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")

# Apply to validation set
print("\nRunning validation...")
val_pred = predict_iv(val_df)

# Calculate MSE only on originally masked validation points
mse_vals = []
for col in iv_columns:
    if col in val_df.columns and col in val_pred.columns:
        mask = val_df[col].isna() & val_pred[col].notna()
        if mask.any():
            se = (val_df.loc[mask, col] - val_pred.loc[mask, col]) ** 2
            mse_vals.append(se.mean())

validation_mse = np.mean(mse_vals) if mse_vals else 0
print(f"\nValidation MSE (masked points only): {validation_mse:.12f}")

Creating validation split...
Training set size: 142672
Validation set size: 35668

Running validation...
Starting IV prediction...

Phase 1: Applying put-call parity...

Phase 2: Creating features and training models...
Creating features...
Created 185 features

Training model for call_iv_24000...
Using device: cpu

Starting training...
Created 185 features

Training model for call_iv_24000...
Using device: cpu

Starting training...
Epoch 10/100 - Train Loss: 0.000456 - Val Loss: 2027414.529348
Epoch 10/100 - Train Loss: 0.000456 - Val Loss: 2027414.529348

Early stopping at epoch 11

Training model for call_iv_24100...
Using device: cpu

Starting training...

Early stopping at epoch 11

Training model for call_iv_24100...
Using device: cpu

Starting training...
Epoch 10/100 - Train Loss: 0.000249 - Val Loss: 0.000104
Epoch 10/100 - Train Loss: 0.000249 - Val Loss: 0.000104
Epoch 20/100 - Train Loss: 0.000203 - Val Loss: 0.000070
Epoch 20/100 - Train Loss: 0.000203 - Val Loss: 0.000070

## 8. Generate Final Predictions

In [None]:
# Apply to test set
print("Generating final predictions...")
test_pred = predict_iv(test)

# Prepare submission
submission = test_pred[['timestamp'] + iv_columns].copy()
submission.columns = sample_sub.columns

# Verify no missing values
assert submission.isna().sum().sum() == 0, "Missing values detected"
submission.to_csv('submission.csv', index=False)

print("\nFinal Submission Preview:")
print(submission.head())
print(f"\nSubmission shape: {submission.shape}")
print(f"Validation MSE: {validation_mse:.12f}")

Generating final predictions...
Starting IV prediction...

Phase 1: Applying put-call parity...

Phase 2: Creating features and training models...
Creating features...
Created 183 features

Training model for call_iv_24000...
Using device: cpu

Starting training...


KeyboardInterrupt: 