### Import and Flag

In [1]:
import os
import pandas as pd
import polars as pl
import logging
import numpy as np

from IPython.display import HTML, display


# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

### Environment Setup ###
IS_KAGGLE = False  # Flag to switch between environments

### Submit

In [4]:
# Setup paths based on environment
if IS_KAGGLE:
    data_path = '/kaggle/input/jane-street-real-time-market-data-forecasting'
else:
    # For local testing
    import sys
    sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "jane-street-real-time-market-data-forecasting"))
    data_path = os.path.join(os.path.dirname(os.getcwd()), "jane-street-real-time-market-data-forecasting")
    
    # Import local testing metrics
    sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "training", "src", "utils"))
    from metrics import r2_score_weighted

import kaggle_evaluation.jane_street_inference_server

# Global variable to store lags
lags_ : pl.DataFrame | None = None

def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame | pd.DataFrame:
    """Make a prediction."""
    global lags_
    
    logger.info(f"Received test data with shape: {test.shape}")
    if lags is not None:
        lags_ = lags
        logger.info(f"Received lags data with shape: {lags_.shape}")
        
    # Simple prediction
    predictions = test.select(
        'row_id',
        pl.lit(0.0).alias('responder_6'),
    )    
    
    logger.info(f"Generated predictions with shape: {predictions.shape}")
    
    # Log score if we have actual values
    if 'responder_6' in test.columns and 'weight' in test.columns:
        weights = test['weight'].to_numpy()
        y_true = test['responder_6'].to_numpy()
        y_pred = predictions['responder_6'].to_numpy()
        
        # Calculate weighted R2
        numerator = np.sum(weights * (y_true - y_pred) ** 2)
        denominator = np.sum(weights * y_true ** 2)
        r2 = 1 - numerator / denominator
        logger.info(f"Batch weighted R2 score: {r2:.4f}")

    # The predict function must return a DataFrame
    assert isinstance(predictions, pl.DataFrame | pd.DataFrame)
    # with columns 'row_id', 'responer_6'
    assert predictions.columns == ['row_id', 'responder_6']
    # and as many rows as the test data.
    assert len(predictions) == len(test)

    return predictions

# Set up inference server
inference_server = kaggle_evaluation.jane_street_inference_server.JSInferenceServer(predict)

# Run based on environment
if os.getenv('KAGGLE_IS_COMPETITION_RERUN') or IS_KAGGLE:
    inference_server.serve()
else:
    logger.info("Starting local gateway...")
    inference_server.run_local_gateway(
        (
            os.path.join(data_path, "test.parquet"),
            os.path.join(data_path, "lags.parquet"),
        )
    )
    
    # Add local scoring here
    logger.info("Scoring submission...")
    # Read submission predictions
    predictions = pl.read_parquet("submission.parquet")
    # Read test data with actual values
    test_data = pl.read_parquet(os.path.join(data_path, "test.parquet"))
    
    # Score only rows marked for scoring
    mask = test_data['is_scored']
    score = r2_score_weighted(
        test_data.filter(mask)['responder_6'].to_numpy(),
        predictions.filter(mask)['responder_6'].to_numpy(),
        test_data.filter(mask)['weight'].to_numpy()
    )
    logger.info(f"Overall R² score: {score:.4f}")

INFO:__main__:Starting local gateway...
INFO:__main__:Received test data with shape: (39, 85)
INFO:__main__:Received lags data with shape: (39, 12)
INFO:__main__:Generated predictions with shape: (39, 2)
INFO:__main__:Scoring submission...


ColumnNotFoundError: "responder_6" not found