In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file below the read-only input directory,
# in the order os.walk visits them, then print one path per line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Imports

In [None]:
import os
import pandas as pd
import polars as pl
import numpy as np
from xgboost import XGBRegressor
import kaggle_evaluation.default_inference_server


## Model Training

In [None]:
# Read the competition training set into a pandas DataFrame.
train = pd.read_csv('/kaggle/input/hull-tactical-market-prediction/train.csv')

# Keep every column whose name begins with one of these prefixes.
# Note: any name starting with 'MOM' also starts with 'M', so the last entry
# never selects anything the first one does not already select.
feature_prefixes = ('M', 'E', 'V', 'MOM')
features = [name for name in train.columns if name.startswith(feature_prefixes)]

# Design matrix (missing values replaced by 0) and regression target.
X = train[features].fillna(0)
y = train['market_forward_excess_returns']

# Gradient-boosted regressor; random_state is fixed so the row subsampling is repeatable.
xgb_params = dict(
    n_estimators=200,
    max_depth=9,
    learning_rate=0.05,
    subsample=0.8,
    random_state=42,
)
model = XGBRegressor(**xgb_params)
model.fit(X, y)


## Define Prediction

In [None]:
# Settings for the running volatility estimate used by predict(). The window and
# target values match the rolling estimate applied to the public test set
# elsewhere in this notebook.
_VOL_WINDOW = 20         # number of most recent raw predictions kept
_TARGET_VOL = 1.2        # numerator of the scaling factor
_VOL_EPS = 1e-6          # keeps the division finite when the estimate is 0
_raw_pred_history = []   # raw predictions from previous predict() calls (reset when this cell is re-run)


def predict(test: pl.DataFrame) -> float:
    """Turn one batch of test features into an allocation in [0, 2].

    Steps:
      1. Convert the Polars frame to pandas and align its columns to the
         features the global ``model`` was fitted on (absent columns and
         missing values become 0).
      2. Predict the excess return for the first row of the batch.
      3. Scale the prediction by ``_TARGET_VOL`` divided by the sample standard
         deviation of the last ``_VOL_WINDOW`` raw predictions (including the
         current one), then clip the result to [0, 2].

    The function is stateful: each call appends its raw prediction to the
    module-level ``_raw_pred_history`` list.

    Args:
        test: Polars DataFrame holding the feature columns; only the first
            row's prediction is used.

    Returns:
        The allocation as a built-in ``float`` between 0 and 2 inclusive.
    """
    # Convert Polars to Pandas
    df = test.to_pandas()

    # Use exactly the columns (and column order) the model was trained on, so
    # an extra, missing or reordered test column cannot trip XGBoost's
    # feature-name validation. Fall back to prefix selection only if the
    # booster carries no feature names.
    trained_features = model.get_booster().feature_names
    if trained_features is None:
        trained_features = [col for col in df.columns if col.startswith(('M', 'E', 'V', 'MOM'))]
    X_test = df.reindex(columns=trained_features).fillna(0)

    # Predict excess return
    raw_pred = float(model.predict(X_test)[0])

    # Record the prediction and drop everything older than the window.
    _raw_pred_history.append(raw_pred)
    del _raw_pred_history[:-_VOL_WINDOW]

    # Volatility-aware allocation.
    # The standard deviation of a single value is always 0, so estimating it
    # from [raw_pred] alone made the scaling a constant (1.2 / 1e-6). Use the
    # rolling history instead; ddof=1 matches pandas' rolling().std().
    if len(_raw_pred_history) >= 2:
        volatility = float(np.std(_raw_pred_history, ddof=1))
    else:
        # No dispersion can be estimated from one observation; use the
        # prediction's own magnitude as the scale, which yields roughly
        # _TARGET_VOL for a positive forecast and 0 for a negative one.
        volatility = abs(raw_pred)

    scaling = _TARGET_VOL / (volatility + _VOL_EPS)

    # Return a plain Python float, as the signature promises, not a NumPy scalar.
    allocation = float(np.clip(scaling * raw_pred, 0, 2))

    return allocation


## Inference Server

In [None]:
# Wrap predict() in the competition's default inference server.
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

# When KAGGLE_IS_COMPETITION_RERUN is unset or empty, run against the local
# gateway using the competition input directory; otherwise serve requests.
competition_rerun_flag = os.getenv('KAGGLE_IS_COMPETITION_RERUN')
if not competition_rerun_flag:
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))
else:
    inference_server.serve()


## Public Visual set

In [None]:
# Load the public test set
test_df = pd.read_csv('/kaggle/input/hull-tactical-market-prediction/test.csv')

# Feature selection: use exactly the columns (and order) the model was fitted
# on, so an extra, missing or reordered test column cannot trip XGBoost's
# feature-name validation. Columns absent from the test file and missing values
# become 0. Prefix selection is only a fallback when the booster carries no
# feature names.
features = model.get_booster().feature_names
if features is None:
    features = [col for col in test_df.columns if col.startswith(('M', 'E', 'V', 'MOM'))]
X_test = test_df.reindex(columns=features).fillna(0)

# Predict using the trained model
raw_preds = model.predict(X_test)

# Volatility-aware allocation
vol_window = 20
target_vol = 1.2
raw_pred_series = pd.Series(raw_preds)
volatility_estimate = (
    raw_pred_series
    .rolling(window=vol_window, min_periods=1)
    .std()
    # The first row has no sample std (one observation); borrow the next value.
    .bfill()
    # With a single-row test set there is nothing to back-fill from, which
    # would leave NaN allocations; use the prediction's magnitude as the scale.
    .fillna(raw_pred_series.abs())
)
scaling_factor = target_vol / (volatility_estimate + 1e-6)
allocations = np.clip(scaling_factor * raw_preds, 0, 2)

# Create output DataFrame
# (the 'market_forward_excess_returns' column holds the model's predictions)
output_df = pd.DataFrame({
    'date_id': test_df['date_id'],
    'market_forward_excess_returns': raw_preds,
    'allocation': allocations
})

# Save to /kaggle/working/
output_df.to_parquet('/kaggle/working/predictions.parquet', index=False)
output_df.to_csv('/kaggle/working/predictions.csv', index=False)


In [None]:
# Display the table of date_id, predicted excess return and allocation built above.
output_df