In [1]:
import os

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/hull-tactical-market-prediction/train.csv
/kaggle/input/hull-tactical-market-prediction/test.csv
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/default_inference_server.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/default_gateway.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/__init__.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/core/templates.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/core/base_gateway.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/core/relay.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/core/kaggle_evaluation.proto
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/core/__init__.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/core/generated/kaggle_evaluation_pb2.py
/kaggle/input/hull-tactical-market-prediction/kaggle_evaluation/core/generated/kaggle_evaluation_pb2_grpc.py
/kaggl

In [2]:
import os
import polars as pl
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
import kaggle_evaluation.default_inference_server

# 1. Load Training Data
train_path = "/kaggle/input/hull-tactical-market-prediction/train.csv"
df_train = pl.read_csv(train_path)

# 2. Preprocessing
ignore_cols = ['date_id', 'forward_returns', 'market_forward_excess_returns', 'risk_free_rate']
feature_cols = [c for c in df_train.columns if c not in ignore_cols]
target_col = 'market_forward_excess_returns'

X_pandas = df_train.select(feature_cols).to_pandas()
y = df_train.select(target_col).to_pandas().values.ravel()

# 3. Imputation (Mean Strategy)
imputer = SimpleImputer(strategy='mean')
print("Imputing missing values with mean...")
X_imputed = imputer.fit_transform(X_pandas)

# 4. Calculate Scale Factor for Betting Strategy
# Set a scale factor to map small return predictions (e.g., 0.001) to weights.
# Use the inverse of the target's standard deviation.
# If the prediction moves 1 std dev, the weight adjusts significantly.
target_std = np.std(y)
scale_factor = 1.0 / target_std if target_std != 0 else 100.0
print(f"Calculated Scale Factor: {scale_factor:.4f} (based on target std: {target_std:.6f})")

# 5. Time-Series Cross-Validation
tscv = TimeSeriesSplit(n_splits=5)
cv_scores = []

print("Starting Time-Series Cross-Validation...")
for fold, (train_index, val_index) in enumerate(tscv.split(X_imputed)):
    X_train_cv, X_val_cv = X_imputed[train_index], X_imputed[val_index]
    y_train_cv, y_val_cv = y[train_index], y[val_index]
    
    model_cv = Ridge(alpha=1.0)
    model_cv.fit(X_train_cv, y_train_cv)
    
    preds = model_cv.predict(X_val_cv)
    mse = mean_squared_error(y_val_cv, preds)
    cv_scores.append(mse)
    print(f"Fold {fold+1} MSE: {mse:.6f}")

print(f"Average CV MSE: {np.mean(cv_scores):.6f}")

# 6. Final Model Training
final_model = Ridge(alpha=1.0)
final_model.fit(X_imputed, y)
print("Final model training completed.")

# 7. Inference Function with Sigmoid Betting Strategy
def predict(test: pl.DataFrame) -> float:
    """
    Inference function that returns a continuous weight between 0 and 2.
    """
    try:
        X_test_pandas = test.select(feature_cols).to_pandas()
        X_test_imputed = imputer.transform(X_test_pandas)
        
        # Predict excess return
        pred_return = final_model.predict(X_test_imputed)[0]
        
        # Sigmoid Betting Strategy
        # (-inf, +inf) -> (0, 2)
        # 0 prediction -> weight 1.0
        sigmoid_value = 1 / (1 + np.exp(-pred_return * scale_factor))
        weight = 2.0 * sigmoid_value
        print(pred_return, float(weight))
        
        return float(weight)
        
    except Exception as e:
        return 0.0

# 8. Initialize Inference Server
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))

Imputing missing values with mean...
Calculated Scale Factor: 94.7184 (based on target std: 0.010558)
Starting Time-Series Cross-Validation...
Fold 1 MSE: 0.000356
Fold 2 MSE: 0.000186
Fold 3 MSE: 0.000235
Fold 4 MSE: 0.000104
Fold 5 MSE: 0.000139
Average CV MSE: 0.000204
Final model training completed.
0.0011803336631922567 1.0558415310037341
0.001683840211316622 1.0795767476412121
0.001802073400168665 1.0851381849914468
0.0010346218103822885 1.048959707068875
0.001838009680095008 1.0868275164640275
0.0019351142789416848 1.0913897903241254
0.0015624068955389725 1.0738596248941756
0.0018598702531310018 1.0878549183107828
0.001097312142564772 1.051921116435276
0.0008492446102450662 1.0401978907419143
