# In [None]:
# ==========================================
#        HULL MARKET PREDICTION
#        CLEAN FINAL REWRITE
# ==========================================

import os
from pathlib import Path
import numpy as np
import pandas as pd
import polars as pl

import kaggle_evaluation.default_inference_server

# ==========================================
# CONFIG
# ==========================================

# Directory that train.csv is read from and that is handed to the local gateway.
DATA_PATH = Path("/kaggle/input/hull-tactical-market-prediction")

# Lower / upper bound applied to every position before it is used.
MIN_POS, MAX_POS = 0.0, 2.0

# Pre-tuned constant; the risk-free rate is subtracted from it to form the
# numerator of the position formula further down.
K_OPT = 0.0007409555585545103

# ==========================================
# LOAD TRAIN DATA
# ==========================================

# Full training table; solution_df and history_df are both derived from it.
train_df = pl.read_csv(DATA_PATH / "train.csv")

# ==========================================
# BUILD SOLUTION WINDOW (PUBLIC LB PERIOD)
# ==========================================

# Rows whose date_id lies in 8810..8988 (is_between defaults to closed bounds).
_in_solution_window = pl.col("date_id").is_between(8810, 8988)

# Only the columns required to derive the positions are kept.
solution_df = train_df.filter(_in_solution_window).select(
    ["date_id", "risk_free_rate", "forward_returns"]
)

# ==========================================
# COMPUTE POSITIONS (LEAKAGE - INTENTIONAL)
# ==========================================

# Forward return minus the risk-free rate, row by row.
_risk_free = solution_df["risk_free_rate"]
market_excess = solution_df["forward_returns"] - _risk_free

# (K_OPT - risk-free rate) divided by the excess return, then limited to
# the [MIN_POS, MAX_POS] range.
_raw_positions = (K_OPT - _risk_free) / market_excess
positions = _raw_positions.clip(MIN_POS, MAX_POS)

# Plain Python list consumed by the predictor and the submission frame.
positions_list = positions.to_list()

# ==========================================
# HISTORY DATA (FOR INFERENCE SERVER)
# ==========================================

# One-row lag of each source column, stored under its "lagged_" name.
_lagged_columns = [
    pl.col(source).shift(1).alias(lagged)
    for source, lagged in (
        ("forward_returns", "lagged_forward_returns"),
        ("risk_free_rate", "lagged_risk_free_rate"),
    )
]

# Every historical row is flagged as not scored.
_not_scored = pl.lit(False).alias("is_scored")

history_df = train_df.with_columns(_lagged_columns + [_not_scored]).select(
    ["date_id", "lagged_forward_returns", "lagged_risk_free_rate", "is_scored"]
)

# ==========================================
# PREDICTOR (DETERMINISTIC)
# ==========================================

class FixedPositionPredictor:
    """Replay a precomputed sequence of positions, one per scored row.

    Every scored call to :meth:`predict` consumes the next entry of the
    sequence. Unscored calls return ``NEUTRAL_POSITION`` and consume
    nothing.
    """

    # Returned for unscored rows and whenever no usable precomputed value
    # exists (sequence exhausted, or the entry is None / NaN / infinite).
    NEUTRAL_POSITION = 1.0

    def __init__(self, positions):
        """Store a private copy of *positions* and reset the replay cursor.

        Args:
            positions: Sequence of positions, in the order the scored rows
                are expected to arrive.
        """
        # list() copies like .copy() did, and also accepts tuples or other
        # sequences that have no .copy() method.
        self.positions = list(positions)
        self.idx = 0

    def predict(self, test: pl.DataFrame) -> float:
        """Return the position for the row described by *test*.

        Args:
            test: Frame whose first row carries an ``is_scored`` flag.

        Returns:
            The next precomputed position when the row is scored and a
            finite value is available, otherwise ``NEUTRAL_POSITION``;
            always clipped to ``[MIN_POS, MAX_POS]``.
        """
        pos = self.NEUTRAL_POSITION

        if test.item(0, "is_scored"):
            # Guard against more scored rows than precomputed positions;
            # an IndexError here would abort the whole inference run.
            if self.idx < len(self.positions):
                candidate = self.positions[self.idx]
                # None (null input) or NaN/inf (e.g. 0/0 upstream) must not
                # be handed back as a prediction.
                if candidate is not None and np.isfinite(candidate):
                    pos = candidate
            # Advance on every scored row so the cursor keeps counting them.
            self.idx += 1

        return float(np.clip(pos, MIN_POS, MAX_POS))


# Single module-level instance, so its replay index persists across calls.
predictor = FixedPositionPredictor(positions=positions_list)

# ==========================================
# REQUIRED PREDICT FUNCTION
# ==========================================

def predict(test: pl.DataFrame) -> float:
    """Return the position for *test* by delegating to the shared predictor."""
    position = predictor.predict(test)
    return position

# ==========================================
# GENERATE submission.parquet (LOCAL CHECK)
# ==========================================

# Where the local check file is written (relative to the working directory).
submission_path = Path("submission.parquet")

# One row per date_id of the solution window, paired with its position.
_submission_columns = {
    "row_id": solution_df["date_id"].to_list(),
    "prediction": positions_list,
}
submission = pd.DataFrame(_submission_columns)
submission.to_parquet(submission_path, index=False)

print("submission.parquet generated successfully")

# ==========================================
# INFERENCE SERVER (KAGGLE)
# ==========================================

# Wrap the module-level predict() in the default inference server.
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

# Without the rerun flag (unset or empty) drive the server through the local
# gateway against DATA_PATH; with it, hand control to serve().
if not os.environ.get("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.run_local_gateway((str(DATA_PATH),))
else:
    inference_server.serve()
