In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import lightgbm as lgbm
import kaggle_evaluation.default_inference_server

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of every
# file found, one per line (purely informational; nothing is stored).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

# Silence all Python warnings for the remainder of the session.
import warnings
warnings.simplefilter('ignore')

In [None]:
# Load the competition's training and test tables from the read-only input
# directory, then echo the training frame (rendered as the notebook cell output).
df_train, df_test = (
    pd.read_csv('/kaggle/input/hull-tactical-market-prediction/train.csv'),
    pd.read_csv('/kaggle/input/hull-tactical-market-prediction/test.csv'),
)
df_train


# Simply throwing in an LGBM model was not a good fit for high- or low-volatility settings, because we just hard-coded the base allocation and the leverage coefficient. Some improvements were made by using a volatility-normalized allocation.


In [None]:


# ============================================================
# Feature Engineering, kinda random tho
# ============================================================

# def func_rolling(df):
#     """
#     Rolling features with relative normalization
#     """
#     # Relative deviation instead of raw rolling mean
#     roll_mean_20 = df['V7'].rolling(window=20, min_periods=5).mean()
#     df['V7_rel_20'] = df['V7'] / (roll_mean_20 + 1e-6)

#     # Rolling volatility proxy (for regime detection)
#     df['V7_vol_60'] = df['V7'].rolling(window=60, min_periods=10).std()

#     return df

def func_rolling(df, *, column='V7', mean_window=20, vol_window=60,
                 eps=1e-6, default_vol=0.15):
    """Return a copy of ``df`` with two rolling features derived from ``column``.

    Two columns are appended (or overwritten if they already exist). With the
    default arguments they are named ``V7_rel_20`` and ``V7_vol_60``:

    * ``{column}_rel_{mean_window}``: the value divided by its trailing
      ``mean_window``-row mean plus ``eps`` (so an exactly-zero mean does not
      divide by zero).
    * ``{column}_vol_{vol_window}``: the trailing ``vol_window``-row standard
      deviation, with undefined entries replaced by ``default_vol``.

    Both windows use ``min_periods=1`` so short inputs still produce values.
    The standard deviation of a single observation is undefined (NaN), which
    is why the first row -- and any window holding fewer than two valid
    values -- falls back to ``default_vol``.

    The input frame is left untouched; callers must use the returned frame.

    Args:
        df: DataFrame containing ``column``.
        column: Name of the source column.
        mean_window: Window length (rows) for the trailing mean.
        vol_window: Window length (rows) for the trailing standard deviation.
        eps: Small constant added to the trailing mean before dividing.
        default_vol: Replacement for undefined standard deviations.

    Returns:
        A new DataFrame with the two feature columns added.

    Raises:
        KeyError: If ``column`` is not present in ``df``.
    """
    series = df[column]

    trailing_mean = series.rolling(window=mean_window, min_periods=1).mean()
    trailing_std = series.rolling(window=vol_window, min_periods=1).std()

    # ``assign`` builds a new frame, so the caller's DataFrame is not mutated.
    return df.assign(**{
        f'{column}_rel_{mean_window}': series / (trailing_mean + eps),
        f'{column}_vol_{vol_window}': trailing_std.fillna(default_vol),
    })
    
#Training phase

# Raw column index of the training table, captured before feature engineering.
all_cols = df_train.columns

# Columns that are never used as model inputs: the row identifier and the
# return columns (``market_forward_excess_returns`` is the training target).
non_use_cols = [
    'date_id',
    'forward_returns',
    'risk_free_rate',
    'market_forward_excess_returns'
]

# Apply feature engineering to training data
df_train = func_rolling(df_train)

# Build the feature list AFTER feature engineering. Deriving it from the
# pre-engineering column index silently dropped the columns that func_rolling
# adds, and made the feature set depend on whether this cell had been run before.
# NOTE(review): predict() recomputes these rolling columns from only the rows
# it is handed -- confirm each inference batch carries enough history for the
# values to be comparable with the ones seen during training.
feature_cols = [c for c in df_train.columns if c not in non_use_cols]

X = df_train[feature_cols]
y = df_train['market_forward_excess_returns']

# Gradient-boosted regressor with a fixed seed for reproducible fits.
model_lgbm = lgbm.LGBMRegressor(
    objective="regression",
    random_state=42,
    n_estimators=100,
    learning_rate=0.05,
    num_leaves=31,
    verbose=-1
)

model_lgbm.fit(X, y)


# def predict(test_df):
#     test_df = test_df.to_pandas()
#     test_df = func_rolling(test_df)

#     X_test = test_df[feature_cols]

#     # ---- Prediction ----
#     pred = model_lgbm.predict(X_test)[0]

#     # ---- Prediction scaling (prevents spikes) ----
#     pred_scaled = np.tanh(pred / 0.01)

#     # ---- Volatility targeting ----
#     # Use forward returns if available, else fallback to V7 volatility
#     if 'forward_returns' in test_df.columns:
#         realized_vol = (
#             test_df['forward_returns']
#             .rolling(60, min_periods=10)
#             .std()
#             .iloc[-1]
#         )
#     else:
#         realized_vol = test_df['V7_vol_60'].iloc[-1]

#     realized_vol = max(realized_vol, 1e-3)
#     target_vol = 0.12
#     vol_adj = target_vol / realized_vol

#     # ---- Base allocation ----
#     allocation = vol_adj * (0.7 + 5.0 * pred_scaled)

#     # ---- High volatility regime throttle ----
#     vol_median = test_df['V7_vol_60'].rolling(120, min_periods=20).median().iloc[-1]
#     high_vol_regime = realized_vol > vol_median

#     if high_vol_regime:
#         allocation *= 0.7  # throttle exposure in high-vol regimes

#     # ---- Final safety clip ----
#     allocation = np.clip(allocation, 0.0, 2.0)

#     print(float(allocation))
#     return float(allocation)


# fixed one
# Volatility-adjusted allocation based on the market's action

def predict(test_df):
    """Turn one inference batch into a single allocation in ``[0.0, 2.0]``.

    Steps:
      1. Convert the batch to pandas and add the rolling features.
      2. Take the model forecast for the first row and squash it with
         ``tanh(pred / 0.01)`` so the signal lies in ``[-1, 1]``.
      3. Scale the base exposure ``0.7 + 5.0 * signal`` by
         ``0.12 / current_vol``, where ``current_vol`` is the last row's
         ``V7_vol_60`` (0.15 when that value is non-finite or non-positive).
      4. Multiply by 0.7 when ``current_vol`` exceeds the batch median of
         ``V7_vol_60``.
      5. Replace NaN/inf with 1.0 / 2.0 / 0.0 and clip to ``[0.0, 2.0]``.

    Args:
        test_df: Batch object exposing ``.to_pandas()`` (presumably a polars
            DataFrame handed over by the inference gateway -- confirm).

    Returns:
        The allocation as a plain Python ``float``.

    NOTE(review): the forecast uses the first row (``[0]``) while the
    volatility uses the last row (``.iloc[-1]``); these coincide only for
    single-row batches. The rolling statistics also see only the rows in this
    batch -- confirm the batch shape delivered by the gateway.
    """
    frame = func_rolling(test_df.to_pandas())

    # Model forecast for the first row, squashed to a bounded signal.
    raw_pred = model_lgbm.predict(frame[feature_cols])[0]
    signal = np.tanh(raw_pred / 0.01)

    # Most recent volatility estimate, with a hard fallback when unusable.
    vol_series = frame['V7_vol_60']
    current_vol = vol_series.iloc[-1]
    if not (np.isfinite(current_vol) and current_vol > 0):
        current_vol = 0.15

    # Volatility targeting: scale the base exposure by target / realised vol.
    exposure = (0.12 / current_vol) * (0.7 + 5.0 * signal)

    # Throttle exposure when volatility is above the batch median.
    median_vol = vol_series.median()
    if np.isfinite(median_vol) and current_vol > median_vol:
        exposure *= 0.7

    # Sanitise non-finite values, then enforce the allowed allocation range.
    exposure = np.nan_to_num(exposure, nan=1.0, posinf=2.0, neginf=0.0)
    return float(np.clip(exposure, 0.0, 2.0))


# ============================================================
# Kaggle Inference Server
# ============================================================

# Wrap ``predict`` in the Kaggle evaluation server.
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

# Outside a competition rerun, drive the server through the local gateway
# against the competition's input directory; during a rerun, serve requests.
if not os.getenv("KAGGLE_IS_COMPETITION_RERUN"):
    inference_server.run_local_gateway(
        ("/kaggle/input/hull-tactical-market-prediction/",)
    )
else:
    inference_server.serve()
