Published on September 26, 2025. By Prata, Marília (mpwolke)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

#Two lines Required to Plot Plotly
import plotly.io as pio
pio.renderers.default = 'iframe'

import plotly.graph_objs as go
import plotly.offline as py
import plotly.express as px

#Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file,
# in the order os.walk yields them.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

![](https://www.levels.fyi/api/company-og-generator?host=https%3A%2F%2Fwww.levels.fyi&meta=Overview&company=Hull+Tactical&logo=https%3A%2F%2Fimg.logo.dev%2Fhulltactical.com%3Ftoken%3Dpk_Ez-J4YOpSS-Bjtug_T41Dw)Levels.fyi

## When to re-enter the market: 

"'Try and time the market' describes the investment strategy of trying to predict when the market is at a high or low point in order to buy or sell investments accordingly, aiming to profit from these short-term price movements."

"**Market timing** requires two correct decisions: when to sell and **when to re-enter the market**." 

https://tribeimpactcapital.com/impact-hub/time-in-the-market-beats-timing-the-market/#:~:text=Trying%20to%20predict%20exactly%20when,markets%20will%20continue%20climbing%20indefinitely.

"Your task is to predict the stock market returns as represented by the excess returns of the S&P 500 while also managing volatility constraints. Your work will test the Efficient Market Hypothesis and challenge common tenets of personal finance."

https://www.kaggle.com/competitions/hull-tactical-market-prediction/overview

## Competition Citation

@misc{hull-tactical-market-prediction,
    author = {Blair Hull and Petra Bakosova and Laurent Lanteigne and Aishvi Shah and Euan C Sinclair and Petri Fast and Will Raj and Harold Janecek and Sohier Dane and Addison Howard},
    title = {Hull Tactical - Market Prediction},    
    year = {2025},    
    howpublished = {\url{https://kaggle.com/competitions/hull-tactical-market-prediction}},
    note = {Kaggle}
}

## test file

date_id

[feature_name] - The feature columns are the same as in train.csv.

is_scored - Whether this row is included in the evaluation metric calculation. During the model training phase this will be true for the first 180 rows only. Test set only.

lagged_forward_returns - The returns from buying the S&P 500 and selling it a day later, provided with a lag of one day.

lagged_risk_free_rate - The federal funds rate, provided with a lag of one day.

lagged_market_forward_excess_returns - Forward returns relative to expectations. Computed by subtracting the rolling five-year mean forward returns and winsorizing the result using a median absolute deviation (MAD) with a criterion of 4, provided with a lag of one day.

https://www.kaggle.com/competitions/hull-tactical-market-prediction/data

In [None]:
# Read the competition's test split and preview its last three rows.
test = pd.read_csv(filepath_or_buffer='/kaggle/input/hull-tactical-market-prediction/test.csv')
test.tail(n=3)

## train file

date_id - An identifier for a single trading day.

M* - Market Dynamics/Technical features.

E* - Macro Economic features.

I* - Interest Rate features.

P* - Price/Valuation features.

V* - Volatility features.

S* - Sentiment features.

MOM* - Momentum features.

D* - Dummy/Binary features.

forward_returns - The returns from buying the S&P 500 and selling it a day later. Train set only.

risk_free_rate - The federal funds rate. Train set only.

market_forward_excess_returns - Forward returns relative to expectations. Computed by subtracting the rolling five-year mean forward returns and winsorizing the result using a median absolute deviation (MAD) with a criterion of 4. Train set only.

In [None]:
# Read the competition's training split and preview its last three rows.
train = pd.read_csv(filepath_or_buffer='/kaggle/input/hull-tactical-market-prediction/train.csv')
train.tail(n=3)

### info() method

In [None]:
# Print pandas' summary of the training frame.
train.info()

In [None]:
# Print pandas' summary of the test frame.
test.info()

## Describe method

In [None]:
# Keep only the mean, minimum and maximum rows of describe(), then transpose
# the table so that each column of train becomes one row.
train.describe().loc[['mean', 'min', 'max'], :].transpose()

In [None]:
# Same mean/min/max summary for the test frame, one row per column.
test.describe().loc[['mean', 'min', 'max'], :].transpose()

## Missing values

In [None]:
# Count NaNs per column once, then report only the columns that have any.
null_counts = train.isnull().sum()
print("train missing values:")
print(null_counts[null_counts > 0])

In [None]:
#By Shreyansh Dangi  https://www.kaggle.com/code/shreyanshdangi/data-cleaning-eda-using-clustering-algorithm

# Bar chart of the number of missing values per column of train. Each bar is
# annotated with the percentage of rows that are missing in that column.
missing_values = train.isnull().sum()
missing_values = missing_values[missing_values > 0]

missing_percentage = (missing_values / len(train)) * 100

if missing_values.empty:
    # Nothing to draw (and max() below would be NaN), so report instead.
    print("No missing values in train.")
else:
    plt.figure(figsize=(10, 5))
    ax = missing_values.plot(kind="bar", color="orange", edgecolor="black")

    # Derive the axis limit and the label offset from the tallest bar instead
    # of hard-coding values that only fit one particular dataset size. The
    # extra 25% headroom leaves space for the rotated percentage labels.
    tallest_bar = missing_values.max()
    ax.set_ylim(0, tallest_bar * 1.25)
    label_offset = tallest_bar * 0.015

    # ax.patches holds one rectangle per bar, in the same order as the Series.
    for p, perc in zip(ax.patches, missing_percentage):
        ax.text(
            p.get_x() + p.get_width() / 2,
            p.get_height() + label_offset,
            f'{perc:.1f}%',
            ha='center',
            fontsize=7.5,
            rotation=90
        )

    plt.title("Missing Values per Column (Count) with %", fontsize=16)
    plt.xlabel("Columns", fontsize=12)
    plt.ylabel("Number of Missing Values", fontsize=12)
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()

## select_dtypes (include=['int64','float64'])

In [None]:
# Show the names of the int64/float64 columns of the training frame.
train.select_dtypes(include=['int64','float64']).columns

## Fillna of numerical columns with 0 (zero).

In [None]:
# Replace NaN with 0 in every int64/float64 column of train.
# The columns are looked up from the frame itself rather than from a
# hand-typed list of names, so the cell cannot drift out of sync with the
# data (no KeyError on a renamed column, no silently skipped new column),
# and the fill is done in one vectorised assignment instead of per column.
numeric_cols = train.select_dtypes(include=['int64', 'float64']).columns
train[numeric_cols] = train[numeric_cols].fillna(0)

In [None]:
# Display every row of train that has at least one NaN in any column.
train.loc[train.isna().any(axis='columns')]

In [None]:
#Code by  https://www.kaggle.com/kartushovdanil/ubiquant-market-prediction-eda

from pathlib import Path
import random
import tqdm

from argparse import Namespace
import random
import os
import gc

from cycler import cycler

In [None]:
# Pick up to three distinct target values to highlight in the KDE grid.
# A dedicated seeded generator makes the pick identical on every run without
# touching the global random state, and sample() (unlike choices()) never
# returns the same value twice. tolist() is needed because sample() expects
# a real sequence, not a NumPy array.
target_values = train['market_forward_excess_returns'].unique().tolist()
inv_ids = random.Random(42).sample(target_values, k=min(3, len(target_values)))

In [None]:
#Code by  https://www.kaggle.com/kartushovdanil/ubiquant-market-prediction-eda

# 5x5 grid of KDE plots, one panel per column for the first 25 columns of
# train. Each panel shows the density over all rows, overlaid with filled
# densities of the rows whose target equals one of the values in inv_ids.

# NOTE: this changes the global rcParams, so figures drawn by later cells
# are rendered at 600 dpi as well.
plt.rcParams['figure.dpi'] = 600
background_color = '#f6f5f5'
fig = plt.figure(figsize=(10, 10), facecolor=background_color)
gs = fig.add_gridspec(5, 5)
gs.update(wspace=0.3, hspace=0.3)

colormap = ['#1DBA94','#1C5ED2', '#FFC300', '#C70039']
plt.rc('axes', prop_cycle=(cycler('color', colormap)))

# Collect the axes in a plain list (row-major order) instead of writing
# dynamically named variables into locals().
axes = []
for row in range(0, 5):
    for col in range(0, 5):
        ax = fig.add_subplot(gs[row, col])
        ax.set_facecolor(background_color)
        for s in ["top","right"]:
            ax.spines[s].set_visible(False)
        axes.append(ax)

features = list(train.columns[0:25]) # the first 25 columns, one per panel

# Loop-invariant: the highlighted rows and the hue order are the same for
# every panel, so compute them once.
selected = train[train['market_forward_excess_returns'].isin(inv_ids)]
# list.sort() returns None, so `hue_order=inv_ids.sort(reverse=True)` always
# passed None (and reordered inv_ids as a side effect); sorted() returns the
# intended descending list and leaves inv_ids untouched.
hue_order = sorted(inv_ids, reverse=True)

# zip stops at the shorter input, so fewer than 25 columns is fine.
for ax, col in zip(axes, features):
    sns.kdeplot(ax=ax, x=train[col], zorder=2, alpha=1, linewidth=1, color='#ffd514')
    sns.kdeplot(ax=ax, x=selected[col], hue=selected['market_forward_excess_returns'], zorder=2, alpha=1, fill=True, color=colormap, linewidth=0.5, legend=False, hue_order=hue_order)

    ax.grid(which='major', axis='x', zorder=0, color='#EEEEEE', linewidth=0.4)
    ax.grid(which='major', axis='y', zorder=0, color='#EEEEEE', linewidth=0.4)
    ax.set_ylabel('')
    ax.set_xlabel(col, fontsize=4, fontweight='bold')
    ax.tick_params(labelsize=4, width=0.5)
    ax.xaxis.offsetText.set_fontsize(4)
    ax.yaxis.offsetText.set_fontsize(4)

plt.show()

## Target (market_forward_excess_returns) histogram

The target looks approximately normally distributed.

In [None]:
# Density histogram of the target with a KDE curve on top.
# sns.distplot is deprecated; histplot with kde=True, stat="density" and
# kde_kws=dict(cut=3) is the replacement suggested by seaborn for it.
plt.figure(figsize = (2,2))
ax = sns.histplot(
    train['market_forward_excess_returns'],
    kde=True,
    stat="density",
    kde_kws=dict(cut=3),
)
plt.xlabel("Histogram of Target", size=6)
plt.show()
# Trailing semicolon keeps the integer returned by gc.collect() out of the
# cell output.
gc.collect();

## Draft session: almost 3 hours, since I restarted it many times.

Just a quick overview of the Hull Tactical data.

## Acknowledgements

Shreyansh Dangi  https://www.kaggle.com/code/shreyanshdangi/data-cleaning-eda-using-clustering-algorithm

Torch me https://www.kaggle.com/kartushovdanil/ubiquant-market-prediction-eda