## 1) Load libraries

Start by importing all the essential libraries.

In [1]:
import xarray as xr
import pandas as pd
import numpy as np

# Importar bibliotecas de Quantiacs.
import qnt.data as qndata  # Cargar y manipular datos.
import qnt.output as qnout  # Manejar la salida de resultados.
import qnt.stats as qnstats  # Funciones estadísticas para análisis.
import qnt.graph as qngraph  # Herramientas gráficas para visualización.
import qnt.ta as qnta  # Biblioteca de indicadores técnicos.

# ML
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier


import dask.array as da

## 2) Data

The variable **qndata.stocks.load_spx_data(tail=period)** is an xarray.DataArray structure which contains historical market data for the last (tail=period) days and whose coordinates are:

* **time**: a date in format yyyy-mm-dd;
* **field**: an attribute, for example the opening daily price;
* **asset**: the identifying symbol for the asset, for example NAS:APPL for Apple.

![data_example](./img/data.jpg)

Load daily stock data for the Q22 S&P500 contest


In [2]:
data = qndata.stocks.load_spx_data(min_date="2005-06-01")

close = data.sel(field="close").to_dataframe()
print(close.head())
print(close.columns)

fetched chunk 1/13 0s
fetched chunk 2/13 0s
fetched chunk 3/13 0s
fetched chunk 4/13 0s
fetched chunk 5/13 0s
fetched chunk 6/13 0s
fetched chunk 7/13 0s
fetched chunk 8/13 0s
fetched chunk 9/13 1s
fetched chunk 10/13 1s
fetched chunk 11/13 1s
fetched chunk 12/13 1s
fetched chunk 13/13 1s
Data loaded 1s
                     field  stocks_s&p500
time       asset                         
2005-06-01 NAS:AAL   close            NaN
           NAS:AAPL  close         1.4393
           NAS:ABNB  close            NaN
           NAS:ACGL  close         4.8467
           NAS:ADBE  close        33.1800
Index(['field', 'stocks_s&p500'], dtype='object')


In [3]:
sma_20 = close.groupby(level='asset')['stocks_s&p500'].transform(lambda x: qnta.sma(x, 20))  # Media móvil de 20 días
sma_50 = close.groupby(level='asset')['stocks_s&p500'].transform(lambda x: qnta.sma(x, 50))  # Media móvil de 50 días
returns = close.groupby(level='asset')['stocks_s&p500'].transform(lambda x: x.pct_change(fill_method=None).shift(-1))  # Rendimiento del día siguiente

# Crear un DataArray para el objetivo (Target).
target = (returns > 0).astype(int)  # 1 si el rendimiento es positivo, 0 si negativo.

# Concatenar los datos en un nuevo DataArray para la entrada del modelo.
sma_20_array = sma_20.to_xarray()
sma_50_array = sma_50.to_xarray()
features = xr.concat([sma_20_array, sma_50_array], dim='feature')  # Concatenar las características.

# Asegúrate de que las dimensiones sean correctas para el modelo.
features = features.transpose('time', 'feature', 'asset')  # Transponer para que 'time' sea la primera dimensión.

# Dividir los datos en conjuntos de entrenamiento y prueba.
min_length = min(len(features), len(target))
features = features[:min_length]
target = target[:min_length]

features_reshaped = features.values.reshape(features.shape[0], -1)
X_train, X_test, y_train, y_test = train_test_split(features_reshaped, target.values, test_size=0.2, random_state=42)


In [4]:
# Crear un imputador que reemplace NaN con la media de la columna
imputer = SimpleImputer(strategy='mean')

In [5]:
# Modelo de regresion logisctica (precision 0.60, Sharpe ratio 0.5364004217530096)
pipeline = Pipeline([
    ('imputer', imputer),
    ('scaler', StandardScaler()),  # Escalar los datos
    ('classifier', LogisticRegression(solver='lbfgs', max_iter=500000))  # Aumentar iteraciones
])

In [None]:
# Gradient Boosting (Precision 0.60, Sharpe ratio 0.32655346880643144)
#pipeline = Pipeline([
#    ('imputer', imputer),
#    ('scaler', StandardScaler()),
#    ('classifier', GradientBoostingClassifier(random_state=42))
#])

In [None]:
# Redes Neuronales (Precision 0.60, Sharpe ratio 0.5364004217530096)
#pipeline = Pipeline([
#    ('imputer', imputer),
#    ('scaler', StandardScaler()),
#    ('classifier', MLPClassifier(random_state=42, max_iter=500))
#])

In [6]:
# Ajustar el modelo usando el pipeline
pipeline.fit(X_train, y_train)

In [7]:
# Hacer predicciones
y_pred = pipeline.predict(X_test)

# Calcular y mostrar la precisión del modelo.
accuracy = accuracy_score(y_test, y_pred)
print(f'Precisión del modelo: {accuracy:.2f}')

# Reporte de clasificación
print(classification_report(y_test, y_pred))

# Matriz de confusión
print(confusion_matrix(y_test, y_pred))

Precisión del modelo: 0.60
              precision    recall  f1-score   support

           0       0.63      0.79      0.70       588
           1       0.51      0.32      0.39       400

    accuracy                           0.60       988
   macro avg       0.57      0.55      0.55       988
weighted avg       0.58      0.60      0.58       988

[[463 125]
 [272 128]]


In [8]:
# Generar señales de trading basadas en las predicciones del modelo.
signals = pipeline.predict(features_reshaped)

In [9]:
# Convertir señales a pesos para la estrategia.
num_times = features.shape[0]
num_assets = len(data.coords['asset'])

# Crear un DataArray para los pesos, inicializándolo en 0
weights = xr.DataArray(
    0, 
    coords=[data.coords['time'], data.coords['asset']], 
    dims=['time', 'asset']
)

# Asegurarte de que las señales tengan la forma correcta
# Generar un arreglo de señales para todos los tiempos y activos
signals_expanded = np.repeat(signals[:, np.newaxis], num_assets, axis=1)

# Asignar señales a pesos, asegurando que la forma sea correcta
weights[:] = signals_expanded

In [10]:
# Filtrar por liquidez.
is_liquid = data.sel(field="is_liquid")  # Seleccionar datos de liquidez.
weights = weights * is_liquid  # Aplicar filtro de liquidez.
weights = qnout.clean(weights, data, "stocks_s&p500")  # Limpiar pesos.

Output cleaning...
fix uniq
ffill if the current price is None...
Check liquidity...
Ok.
Check missed dates...
Ok.
Normalization...
Output cleaning is complete.


## 4) Performance estimation

Once we have our trading algorithm, we can assess its performance by calculating various statistics.

In [11]:
# Calcular estadísticas de la estrategia.
stats = qnstats.calc_stat(data, weights.sel(time=slice("2006-01-01", None)))  # Calcular estadísticas.
print(stats.to_pandas().tail())  # Mostrar las últimas estadísticas calculadas.

field         equity  relative_return  volatility  underwater  max_drawdown  \
time                                                                          
2025-01-03  2.459986         0.009373    0.087221   -0.022732      -0.14935   
2025-01-06  2.459112        -0.000355    0.087212   -0.023079      -0.14935   
2025-01-07  2.451021        -0.003290    0.087206   -0.026293      -0.14935   
2025-01-08  2.454628         0.001472    0.087198   -0.024860      -0.14935   
2025-01-10  2.417613        -0.015080    0.087259   -0.039565      -0.14935   

field       sharpe_ratio  mean_return  bias  instruments  avg_turnover  \
time                                                                     
2025-01-03      0.556849     0.048569   1.0        515.0      0.033614   
2025-01-06      0.556562     0.048539   1.0        515.0      0.033609   
2025-01-07      0.554391     0.048346   1.0        515.0      0.033605   
2025-01-08      0.555258     0.048417   1.0        515.0      0.033601   
20

These stats show how well the algorithm is doing if you started with 1M USD. They include:

* **equity**: the cumulative value of profits and losses since inception (1M USD);
* **relative_return**: the relative daily variation of **equity**;
* **volatility**: the volatility of the investment since inception (i.e. the annualized standard deviation of the daily returns);
* **underwater**: the time evolution of drawdowns;
* **max_drawdown**: the absolute minimum of the underwater chart;
* **sharpe_ratio**: the annualized Sharpe ratio since inception; the value must be larger than 1 for taking part to contests;
* **mean_return**: the annualized mean return of the investment since inception;
* **bias**: the daily asymmetry between long and short exposure: 1 for a long-only system, -1 for a short-only one;
* **instruments**: the number of instruments which get allocations on a given day;
* **avg_turnover**: the average turnover;
* **avg_holding_time**: the average holding time in days.

We can also plot a chart to show how profits and losses have accumulated over time.

In [12]:
# Graficar el rendimiento acumulado de la estrategia.
performance = stats.to_pandas()["equity"]  # Obtener rendimiento acumulado.
qngraph.make_plot_filled(performance.index, performance, name="PnL (Equity)", type="log")  # Graficar.

## 5) Submit Your strategy to the competition

> Send strategy use **Submit** button

In [13]:
weights = weights.sel(time=slice("2006-01-01", None))

qnout.check(weights, data, "stocks_s&p500")

Check liquidity...
Ok.
Check missed dates...
Ok.
Check the sharpe ratio...
Period: 2006-01-01 - 2025-01-10
Sharpe Ratio = 0.5451425999830357


ERROR! The Sharpe Ratio is too low. 0.5451425999830357 < 0.7
Improve the strategy and make sure that the in-sample Sharpe Ratio more than 0.7.


Check correlation.

Ok. This strategy does not correlate with other strategies.


In [None]:
qnout.write(weights)  # to participate in the competition
#After submitting the strategy to the contest, any weight exceeding 0.1 will be capped at that limit.

## Strategy Guidelines

* Your trading algorithm can open both short and long positions.

* At any given time, your algorithm can trade all or a subset of stocks that are or were part of the S&P500 stock index. Keep in mind that this index's composition changes over time. Quantiacs provides a suitable filter function for selecting these stocks.

* The Sharpe ratio of your system since January 1, 2006, must be greater than 0.7.

* The maximum allocation to any single asset is capped at 10% of total capital. If a weight exceeds this, it will be limited to 0.1. Other normalization methods are allowed.

* Manual stock selection or direct hand-picking is not permitted. The allocation process must be [dynamic](https://quantiacs.com/documentation/en/user_guide/dynamic_assets_selection.html#risk-management).

* Your system must not replicate the current examples. We use a correlation filter to identify and remove duplicates in the submissions.

For more detailed rules, please visit our [competition rules page](https://quantiacs.com/contest).