In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

**Download Data**

In [3]:


data = yf.download("AAPL", start="2018-01-01", end="2023-01-01")

data["Return"] = data["Close"].pct_change()
data.dropna(inplace=True)

data["Target"] = (data["Return"] > 0).astype(int)
print(data)

  data = yf.download("AAPL", start="2018-01-01", end="2023-01-01")
[*********************100%***********************]  1 of 1 completed

Price            Close        High         Low        Open     Volume  \
Ticker            AAPL        AAPL        AAPL        AAPL       AAPL   
Date                                                                    
2018-01-03   40.373951   40.917804   40.310660   40.444277  118071600   
2018-01-04   40.561512   40.664657   40.338815   40.446645   89738400   
2018-01-05   41.023293   41.110026   40.566177   40.657600   94640000   
2018-01-08   40.870937   41.166304   40.772478   40.870937   82271200   
2018-01-09   40.866238   41.037362   40.650573   40.917810   86336000   
...                ...         ...         ...         ...        ...   
2022-12-23  130.026230  130.578440  127.837102  129.099300   63814900   
2022-12-27  128.221649  129.582462  126.929870  129.552881   69007800   
2022-12-28  124.287163  129.207765  124.119529  127.866678   85438400   
2022-12-29  127.807487  128.665383  125.953636  126.210014   75703700   
2022-12-30  128.123047  128.142773  125.657822  126




**Build Features**

In [4]:
lookback = 5
X, y = [], []
for i in range(lookback, len(data)):
    X.append(data["Return"].values[i-lookback:i])
    y.append(data["Target"].values[i])

X = np.array(X)
y = np.array(y)

# Standardisation
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split train / test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

**MLP Model**

In [5]:
model = models.Sequential([
    layers.Dense(32, activation="relu", input_shape=(X_train.shape[1],)),
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid")  # sortie = proba
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


**Training**

In [6]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=20, batch_size=32, verbose=1)

Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.5419 - loss: 0.6981 - val_accuracy: 0.4542 - val_loss: 0.7224
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5479 - loss: 0.6905 - val_accuracy: 0.4701 - val_loss: 0.7165
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5389 - loss: 0.6874 - val_accuracy: 0.4661 - val_loss: 0.7137
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5589 - loss: 0.6847 - val_accuracy: 0.4741 - val_loss: 0.7164
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5639 - loss: 0.6826 - val_accuracy: 0.4622 - val_loss: 0.7170
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5639 - loss: 0.6810 - val_accuracy: 0.4502 - val_loss: 0.7195
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━

**Long Only Strategy**

In [7]:
proba = model.predict(X_test).flatten()
positions = (proba > 0.5).astype(int)  # 1 si on prend position

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


**Strategy  Simulation**

In [8]:
returns = data["Return"].iloc[-len(y_test):].values
strategy_returns = positions * returns

In [9]:
print("Rendement cumulé stratégie :", np.cumprod(1+strategy_returns)[-1] - 1)

Rendement cumulé stratégie : -0.19163491097583618


**Résultat :** ici, nous obtenons un rendement cumulé de stratégie de -0,218, soit -22 %.  

L'idée est d'améliorer ce rendement. Plusieurs facteurs peuvent l'influencer :  

### 1. Les données / features
- Ici, nous utilisons seulement les 5 derniers rendements.  
- Si ces 5 rendements n’apportent pas assez d’information pour prédire le rendement du lendemain, même un modèle parfait ne pourra pas améliorer significativement la performance.

### 2. L’entraînement (`model.fit`)
- Le modèle doit apprendre correctement à partir des données d’entraînement.  
- Un nombre insuffisant d’epochs, un `batch_size` mal choisi ou la présence de bruit dans les données peuvent réduire la performance du modèle.

### 3. Prétraitement des données
- La standardisation, la suppression des valeurs manquantes (`NaN`), et la division correcte entre train et test impactent directement la qualité de l’apprentissage.  
- Un prétraitement approprié est essentiel pour que le modèle exploite au mieux les informations contenues dans les features.


In [38]:
np.random.seed(42)
print(np.random.rand(3))  # [0.37454012 0.95071431 0.73199394]

np.random.seed(42)
print(np.random.rand(3))  # [0.37454012 0.95071431 0.73199394] à nouveau


[0.37454012 0.95071431 0.73199394]
[0.37454012 0.95071431 0.73199394]


In [37]:
print(np.random.rand(3))

[0.05808361 0.86617615 0.60111501]
