In [8]:
#Prediction de stock avec Regression linéaire
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [9]:
# Télécharger les données de prix d'actions
ticker = 'AAPL'  # Exemple avec l'action Apple
data = yf.download(ticker, start='2010-01-01', end='2022-01-01')

# Utiliser uniquement la colonne 'Close' pour la prédiction
data = data[['Close']]

# Ajouter une colonne 'Target' qui est le prix de clôture du jour suivant
data['Target'] = data['Close'].shift(-1)
print(data)
# Supprimer la dernière ligne (qui n'a pas de target)
data = data[:-1]

# Afficher les 5 premières lignes des données
print(data)


[*********************100%%**********************]  1 of 1 completed

                 Close      Target
Date                              
2010-01-04    7.643214    7.656429
2010-01-05    7.656429    7.534643
2010-01-06    7.534643    7.520714
2010-01-07    7.520714    7.570714
2010-01-08    7.570714    7.503929
...                ...         ...
2021-12-27  180.330002  179.289993
2021-12-28  179.289993  179.380005
2021-12-29  179.380005  178.199997
2021-12-30  178.199997  177.570007
2021-12-31  177.570007         NaN

[3021 rows x 2 columns]
                 Close      Target
Date                              
2010-01-04    7.643214    7.656429
2010-01-05    7.656429    7.534643
2010-01-06    7.534643    7.520714
2010-01-07    7.520714    7.570714
2010-01-08    7.570714    7.503929
...                ...         ...
2021-12-23  176.279999  180.330002
2021-12-27  180.330002  179.289993
2021-12-28  179.289993  179.380005
2021-12-29  179.380005  178.199997
2021-12-30  178.199997  177.570007

[3020 rows x 2 columns]



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Target'] = data['Close'].shift(-1)


In [13]:
# Diviser les données en features (X) et target (y)
#p=1 ici, donc X est un nx2, et y un n= nx1
X = data[['Close']] #d'ou le double crochet
y = data['Target']
print(X.shape)
print(y.shape)
# Diviser les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"X_train est : {X_train}")
print(f"X_test est : {X_test}")
print(f"y_train est : {y_train}")
print(f"y_test est : {y_test}")

(3020, 1)
(3020,)
X_train est :                 Close
Date                 
2018-08-03  51.997501
2010-10-18  11.357143
2011-02-16  12.968929
2010-12-02  11.362500
2017-02-07  32.882500
...               ...
2016-07-07  23.985001
2014-05-12  21.172501
2014-07-01  23.379999
2015-02-25  32.197498
2013-06-05  15.896786

[2416 rows x 1 columns]
X_test est :                  Close
Date                  
2015-08-13   28.787500
2016-03-22   26.680000
2010-10-22   10.981071
2015-08-06   28.782499
2010-03-19    7.937500
...                ...
2013-02-11   17.140356
2011-08-25   13.347143
2019-06-20   49.865002
2021-07-15  148.479996
2014-05-28   22.286072

[604 rows x 1 columns]
y_train est : Date
2018-08-03    52.267502
2010-10-18    11.053214
2011-02-16    12.796429
2010-12-02    11.337143
2017-02-07    33.009998
                ...    
2016-07-07    24.170000
2014-05-12    21.205713
2014-07-01    23.370001
2015-02-25    32.605000
2013-06-05    15.659286
Name: Target, Length: 2416, dtype: flo

In [14]:
# Créer et entraîner le modèle de régression linéaire
model = LinearRegression()
model.fit(X_train, y_train)


LinearRegression()

In [15]:
# Faire des prédictions sur l'ensemble de test
y_pred = model.predict(X_test)

#On a entrainé le modele à partir de X_train et y_train.
#y_test sont les valeurs réelles observées, on essaye de les prédire.
#y_pred est la prédiction que l'on se fait de y_test grace à l'entrainement du modele

# Évaluer le modèle
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"y_pred vaut : {y_pred}")
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


y_pred vaut : [ 28.82262403  26.71212578  10.99086331  28.81761585   7.94296199
  22.62059902  17.97256612  75.18849256  30.05687749  52.1883159
   9.29631355 126.06326316  13.72546288   9.58136071  12.39571683
  60.43252962   9.19474027   7.97050126  11.8628162   33.2914721
  12.01589144  32.44526876 141.30491669  16.62136147  15.08704012
  12.4872747   29.0254115   23.82302129 115.47822636  26.17636483
  24.19605068  12.62675889  23.84233393  27.04009108  55.24766232
  20.44965876  38.6065216   26.55690459  53.61284019  36.45346525
  78.3680048   56.83241407  32.16737383  43.18052049  21.48755975
 129.81859799  34.87872604  25.05727475   9.01984925  49.70979644
  21.73183568   8.56134076  26.44925158 148.89570199  93.3793354
  54.75946955  20.93713525  12.58312448   7.64825756  17.93143675
  49.31423405  31.74427407  28.30939418  52.33602864  28.96532668
  23.53260781  28.71246693  41.99884149   9.29094818  12.3928546
  67.76794727   7.25698755  22.79584721  20.25259193  73.26075096
