In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import os, csv, sys

In [None]:
# Load the tab-separated quote file and keep only the closing-price column.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
path = 'C:/Users/massw/OneDrive/Bureau/Programmation/Stock_Pred'
file_name = 'DASSAULTSYSTEMES_2024-02-07.txt'
file = os.path.join(path, file_name)
# The ticker name is whatever precedes the first underscore of the file name.
stock_name = file_name.partition('_')[0]
# Read and renumber the rows 0..n-1 in one chained expression.
df = pd.read_csv(file, sep='\t').reset_index(drop=True)
# Double brackets keep a one-column DataFrame (2-D), not a Series.
data = df[['clot']]

Normaliser les données entre 0 et 1

In [None]:
# Rescale the closing prices to the [0, 1] range. `scaler` is kept around so
# that model outputs can be mapped back to price units later.
# NOTE(review): the scaler is fitted on the whole series, including the rows
# that later end up in the test split — consider fitting on the training part only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data.to_numpy())

Création des séquences temporelles pour l'entraînement du modèle LSTM

In [None]:
def create_sequences(data, seq_length):
    """Cut a series into sliding windows and their next-step targets.

    Window ``k`` is ``data[k:k + seq_length]`` and its target is the element
    that immediately follows it, ``data[k + seq_length]``.

    Args:
        data: Indexable, sliceable sequence with a length (e.g. a NumPy array).
        seq_length: Number of consecutive elements in each window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding the windows and the
        targets. Both are empty when ``data`` has no more than
        ``seq_length`` elements.
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    targets = [data[start + seq_length] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Number of past time steps fed to the model for each prediction
sequence_length = 10
# Fraction of the windows used for training (the remainder is the test set)
split_r = 0.8
# Build the sliding windows and their next-step targets from the scaled series
X_seq, y_seq = create_sequences(data=scaled_data, seq_length=sequence_length)

In [None]:
# Divide train and test set chronologically: the first `split_r` share of the
# windows is used for training, the remaining (later) windows for testing.
# The windows are named X_seq / y_seq where they are built; the previous code
# sliced undefined names `X` / `y`, which raises NameError on a fresh kernel.
split = int(split_r * len(X_seq))
X_train, X_test = X_seq[:split], X_seq[split:]
y_train, y_test = y_seq[:split], y_seq[split:]
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
# Create LSTM Model: two stacked 50-unit LSTM layers followed by a
# single-unit Dense output layer.
model = Sequential()
# The first LSTM returns its full output sequence so that the second LSTM
# receives one vector per time step; the input has one feature per step.
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
# The second LSTM only returns its last output, which feeds the Dense layer.
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=1))
# Mean squared error loss, Adam optimizer.
model.compile(optimizer='adam', loss='mse')

In [None]:
# Train model on the training windows: 10 passes over the data, 32 windows per batch
model.fit(x=X_train, y=y_train, batch_size=32, epochs=10)

In [None]:
# Predict on the test windows and map the scaled outputs back to price units
predictions = scaler.inverse_transform(model.predict(X_test))

# Sanity check: the index slice the predictions are aligned with should have
# as many rows as there are predictions.
print(data.index[split+sequence_length:].shape)
print(predictions.shape)

In [None]:
# Data visualization
plt.figure(figsize=(14,6))

# Test window k (k >= split) predicts row k + sequence_length of `data`, so the
# predictions line up with the rows starting at split + sequence_length.
# The actual prices are taken over exactly as many rows as there are
# predictions (previously a hard-coded 257-row window), so both curves always
# cover the same span.
test_start = split + sequence_length
subset_data = data['clot'].iloc[test_start:test_start + len(predictions)]

# Plot actual and predicted closing prices over the same index range
plt.plot(subset_data.index, subset_data.values, label='Actual')
plt.plot(data.index[test_start:test_start + len(predictions)], predictions, label='Predicted')

plt.title(f'{stock_name} Prediction with LSTM model')
plt.xlabel('Time')
plt.ylabel('Stock Closing Price')
plt.legend()
plt.show()

In [None]:
def invest_strat(actual, predicted, invest_amount):
    diff = np.diff(predicted, axis=0)
    buy_signal = np.where(diff > 0, 1, 0)
    sell_signal = np.where(diff < 0, 1, 0)
    buy_price = actual[:-1] * buy_signal
    sell_price = actual[1:] * sell_signal
    buy_count = np.count_nonzero(buy_price)
    sell_count = np.count_nonzero(sell_price)
    if buy_count == 0 or sell_count == 0 :
        return 0
    total_buy_amount = invest_amount * buy_count
    total_sell_amount = invest_amount * np.sum(sell_price)