In [1]:
import pandas as pd
import os, sys
# Resolve the directory one level above the notebook's working directory and
# expose it on sys.path (presumably so the `Project.*` imports in the following
# cells can be resolved from there).
project_root = os.path.abspath("../")
# Guard the append: re-running this cell must not keep stacking duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)
print("Added path:", project_root)

Added path: /Users/hippolyteheger/code/ArnaudThs/StockProphet/StockProphet


In [2]:
from Project.param import *
from Project.data import *
from Project.sentiment_analysis import *
from Project.model import *

In [3]:
# Smoke test: the star-import raises immediately if Project.main (or anything
# it imports at module level) fails to load; the message is printed only when
# the import succeeded.
from Project.main import *
print("main.py imports successfully!")

main.py imports successfully!


In [4]:
# Raw market history for the configured ticker.
df_ohlcv = load_market_data(TICKER)

# Independent two-column (Date, Close) copy with the capitalised date header.
df_local = (
    df_ohlcv
    .rename(columns={"date": "Date"})
    .loc[:, ["Date", "Close"]]
    .copy()
)

# Preview the first rows of the raw frame.
df_ohlcv.head()

  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,date,Close,High,Low,Open,Volume
0,2020-01-02,72.468262,72.528582,71.223259,71.4766,135480400
1,2020-01-03,71.763725,72.523754,71.539337,71.696167,146322800
2,2020-01-06,72.335533,72.374139,70.634517,70.88545,118387200
3,2020-01-07,71.995354,72.60096,71.775789,72.345204,108872000
4,2020-01-08,73.153473,73.455072,71.698559,71.698559,132079200


In [5]:
# Same ticker fetched through load_data with an explicit date range.
df = load_data(
    ticker=TICKER,
    start_date=START_DATE,
    end_date=END_DATE,
)
df

  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2020-01-02,72.468262,72.528582,71.223259,71.476600,135480400
1,2020-01-03,71.763725,72.523754,71.539337,71.696167,146322800
2,2020-01-06,72.335533,72.374139,70.634517,70.885450,118387200
3,2020-01-07,71.995354,72.600960,71.775789,72.345204,108872000
4,2020-01-08,73.153473,73.455072,71.698559,71.698559,132079200
...,...,...,...,...,...,...
1374,2025-06-23,201.076645,201.874967,198.541988,201.206377,55814300
1375,2025-06-24,199.879181,203.012583,199.779385,202.164363,54064000
1376,2025-06-25,201.136520,203.242088,200.198493,201.026751,39525700
1377,2025-06-26,200.577713,202.214267,199.040955,201.006802,50799100


In [None]:
# Notebook-level hyperparameters. The helper functions defined in the next cell
# read WINDOW_SIZE, LSTM_EPOCHS and BATCH_SIZE as default argument values, so
# these names must exist before that cell is executed.
# NOTE(review): `from Project.param import *` runs earlier; if that module
# exports the same names, these assignments override them — confirm intended.

# Number of past closes per LSTM input window / number of close_lag_* columns.
WINDOW_SIZE = 50
# NOTE(review): not referenced by any code visible in this notebook —
# presumably the train/test split fraction; verify where it is consumed.
TRAIN_RATIO = 0.8
# Default mini-batch size forwarded to train_LSTM.
BATCH_SIZE = 32
# Default number of training epochs forwarded to train_LSTM.
LSTM_EPOCHS = 10
# File the trained predictor is saved to and, when retraining is skipped, loaded from.
RNN_MODEL_SAVE = "lstm_rnn.keras"


In [7]:
# -------------------------
# Utility: Build RNN predictions aligned to dates
# -------------------------
def build_rnn_predictions(df_ohlc: pd.DataFrame, window_size: int = WINDOW_SIZE,
                          epochs: int = LSTM_EPOCHS, batch_size: int = BATCH_SIZE,
                          force_retrain: bool = True) -> pd.Series:
    """
    Train (or load) the LSTM close-price predictor and return one-day-ahead predictions.

    For every row index ``i >= window_size`` the model receives the scaled closes of
    rows ``i - window_size .. i - 1``; its output is mapped back through the scaler to
    the original Close scale and stored under the date of row ``i``.

    Args:
        df_ohlc: Frame holding a 'Date' (or 'date') column and a 'Close' column.
        window_size: Number of past closes per input window. It is also forwarded to
            ``train_test_split_lstm`` as ``time_step`` so the sequences used for
            training have the same length as the ones used for prediction.
        epochs: Training epochs forwarded to ``train_LSTM``.
        batch_size: Training batch size forwarded to ``train_LSTM``.
        force_retrain: When True (default) a new model is always trained and written
            to ``RNN_MODEL_SAVE``; when False an existing file at that path is loaded
            instead (and training only happens if the file is missing).

    Returns:
        pd.Series named 'rnn_pred_close', indexed by datetime, one value per row
        from position ``window_size`` onwards (empty when there are not more than
        ``window_size`` rows).

    Raises:
        ValueError: If ``window_size`` is smaller than 1 or no date column is present.
    """
    import numpy as np  # local import: this notebook does not import numpy explicitly

    if window_size < 1:
        raise ValueError("window_size must be a positive integer.")

    df_local = df_ohlc.copy()

    # Normalize date column name
    if "date" in df_local.columns:
        df_local = df_local.rename(columns={"date": "Date"})
    elif "Date" not in df_local.columns:
        raise ValueError("DataFrame must contain either 'Date' or 'date' column.")

    df_local = df_local[["Date", "Close"]].reset_index(drop=True)

    # The helper returns X_train, y_train, X_test, y_test, scaler. Passing the window
    # explicitly keeps the training sequence length tied to `window_size` instead of
    # whatever default the helper happens to use.
    X_train, y_train, _X_test, _y_test, scaler = train_test_split_lstm(
        df_local, time_step=window_size
    )

    if force_retrain or not os.path.exists(RNN_MODEL_SAVE):
        print("Training LSTM predictor...")
        # Only build/compile a fresh network when it is actually going to be trained.
        input_shape = (X_train.shape[1], X_train.shape[2])  # (seq_len, n_features)
        model = compile_LSTM(LSTM_model(input_shape))
        train_LSTM(model, X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)
        model.save(RNN_MODEL_SAVE)
    else:
        print("Loading existing LSTM predictor...")
        from keras.models import load_model
        # NOTE(review): the stored model is assumed to have been trained with the
        # same window_size — confirm before reusing an old file.
        model = load_model(RNN_MODEL_SAVE)

    # Scale the full Close history with the scaler returned by the split helper.
    closes = df_local["Close"].to_numpy().reshape(-1, 1)
    closes_scaled = np.asarray(scaler.transform(closes))
    n_rows = closes_scaled.shape[0]

    if n_rows <= window_size:
        # Not a single complete window: nothing to predict.
        return pd.Series([], index=pd.DatetimeIndex([]), dtype="float64",
                         name="rnn_pred_close")

    # One window per target row, stacked to (n_windows, window_size, 1) so the model
    # is invoked once on the whole batch instead of once per row.
    windows = np.stack(
        [closes_scaled[end_idx - window_size:end_idx]
         for end_idx in range(window_size, n_rows)]
    )
    pred_scaled = np.asarray(model.predict(windows, verbose=0))

    # Keep the first output value of each sample, then undo the scaling.
    first_output = pred_scaled.reshape(len(windows), -1)[:, :1]
    preds = scaler.inverse_transform(first_output)[:, 0]

    # Prediction k belongs to row window_size + k (the day right after its window).
    dates = df_local["Date"].iloc[window_size:].tolist()

    return pd.Series(data=preds, index=pd.to_datetime(dates), name="rnn_pred_close")


# -------------------------
# Utility: Merge OHLC / Sentiment / RNN preds and create lag-features
# -------------------------
def build_merged_dataframe(df_ohlc: pd.DataFrame, df_sentiment: pd.DataFrame,
                           rnn_preds: pd.Series, window_size: int = WINDOW_SIZE) -> pd.DataFrame:
    """
    Merge prices, daily sentiment and LSTM predictions into one feature frame.

    Args:
        df_ohlc: Price frame with a 'date' (or 'Date') column and a 'Close' column.
        df_sentiment: Daily sentiment, either indexed by date or carrying a 'date'
            column. The column named 'sentiment' is used; if there is none, the
            first remaining column is taken as the sentiment score.
        rnn_preds: Date-indexed predictions (e.g. from ``build_rnn_predictions``).
        window_size: Number of ``close_lag_*`` columns to create.

    Returns:
        DataFrame with the columns of ``df_ohlc`` (date column named 'date') plus
        sentiment, rnn_pred_close, close, return, next_return and
        close_lag_1 .. close_lag_{window_size}. Every row containing a NaN
        (insufficient history, no prediction, no next-day return) is dropped.

    Raises:
        KeyError: If ``df_ohlc`` has neither a 'date' nor a 'Date' column.
        ValueError: If ``df_sentiment`` has no column to use as sentiment.
    """
    df = df_ohlc.copy()

    # Accept both spellings of the date column, like build_rnn_predictions does.
    if "date" not in df.columns:
        if "Date" not in df.columns:
            raise KeyError("DataFrame must contain either 'Date' or 'date' column.")
        df = df.rename(columns={"Date": "date"})
    df["date"] = pd.to_datetime(df["date"])
    df = df.set_index("date").sort_index()

    # Bring the sentiment frame onto a datetime index. A 'date' *column* must become
    # the index first; converting the existing integer index instead would yield
    # meaningless timestamps and leave 'date' to be mistaken for the score column.
    df_sent = df_sentiment.copy()
    if "date" in df_sent.columns:
        df_sent = df_sent.set_index("date")
    if not isinstance(df_sent.index, pd.DatetimeIndex):
        df_sent.index = pd.to_datetime(df_sent.index)

    if df_sent.shape[1] == 0:
        raise ValueError("df_sentiment must contain a sentiment column.")
    sent_col = "sentiment" if "sentiment" in df_sent.columns else df_sent.columns[0]

    # Join only the score column: any extra, unfilled column would make the final
    # dropna() silently discard every trading day without sentiment data.
    df = df.join(df_sent[sent_col].rename("sentiment"), how="left")
    df["sentiment"] = df["sentiment"].fillna(0.0)

    # Merge RNN preds (already indexed by date)
    df = df.join(rnn_preds.rename("rnn_pred_close"), how="left")

    # Daily return and next-day return (reward reference)
    df["close"] = df["Close"].astype(float)
    df["return"] = df["close"].pct_change()
    df["next_return"] = df["return"].shift(-1)

    # Build all lag columns first and attach them in one concat instead of
    # inserting window_size columns one by one.
    lag_columns = {
        f"close_lag_{i}": df["close"].shift(i) for i in range(1, window_size + 1)
    }
    if lag_columns:
        df = pd.concat([df, pd.DataFrame(lag_columns, index=df.index)], axis=1)

    # drop rows without enough history, without a prediction or without next_return
    df = df.dropna().reset_index()
    return df

In [8]:
# Train the LSTM on the loaded price history (force_retrain defaults to True)
# and collect its date-indexed close predictions.
series = build_rnn_predictions(df_ohlc=df_ohlcv)

Training LSTM predictor...
Epoch 1/10


  super().__init__(**kwargs)


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.0022 - loss: 0.0548   
Epoch 2/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.0022 - loss: 0.0043
Epoch 3/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.0022 - loss: 0.0021
Epoch 4/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.0022 - loss: 0.0016    
Epoch 5/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.0022 - loss: 0.0016
Epoch 6/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.0022 - loss: 0.0015    
Epoch 7/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.0022 - loss: 0.0015
Epoch 8/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.0022 - loss: 0.0015
Epoch 9/10
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━

In [9]:
# Display the prediction Series returned by build_rnn_predictions.
series

2020-03-16     70.116516
2020-03-17     69.230682
2020-03-18     68.324776
2020-03-19     67.381516
2020-03-20     66.440697
                 ...    
2025-06-23    194.981354
2025-06-24    194.927063
2025-06-25    194.953796
2025-06-26    195.107971
2025-06-27    195.303665
Name: rnn_pred_close, Length: 1329, dtype: float32

In [10]:
# Daily sentiment for the configured ticker(s) over the sentiment date range.
df_daily = fetch_daily_ticker_sentiment(
    api_key=API_KEY_MASSIVE,
    ticker=SENTIMENT_TICKERS,
    start_date=SENTIMENT_START_DATE,
    end_date=SENTIMENT_END_DATE,
)

In [11]:
# Merge prices, daily sentiment and LSTM predictions into one feature frame.
# The lag depth comes from the shared WINDOW_SIZE constant (instead of a literal
# 50) so it cannot drift from the window the predictor uses by default.
df = build_merged_dataframe(
    df_ohlc=df_ohlcv,
    df_sentiment=df_daily,
    rnn_preds=series,
    window_size=WINDOW_SIZE,
)

In [12]:
# Display the merged feature frame built by build_merged_dataframe.
df

Unnamed: 0,date,Close,High,Low,Open,Volume,sentiment,rnn_pred_close,close,return,...,close_lag_41,close_lag_42,close_lag_43,close_lag_44,close_lag_45,close_lag_46,close_lag_47,close_lag_48,close_lag_49,close_lag_50
0,2020-03-16,58.578979,62.659018,58.044485,58.516096,322423600,0.0,70.116516,58.578979,-0.128647,...,75.119934,75.443237,76.475914,74.876221,74.707329,73.153473,71.995354,72.335533,71.763725,72.468262
1,2020-03-17,61.154697,62.303491,57.657516,59.860788,324056000,0.0,69.230682,61.154697,0.043970,...,76.060921,75.119934,75.443237,76.475914,74.876221,74.707329,73.153473,71.995354,72.335533,71.763725
2,2020-03-18,59.657642,60.463010,57.347955,57.988865,300233600,0.0,68.324776,59.657642,-0.024480,...,76.902977,76.060921,75.119934,75.443237,76.475914,74.876221,74.707329,73.153473,71.995354,72.335533
3,2020-03-19,59.200535,61.149861,58.675716,59.831769,271857200,0.0,67.381516,59.200535,-0.007662,...,76.381805,76.902977,76.060921,75.119934,75.443237,76.475914,74.876221,74.707329,73.153473,71.995354
4,2020-03-20,55.442165,60.905602,55.142268,59.780988,401693200,0.0,66.440697,55.442165,-0.063485,...,76.654480,76.381805,76.902977,76.060921,75.119934,75.443237,76.475914,74.876221,74.707329,73.153473
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1323,2025-06-20,200.577713,201.276239,196.446411,197.823517,96813500,1.0,195.242447,200.577713,0.022485,...,199.059326,192.501724,196.308716,193.607956,201.451126,201.829849,197.474731,189.771072,198.172348,171.832428
1324,2025-06-23,201.076645,201.874967,198.541988,201.206377,55814300,2.0,194.981354,201.076645,0.002487,...,203.902740,199.059326,192.501724,196.308716,193.607956,201.451126,201.829849,197.474731,189.771072,198.172348
1325,2025-06-24,199.879181,203.012583,199.779385,202.164363,54064000,0.0,194.927063,199.879181,-0.005955,...,207.659912,203.902740,199.059326,192.501724,196.308716,193.607956,201.451126,201.829849,197.474731,189.771072
1326,2025-06-25,201.136520,203.242088,200.198493,201.026751,39525700,-1.0,194.953796,201.136520,0.006290,...,208.566803,207.659912,203.902740,199.059326,192.501724,196.308716,193.607956,201.451126,201.829849,197.474731


In [17]:
# Load sentiment through the project helper and preview the first rows.
# NOTE(review): the recorded output shows 'date' as a regular column (not the
# index) — keep that in mind when passing this frame to merge helpers.
sentiment = load_sentiment()
sentiment.head()

Unnamed: 0,date,sentiment
0,2025-01-01,0.0
1,2025-01-02,0.0
2,2025-01-03,0.0
3,2025-01-04,0.0
4,2025-01-05,0.5


In [15]:
from Project.data import *
from Project.model import *

In [19]:
# Reload the market data and keep only the date/close pair, with the date
# header capitalised.
ohlcv = load_market_data(TICKER)
df_prices = ohlcv.loc[:, ["date", "Close"]].rename(columns={"date": "Date"})

  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


In [20]:
# Split the (Date, Close) frame into LSTM train/test sequences of 50 steps.
# NOTE(review): the saved output of this cell is
# `IndexError: tuple index out of range`. The helper's source is not visible
# here; possibly df_prices is too short for a 50-step window in one of the
# splits — verify against train_test_split_lstm before relying on this cell.
X_train, y_train, X_test, y_test, scaler_y = train_test_split_lstm(df_prices, time_step=50)

IndexError: tuple index out of range

In [None]:
# Preview the (Date, Close) frame that was passed to the split helper.
df_prices.head()

Unnamed: 0,Date,Close
0,2024-01-02,183.903214
1,2024-01-03,182.526245
2,2024-01-04,180.20813
3,2024-01-05,179.484909
4,2024-01-08,183.823975


In [None]:
# Turn model predictions into a DataFrame via the project helper.
# NOTE(review): `y_pred` is never assigned in any cell visible in this notebook
# and the saved output is a NameError; a prediction step (model output for the
# test sequences, presumably) has to run before this cell — TODO add it.
df_pred = build_lstm_prediction_df(y_pred, time_step=50)
df_pred.head()

NameError: name 'y_pred' is not defined