In [5]:
pip install pandas numpy scikit-learn matplotlib yfinance tensorflow[and-cuda] scikeras

Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting matplotlib
  Using cached matplotlib-3.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting yfinance
  Using cached yfinance-0.2.55-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting scikeras
  Using cached scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Using cached scipy-1.15.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-n

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input,LSTM
from scikeras.wrappers import KerasRegressor

from graphing import graph_normal, show_results
from get_data import get_apple_stock_split

2025-04-02 12:51:28.748685: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-02 12:51:28.756649: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743594688.765650     846 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743594688.768067     846 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743594688.775034     846 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [7]:
# Number of lag days handed to the data loader.
DAYS_LAG = 5

# Load the train/test split produced by the project helper.
X_train, X_test, Y_train, Y_test = get_apple_stock_split(DAYS_LAG)
print(f"Training samples: {len(X_train)}, Testing samples: {len(X_test)}")

# Standardise inputs and targets. Both scalers are fitted on the training
# portion only and then applied unchanged to the test portion.
scaler_X = StandardScaler()
scaler_Y = StandardScaler()
scaler_X.fit(X_train)
scaler_Y.fit(Y_train)

X_train, X_test = scaler_X.transform(X_train), scaler_X.transform(X_test)
Y_train, Y_test = scaler_Y.transform(Y_train), scaler_Y.transform(Y_test)

# Append a trailing axis of length 1 so the inputs are 3-D, as required by
# the LSTM input layer defined later in the notebook.
X_train = X_train.reshape(X_train.shape + (1,))
X_test = X_test.reshape(X_test.shape + (1,))


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed

Training samples: 1768, Testing samples: 442





In [8]:
def create_ann(units=50, activation='relu', optimizer='adam'):
    """Build and compile a fresh two-layer stacked LSTM regressor.

    Used as the model factory for ``KerasRegressor`` so that each fit starts
    from a newly constructed network.

    Args:
        units: Number of units in each of the two LSTM layers.
        activation: Activation passed to both LSTM layers.
        optimizer: Optimizer passed to ``compile``.

    Returns:
        A compiled ``Sequential`` model ending in a single-unit ``Dense``
        layer, with mean-squared-error loss and MAE tracked as a metric.

    Note:
        The input shape is taken from axes 1 and 2 of the notebook-level
        ``X_train``, so that array must already be 3-D when this is called.
    """
    input_shape = (X_train.shape[1], X_train.shape[2])
    network = Sequential([
        Input(shape=input_shape),
        # First LSTM emits the full sequence so the second LSTM can consume it.
        LSTM(units=units, activation=activation, return_sequences=True),
        LSTM(units=units, activation=activation, return_sequences=False),
        Dense(1),
    ])
    network.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae'])
    return network

<h1> GridSearch setup </h1>

In [15]:
# Hyper-parameter grid for GridSearchCV.
# Keys prefixed with "model__" are routed to the model-building function;
# "batch_size" and "epochs" are set on the KerasRegressor wrapper itself.
#
# Currently a single combination is searched. The wider grid that was
# previously tried is kept here for reference:
#     model__units:      [10, 30, 50, 70, 100]
#     model__activation: ['relu', 'tanh', 'sigmoid']
#     model__optimizer:  ['adam', 'sgd']
#     batch_size:        [16, 32]
#     epochs:            [50, 100, 200]
param_grid = dict(
    model__units=[50],
    model__activation=['relu'],
    model__optimizer=['adam'],
    batch_size=[32],
    epochs=[50],
)

In [16]:
# Training: cross-validated search over `param_grid`.
# KerasRegressor is the scikeras wrapper that lets scikit-learn's
# GridSearchCV drive the Keras model built by `create_ann`.
model = KerasRegressor(model=create_ann, verbose=0)

grid_search = GridSearchCV(
    model,
    param_grid,
    scoring='neg_mean_absolute_error',  # negated MAE, so a higher score is better
    cv=3,                               # 3-fold cross-validation
    n_jobs=-1,                          # use all available cores
    verbose=10,
)

# Run the search on the training data only.
grid_search.fit(X_train, Y_train)

# Tabulate the per-candidate cross-validation results.
results_df = pd.DataFrame(grid_search.cv_results_)


AttributeError: 'super' object has no attribute '__sklearn_tags__'

In [None]:
# Report every searched candidate, best mean CV score first.
results_df = pd.DataFrame(grid_search.cv_results_).sort_values('mean_test_score', ascending=False)

for _idx, row in results_df.iterrows():
    # One print call per candidate; sep="\n" puts each field on its own line.
    print(
        f"Rank {row['rank_test_score']}:",
        f"Parameters: {row['params']}",
        f"Mean Test Score: {row['mean_test_score']:.4f}",
        f"Std Dev of Test Score: {row['std_test_score']:.4f}",
        "-" * 30,
        sep="\n",
    )

In [None]:
# Take the best estimator found by the grid search and score it on the
# held-out test split.
final_model = grid_search.best_estimator_
final_predictions = final_model.predict(X_test)

# Error metrics on the test split. Y_test is the array produced by
# scaler_Y.transform, so these values are in scaled target units.
mae = mean_absolute_error(y_true=Y_test, y_pred=final_predictions)
mse = mean_squared_error(y_true=Y_test, y_pred=final_predictions)
rmse = np.sqrt(mse)

# Plot predictions against the test targets using the project helper.
graph_normal(final_predictions, Y_test, "test data")