In [17]:
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Fetch data
df = yf.download('AAPL', start='2010-01-01', end='2023-06-29')

# Scale the data with MinMaxScaler
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)
from joblib import dump, load

# Save the scaler
dump(scaler, 'scaler.joblib') 

# Load the scaler
scaler = load('scaler.joblib') 

# Add a column for previous day's close price
df_scaled['Prev_Close'] = df_scaled['Close'].shift(1)

# Drop the first row, which will contain NaN because of the shift operation
df_scaled = df_scaled.dropna()

# Separate features (X) and target (y)
X = df_scaled.drop('Close', axis=1)
y = df_scaled['Close']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)


[*********************100%***********************]  1 of 1 completed


In [11]:
print(X)

                Open      High       Low  Adj Close    Volume  Prev_Close
Date                                                                     
2010-01-05  0.004385  0.003825  0.004543   0.003695  0.307026    0.004300
2010-01-06  0.004342  0.003755  0.004049   0.003131  0.280076    0.004372
2010-01-07  0.003823  0.003124  0.003714   0.003067  0.239427    0.003705
2010-01-08  0.003537  0.003124  0.003716   0.003298  0.223434    0.003628
2010-01-11  0.004030  0.003320  0.003595   0.002989  0.231354    0.003903
...              ...       ...       ...        ...       ...         ...
2023-06-22  0.976859  0.984418  0.978264   0.987733  0.008695    0.970996
2023-06-23  0.986855  0.987206  0.985675   0.985988  0.009689    0.987664
2023-06-26  0.993925  0.989885  0.986892   0.978301  0.006985    0.985909
2023-06-27  0.988733  0.991744  0.989325   0.993512  0.008416    0.978179
2023-06-28  1.000000  1.000000  1.000000   1.000000  0.008680    0.993476

[3393 rows x 6 columns]


In [2]:
# Define the lookback period
lookback = 60

# Reshape to (samples, time_steps, features)
X_train_reshaped = np.array([X_train.values[i-lookback:i, :] for i in range(lookback, len(X_train))])
y_train_reshaped = y_train[lookback:]

X_test_reshaped = np.array([X_test.values[i-lookback:i, :] for i in range(lookback, len(X_test))])
y_test_reshaped = y_test[lookback:]


In [3]:
# Choose the split point, let's say we'll use 95% of the data for training
split_point = int(len(df_scaled) * 0.95)

# Split into train and test sets
X_train = X[:split_point]
X_test = X[split_point:]
y_train = y[:split_point]
y_test = y[split_point:]


In [4]:
# Let's say X_train is of shape (1000, 10, 2)
# Here, we have 1000 samples, sequence length of 10 and 2 features
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.optimizers import RMSprop
from keras.regularizers import l1
from keras.callbacks import EarlyStopping
from keras.layers import LSTM

# Your LSTM model
nn = Sequential()
nn.add(LSTM(128, input_shape=(60, 6), kernel_initializer='normal', activation='tanh', return_sequences=True))  # Adjusted input shape
nn.add(BatchNormalization())
nn.add(Dropout(0.3))
nn.add(LSTM(64, kernel_initializer='normal', activation='tanh', kernel_regularizer=l1(0.001), return_sequences=False))
nn.add(BatchNormalization())
nn.add(Dropout(0.3))
nn.add(Dense(32, kernel_initializer='normal', activation='relu', kernel_regularizer=l1(0.001)))
nn.add(Dense(1, kernel_initializer='normal'))

# And y_train should be of shape (1000, )
# As we are predicting a single value based on each sequence


In [5]:
# Compile the model
nn.compile(loss='mean_absolute_error', optimizer=RMSprop(learning_rate=0.0001))

# Define EarlyStopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=30, verbose=1)

# Train the model
history = nn.fit(
    X_train_reshaped, 
    y_train_reshaped, 
    epochs=200, 
    batch_size=32, 
    validation_split=0.2,  # here we're holding out 20% of the training data for validation
    callbacks=[early_stopping]
)




Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [7]:
# Evaluate the model on the test data
test_loss = nn.evaluate(X_test_reshaped, y_test_reshaped)

print(f'Test loss: {test_loss}')


Test loss: 0.18829162418842316


In [35]:
# Get the latest 60 days' data
latest_data = df_scaled[-60:]

# Reshape the data to match the input shape of the model
latest_data_reshaped = np.array([latest_data.values])

# Scale the data
latest_data_scaled = scaler.transform(latest_data_reshaped.reshape(-1, 6))

# Reshape the data back to (samples, time_steps, features)
latest_data_reshaped = latest_data_scaled.reshape(latest_data_scaled.shape[0], 60, 6)

# Predict for the next 7 days
predictions = []
for _ in range(7):
    # Predict the next day's closing price
    prediction = nn.predict(latest_data_reshaped)
    
    # Append the prediction to the list
    predictions.append(prediction[0][0])
    
    # Shift the data to the next day by removing the first row and appending the predicted value
    latest_data_reshaped = np.concatenate([latest_data_reshaped[:, 1:, :], [[prediction[0]]]])

# Invert scaling on predictions
predictions = scaler.inverse_transform([[x] for x in predictions])

# Print the predictions for the next 7 days
for i in range(len(predictions)):
      print(f"Day {i+1}: Predicted Close Price: {predictions[i][0]:.2f}")

          



ValueError: cannot reshape array of size 420 into shape (70,60,6)