# Build Hybrid Machine Learning Model combine Long Short-Term Memory and Linear Regression to predict the next 10 days of Apple Inc.’s stock prices

### Import required library

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

### Import the dataset

In [29]:
data = pd.read_csv('apple_stock_data.csv')

print(data.head())

                        Date   Adj Close  ...        Open    Volume
0  2023-11-02 00:00:00+00:00  176.665985  ...  175.520004  77334800
1  2023-11-03 00:00:00+00:00  175.750671  ...  174.240005  79763700
2  2023-11-06 00:00:00+00:00  178.317520  ...  176.380005  63841300
3  2023-11-07 00:00:00+00:00  180.894333  ...  179.179993  70530000
4  2023-11-08 00:00:00+00:00  181.958893  ...  182.350006  49340300

[5 rows x 7 columns]


### Convert the Date column to datetime type and focus on the Close price

In [30]:
data['Date'] = pd.to_datetime(data['Date'])

data.set_index('Date', inplace=True)

data = data[['Close']]

In [31]:
print(data)

                                Close
Date                                 
2023-11-02 00:00:00+00:00  177.570007
2023-11-03 00:00:00+00:00  176.649994
2023-11-06 00:00:00+00:00  179.229996
2023-11-07 00:00:00+00:00  181.820007
2023-11-08 00:00:00+00:00  182.889999
...                               ...
2024-10-28 00:00:00+00:00  233.399994
2024-10-29 00:00:00+00:00  233.669998
2024-10-30 00:00:00+00:00  230.100006
2024-10-31 00:00:00+00:00  225.910004
2024-11-01 00:00:00+00:00  222.910004

[252 rows x 1 columns]


### Long Short - Term Memory and Linear Regression

 I selected LSTM because it excels at capturing sequential dependencies and patterns in time-series data, making it well-suited for modeling stock price movements driven by historical trends.

Linear Regression is a simple model designed to capture basic linear relationships and long-term trends in the data.

### Scale CLose price data between 0 and 1 using MinMaxScaler

In [32]:
scaler = MinMaxScaler(feature_range=(0,1))

data['Close'] = scaler.fit_transform(data[['Close']])

### Prepare the data for the LSTM model by generating sequences of a specified length to predict the stock price for the following day

In [None]:
def create_sequences(data, length):
    X, y = [], []
    for i in range(len(data) - length):
        X.append(data[i:i+length])
        y.append(data[i+length])
    return np.array(X), np.array(y)

seqeunces_length = 60

X, y = create_sequences(data['Close'].values, seqeunces_length)

In [59]:
X.shape

(192, 60)

In [60]:
y.shape

(192,)

### Split the sequences into Training set and Test set

In [36]:
training_size = int(len(X) * 0.8)

X_train, X_test = X[:training_size], X[training_size:]

y_train, y_test = y[:training_size], y[training_size:]

In [37]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((153, 60), (39, 60), (153,), (39,))

### Build sequential LSTM model 

In [38]:
model = Sequential()

model.add(LSTM(units=50, 
               return_sequences=True, 
               input_shape=(X_train.shape[1],1)
                )
)

model.add(LSTM(units=50))

model.add(Dense(1))


  super().__init__(**kwargs)


In [39]:
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train, y_train, epochs=20, batch_size=32)

Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - loss: 0.3179
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0660
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0311
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0170
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0178
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0147
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - loss: 0.0135
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0132
Epoch 9/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0113
Epoch 10/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0106
Epoch 11/20
[1m5/5

<keras.src.callbacks.history.History at 0x1303d378310>

### Build Linear Regression model

Generating lag features, using past 3 days

In [40]:
data['lag1'] = data['Close'].shift(1)
data['lag2'] = data['Close'].shift(2)
data['lag3'] = data['Close'].shift(3)
data = data.dropna()

In [41]:
print(data)

                              Close      lag1      lag2      lag3
Date                                                             
2023-11-07 00:00:00+00:00  0.235311  0.199077  0.162983  0.175853
2023-11-08 00:00:00+00:00  0.250280  0.235311  0.199077  0.162983
2023-11-09 00:00:00+00:00  0.243565  0.250280  0.235311  0.199077
2023-11-10 00:00:00+00:00  0.299384  0.243565  0.250280  0.235311
2023-11-13 00:00:00+00:00  0.277001  0.299384  0.243565  0.250280
...                             ...       ...       ...       ...
2024-10-28 00:00:00+00:00  0.956911  0.929071  0.917320  0.919978
2024-10-29 00:00:00+00:00  0.960688  0.956911  0.929071  0.917320
2024-10-30 00:00:00+00:00  0.910744  0.960688  0.956911  0.929071
2024-10-31 00:00:00+00:00  0.852127  0.910744  0.960688  0.956911
2024-11-01 00:00:00+00:00  0.810157  0.852127  0.910744  0.960688

[249 rows x 4 columns]


Split data into traininig and testing

In [42]:
X_linear = data[['lag1','lag2','lag3']]
y_linear = data['Close']

X_train_linear, X_test_linear = X_linear[:training_size], X_linear[training_size:]
y_train_linear, y_test_linear = y_linear[:training_size], y_linear[training_size:]


In [43]:
X_train_linear.shape, X_test_linear.shape, y_train_linear.shape, y_test_linear.shape

((153, 3), (96, 3), (153,), (96,))

Train linear regression model

In [44]:
linear_model = LinearRegression()

linear_model.fit(X_train_linear, y_train_linear)

Make predictions using LSTM

In [45]:
X_test_lstm = X_test.reshape(
    (X_test.shape[0], X_test.shape[1], 1)
)

lstm_predic = model.predict(X_test_lstm)

lstm_predic = scaler.inverse_transform(lstm_predic)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step


In [63]:
lstm_predic

array([[225.49307],
       [224.89336],
       [224.38812],
       [223.96701],
       [223.61288],
       [223.02226],
       [222.34029],
       [221.8088 ],
       [221.78993],
       [222.0719 ],
       [222.47745],
       [223.0024 ],
       [223.53195],
       [224.09888],
       [224.6718 ],
       [225.47404],
       [226.04523],
       [226.48741],
       [226.75743],
       [226.95331],
       [226.8324 ],
       [226.71765],
       [226.77519],
       [226.91214],
       [227.03111],
       [227.32141],
       [227.82382],
       [228.33997],
       [228.86981],
       [229.51859],
       [230.2763 ],
       [231.03682],
       [231.51804],
       [231.79633],
       [231.95757],
       [232.12033],
       [232.27962],
       [232.25546],
       [231.91913]], dtype=float32)

In [48]:
lstm_predic.shape

(39, 1)

Make predictions using Linear Regression

In [46]:
linear_predict = linear_model.predict(X_test_linear)

linear_predict = scaler.inverse_transform(linear_predict.reshape(-1,1))

In [64]:
linear_predict

array([[216.81820901],
       [214.37105639],
       [209.41297789],
       [207.26599011],
       [208.15105726],
       [209.20477132],
       [213.52982995],
       [214.34214776],
       [210.534601  ],
       [216.92358859],
       [220.68371689],
       [221.73802455],
       [226.5791409 ],
       [228.02896608],
       [228.71759398],
       [233.13455589],
       [227.41963259],
       [230.38873171],
       [234.60965097],
       [234.89301977],
       [228.53653766],
       [223.66891254],
       [224.08769717],
       [223.91334202],
       [225.00619103],
       [218.26327302],
       [217.18144479],
       [217.94479768],
       [218.27560289],
       [218.8380597 ],
       [222.24275147],
       [218.31154614],
       [219.77443979],
       [208.86806873],
       [206.77105101],
       [209.92226181],
       [213.62303576],
       [216.54299919],
       [217.72058886],
       [221.49057849],
       [221.87482097],
       [224.84723133],
       [226.19000353],
       [225

In [49]:
linear_predict.shape

(96, 1)

### Prediction 

Using LSTM to predict for the next 10 days

In [None]:
lstm_future_predict = []

last_sequence = X[-1].reshape(1,seqeunces_length, 1)

for _ in range(10):
    lstm_pred = model.predict(last_sequence)[0,0]
    lstm_future_predict.append(lstm_pred)
    lstm_pred_reshape = np.array([[lstm_pred]]).reshape(1,1,1)
    last_sequence = np.append(last_sequence[:,1:,:], lstm_pred_reshape, axis=1)

lstm_future_predict = scaler.inverse_transform(
    np.array(lstm_future_predict)
    .reshape(-1,1)
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [74]:
lstm_future_predict

array([[231.91914],
       [231.6912 ],
       [231.50522],
       [231.35236],
       [231.22382],
       [231.11272],
       [231.01384],
       [230.92336],
       [230.83864],
       [230.75777]], dtype=float32)

Using Linear Regression to predict for the next 10 days

In [78]:
recent_data = data['Close'].values[-3:]

linear_future_predict = []

for _ in range(10):
    linear_pred = linear_model.predict(recent_data.reshape(1,-1))[0]
    linear_future_predict.append(linear_pred)
    recent_data = np.append(recent_data[1:], linear_pred)

linear_future_predict = scaler.inverse_transform(
    np.array(linear_future_predict)
    .reshape(-1,1)
)



In [80]:
linear_future_predict

array([[230.3551918 ],
       [225.70729102],
       [222.70342633],
       [230.63153502],
       [225.48638043],
       [222.49458757],
       [230.93019533],
       [225.24559859],
       [222.28400748],
       [231.25237543]])

Combine the prediction of both models

In [81]:
hybrid_future_predict = (0.7*lstm_future_predict) + (0.3*linear_future_predict)

In [82]:
hybrid_future_predict

array([[231.44995659],
       [229.8960252 ],
       [228.8646778 ],
       [231.13610052],
       [229.50258832],
       [228.52727642],
       [230.98874488],
       [229.22002662],
       [228.27225364],
       [230.90615391]])

Create dataframe

In [86]:
future_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=10)
predictions_df = pd.DataFrame({
    'Date': future_dates,
    'LSTM Predictions': lstm_future_predict.flatten(),
    'Linear Regression Predictions': linear_future_predict.flatten(),
    'Hybrid Model Predictions': hybrid_future_predict.flatten()
})
predictions_df

Unnamed: 0,Date,LSTM Predictions,Linear Regression Predictions,Hybrid Model Predictions
0,2024-11-02 00:00:00+00:00,231.919144,230.355192,231.449957
1,2024-11-03 00:00:00+00:00,231.691193,225.707291,229.896025
2,2024-11-04 00:00:00+00:00,231.505219,222.703426,228.864678
3,2024-11-05 00:00:00+00:00,231.352356,230.631535,231.136101
4,2024-11-06 00:00:00+00:00,231.223816,225.48638,229.502588
5,2024-11-07 00:00:00+00:00,231.112717,222.494588,228.527276
6,2024-11-08 00:00:00+00:00,231.01384,230.930195,230.988745
7,2024-11-09 00:00:00+00:00,230.923355,225.245599,229.220027
8,2024-11-10 00:00:00+00:00,230.838638,222.284007,228.272254
9,2024-11-11 00:00:00+00:00,230.757767,231.252375,230.906154
