In [93]:
import pandas as pd
import numpy as np

In [94]:
data=pd.read_csv('/content/apple_stock_data.csv')

In [95]:
data.head()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2023-11-02 00:00:00+00:00,176.665985,177.570007,177.779999,175.460007,175.520004,77334800
1,2023-11-03 00:00:00+00:00,175.750671,176.649994,176.820007,173.350006,174.240005,79763700
2,2023-11-06 00:00:00+00:00,178.31752,179.229996,179.429993,176.210007,176.380005,63841300
3,2023-11-07 00:00:00+00:00,180.894333,181.820007,182.440002,178.970001,179.179993,70530000
4,2023-11-08 00:00:00+00:00,181.958893,182.889999,183.449997,181.589996,182.350006,49340300


In [96]:
data.tail()

Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
247,2024-10-28 00:00:00+00:00,233.399994,233.399994,234.729996,232.550003,233.320007,36087100
248,2024-10-29 00:00:00+00:00,233.669998,233.669998,234.330002,232.320007,233.100006,35417200
249,2024-10-30 00:00:00+00:00,230.100006,230.100006,233.470001,229.550003,232.610001,47070900
250,2024-10-31 00:00:00+00:00,225.910004,225.910004,229.830002,225.369995,229.339996,64370100
251,2024-11-01 00:00:00+00:00,222.910004,222.910004,225.350006,220.270004,220.970001,65242200


In [97]:
data.shape

(252, 7)

In [98]:
data.describe()

Unnamed: 0,Adj Close,Close,High,Low,Open,Volume
count,252.0,252.0,252.0,252.0,252.0,252.0
mean,199.088202,199.454286,201.093056,197.608373,199.316032,58322140.0
std,21.511933,21.321567,21.571427,20.963034,21.365476,30257710.0
min,164.585999,165.0,166.399994,164.080002,165.350006,24048300.0
25%,182.254063,182.852501,184.617504,181.487499,182.777504,42788550.0
50%,192.370026,193.084999,194.399994,191.724998,192.989998,51748650.0
75%,221.697502,221.697502,224.149994,219.717499,221.635002,64974600.0
max,236.479996,236.479996,237.490005,234.449997,236.479996,318679900.0


In [99]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       252 non-null    object 
 1   Adj Close  252 non-null    float64
 2   Close      252 non-null    float64
 3   High       252 non-null    float64
 4   Low        252 non-null    float64
 5   Open       252 non-null    float64
 6   Volume     252 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 13.9+ KB


#As the dataset is based on stock market data,i'll conver the data column to a datatime type, set it as the index, and focus on the Close price

In [100]:
# Step-by-step fix to set 'Date' as index safely
import pandas as pd

# Remove any leading/trailing whitespace in column names
data.columns = data.columns.str.strip()

# Check if 'Date' column exists
if 'Date' in data.columns:
    # Convert 'Date' to datetime format
    data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

    # Drop rows where conversion failed (NaT values)
    data = data.dropna(subset=['Date'])

    # Set 'Date' as the index
    data.set_index('Date', inplace=True)

    # Optionally keep only the 'Close' column
    if 'Close' in data.columns:
        data = data[['Close']]
    else:
        print("⚠️ 'Close' column not for")


In [101]:
data.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-11-02 00:00:00+00:00,177.570007
2023-11-03 00:00:00+00:00,176.649994
2023-11-06 00:00:00+00:00,179.229996
2023-11-07 00:00:00+00:00,181.820007
2023-11-08 00:00:00+00:00,182.889999


In [102]:
data.shape

(252, 1)

So, let’s scale the Close price data between 0 and 1 using MinMaxScaler to ensure compatibility with the LSTM model:

In [103]:
from sklearn.preprocessing import MinMaxScaler

In [104]:
scaler=MinMaxScaler(feature_range=(0,1))
data['Close']= scaler.fit_transform(data[['Close']])

In [105]:
data.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2023-11-02 00:00:00+00:00,0.175853
2023-11-03 00:00:00+00:00,0.162983
2023-11-06 00:00:00+00:00,0.199077
2023-11-07 00:00:00+00:00,0.235311
2023-11-08 00:00:00+00:00,0.25028


Now, let’s prepare the data for LSTM by creating sequences of a defined length (e.g., 60 days) to predict the next day’s price:

In [106]:
def create_sequence(data, seq_length=60):
  X,y=[],[]
  for i in range(len(data)- seq_length):
    X.append(data[i:i+seq_length])
    y.append(data[i+seq_length])

  return np.array(X),np.array(y)

seq_length=60
X,y= create_sequence(data['Close'].values, seq_length)

Now, we will split the sequences into training and test sets (e.g., 80% training, 20% testing):

In [107]:
train_size= int(len(X)*0.8)
X_train, X_test=X[:train_size],X[train_size:]
y_train, y_test= y[:train_size],y[train_size:]

#Build Models

Now, we will build a sequential LSTM model with layers to capture the temporal dependencies in the data:

#1st Model

In [108]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [109]:
lstm_model= Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape= (X_train.shape[1],1)))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dense(1))


Now, we will compile the model using an appropriate optimizer and loss function, and fit it into the training data

In [110]:
lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train,y_train,epochs=20, batch_size=32)

Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 73ms/step - loss: 0.1964
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - loss: 0.0393
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - loss: 0.0298
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 62ms/step - loss: 0.0234
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 0.0196
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - loss: 0.0131
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 0.0142
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step - loss: 0.0115
Epoch 9/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - loss: 0.0120
Epoch 10/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 103ms/step - loss: 0.0109
Epoch 11/20
[1m5/

<keras.src.callbacks.history.History at 0x7d6f6b095450>

#2nd Model

Now, let’s train the second model. I’ll start by generating lagged features for Linear Regression (e.g., using the past 3 days as predictors):


In [111]:
data['Lag_1']=data['Close'].shift(1)
data['Lag_2']=data['Close'].shift(2)
data['Lag_3']=data['Close'].shift(3)
data=data.dropna()

Now, we will split the data accordingly for training and testing:

In [112]:
X_lin=data[['Lag_1','Lag_2','Lag_3']]
y_lin=data['Close']
X_train_lin, X_test_lin= X_lin[:train_size],X_lin[train_size:]
y_train_lin, y_test_lin= y_lin[:train_size],y_lin[train_size:]

Now, let’s train the linear regression model:


In [113]:
from sklearn.linear_model import LinearRegression
lin_model=LinearRegression()
lin_model.fit(X_train_lin,y_train_lin)

Now, here’s how to make predictions using LSTM on the test set and inverse transform the scaled predictions:

In [114]:
X_test_lstm=X_test.reshape((X_test.shape[0], X_test.shape[1],1))
lstm_predictions=lstm_model.predict(X_test_lstm)
lstm_predictions=scaler.inverse_transform(lstm_predictions)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 459ms/step


In [115]:
lin_predictions= lin_model.predict(X_test_lin)
lin_predictions=scaler.inverse_transform(lin_predictions.reshape(-1,1))

Here’s how to generate predictions using Linear Regression and inverse-transform them:

nd, here’s how to use a weighted average to create hybrid predictions:

In [116]:
min_len = min(len(lstm_predictions), len(lin_predictions))

lstm_predictions = lstm_predictions[:min_len]
lin_predictions = lin_predictions[:min_len]

hybrid_predictions = (0.7 * lstm_predictions) + (0.3 * lin_predictions)


Let’s see how to make predictions for the next 400 days using our hybrid model. Here’s how to predict the Next 400 Days using LSTM:

In [117]:
lstm_future_predictions=[]
last_sequence=X[-1].reshape(1,seq_length,1)
for _ in range(400):
  lstm_pred=lstm_model.predict(last_sequence)[0,0]
  lstm_future_predictions.append(lstm_pred)
  lstm_pred_reshaped=np.array([[lstm_pred]]).reshape(1,1,1)
  last_sequence = np.append(last_sequence[:, 1:, :], lstm_pred_reshaped, axis=1)
lstm_future_predictions = scaler.inverse_transform(np.array(lstm_future_predictions).reshape(-1, 1))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47

Here’s how to predict the Next 10 Days using Linear Regression:

In [118]:
recent_data = data['Close'].values[-3:]
lin_future_predictions = []
for _ in range(400):
    lin_pred = lin_model.predict(recent_data.reshape(1, -1))[0]
    lin_future_predictions.append(lin_pred)
    recent_data = np.append(recent_data[1:], lin_pred)
lin_future_predictions = scaler.inverse_transform(np.array(lin_future_predictions).reshape(-1, 1))

And, here’s how to combine the predictive power of both models to make predictions for the next 400
 days:

In [119]:
hybrid_future_predictions = (0.7 * lstm_future_predictions) + (0.3 * lin_future_predictions)

Here’s how to create the final DataFrame to look at the predictions:

In [121]:
future_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=400)
predictions_df = pd.DataFrame({
    'Date': future_dates,
    'LSTM Predictions': lstm_future_predictions.flatten(),
    'Linear Regression Predictions': lin_future_predictions.flatten(),
    'Hybrid Model Predictions': hybrid_future_predictions.flatten()
})
print(predictions_df)

                         Date  LSTM Predictions  \
0   2024-11-02 00:00:00+00:00        232.975586   
1   2024-11-03 00:00:00+00:00        232.808487   
2   2024-11-04 00:00:00+00:00        232.709915   
3   2024-11-05 00:00:00+00:00        232.657532   
4   2024-11-06 00:00:00+00:00        232.635742   
..                        ...               ...   
395 2025-12-02 00:00:00+00:00        232.076935   
396 2025-12-03 00:00:00+00:00        232.076920   
397 2025-12-04 00:00:00+00:00        232.076904   
398 2025-12-05 00:00:00+00:00        232.076889   
399 2025-12-06 00:00:00+00:00        232.076889   

     Linear Regression Predictions  Hybrid Model Predictions  
0                       230.355192                232.189459  
1                       225.707291                230.678130  
2                       222.703426                229.707970  
3                       230.631535                232.049736  
4                       225.486380                230.490931  
..       

##Results:
-
So, this is how to build a hybrid machine learning model using Python. Build a hybrid machine learning model when a single algorithm cannot capture the complexity of the data or when different types of data or patterns are present. I hope you liked this article on building a hybrid machine learning model with Python.