In [8]:
# Gathering Stock Market Data

In [3]:
import yfinance as yf

# Download historical stock data
ticker = 'AAPL'  # Example: Apple Inc.
data = yf.download(ticker, start='2020-01-01', end='2024-01-01')
print(data.head())

[*********************100%***********************]  1 of 1 completed

Price                      Adj Close      Close       High        Low  \
Ticker                          AAPL       AAPL       AAPL       AAPL   
Date                                                                    
2020-01-02 00:00:00+00:00  72.796021  75.087502  75.150002  73.797501   
2020-01-03 00:00:00+00:00  72.088295  74.357498  75.144997  74.125000   
2020-01-06 00:00:00+00:00  72.662712  74.949997  74.989998  73.187500   
2020-01-07 00:00:00+00:00  72.320984  74.597504  75.224998  74.370003   
2020-01-08 00:00:00+00:00  73.484352  75.797501  76.110001  74.290001   

Price                           Open     Volume  
Ticker                          AAPL       AAPL  
Date                                             
2020-01-02 00:00:00+00:00  74.059998  135480400  
2020-01-03 00:00:00+00:00  74.287498  146322800  
2020-01-06 00:00:00+00:00  73.447502  118387200  
2020-01-07 00:00:00+00:00  74.959999  108872000  
2020-01-08 00:00:00+00:00  74.290001  132079200  





In [9]:
# Preprocessing the Data

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [22]:
# pip install scikit-learn

In [1]:
import sklearn
print(sklearn.__version__)

1.5.2


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [6]:
# Select relevant features
data['Return'] = data['Close'].pct_change()
data = data.dropna()

In [7]:
# Feature scaling
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data[['Close', 'Volume', 'Return']])

In [8]:
# Splitting data
train_size = int(len(scaled_data) * 0.8)
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]

In [10]:
# Building the Machine Learning Model

In [11]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

ModuleNotFoundError: No module named 'tensorflow'

In [21]:
# pip install tensorflow

In [9]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [10]:
# Prepare data for LSTM
X_train, y_train = [], []
window_size = 60  # Number of previous time steps to predict the next one
for i in range(window_size, len(train_data)):
    X_train.append(train_data[i-window_size:i])
    y_train.append(train_data[i, 0])  # Predicting the 'Close' price

X_train, y_train = np.array(X_train), np.array(y_train)

In [12]:
# Build the LSTM model
model = keras.Sequential([
    layers.LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    layers.LSTM(50, return_sequences=False),
    layers.Dense(25),
    layers.Dense(1)
])

# Compile and train the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, batch_size=32, epochs=50)

Epoch 1/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 86ms/step - loss: 0.0711
Epoch 2/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 84ms/step - loss: 0.0053
Epoch 3/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 87ms/step - loss: 0.0021
Epoch 4/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 87ms/step - loss: 0.0019
Epoch 5/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 84ms/step - loss: 0.0020
Epoch 6/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 86ms/step - loss: 0.0019
Epoch 7/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 90ms/step - loss: 0.0016
Epoch 8/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 89ms/step - loss: 0.0016
Epoch 9/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 91ms/step - loss: 0.0014
Epoch 10/50
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 91ms/step - loss: 0.001

<keras.src.callbacks.history.History at 0x1fce391bce0>

In [13]:
# Evaluating the Model

In [14]:
X_test, y_test = [], []
for i in range(window_size, len(test_data)):
    X_test.append(test_data[i-window_size:i])
    y_test.append(test_data[i, 0])

X_test, y_test = np.array(X_test), np.array(y_test)

In [17]:
# Make predictions
predictions = model.predict(X_test)

# Pad predictions with zeros to match the shape expected by the scaler (e.g., 3 columns)
predictions_padded = np.zeros((predictions.shape[0], scaled_data.shape[1]))
predictions_padded[:, 0] = predictions[:, 0]  # Place predicted 'Close' price in the first column

# Inverse transform using the padded array
predictions_inversed = scaler.inverse_transform(predictions_padded)[:, 0]  # Extract only the 'Close' column

# Calculate MSE using the true y_test values (rescaled)
y_test_padded = np.zeros((y_test.shape[0], scaled_data.shape[1]))
y_test_padded[:, 0] = y_test  # Place true 'Close' values in the first column
y_test_inversed = scaler.inverse_transform(y_test_padded)[:, 0]  # Extract only the 'Close' column

# Calculate MSE
mse = np.mean((predictions_inversed - y_test_inversed) ** 2)
print(f'Mean Squared Error: {mse}')

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Mean Squared Error: 11.395898782065764


In [18]:
from sklearn.metrics import r2_score

# Calculate R-squared using the true and predicted values
r_squared = r2_score(y_test_inversed, predictions_inversed)
print(f'R-squared: {r_squared:.4f}')

R-squared: 0.8274


In [19]:
# Observations

In [20]:
# MSE of 11.4 suggests moderate prediction error on average.
# This indicates some room for improvement in predictions.
# R-squared of 0.8274 indicates strong model performance overall.
# Model explains about 82.74% of the variance successfully.
# Strong performance for time-series prediction with LSTM model.
# The model is doing well capturing stock price trends.
# Future improvements could further reduce prediction error (MSE).
# Check for overfitting by comparing train/test performance.
# Future Task:
# Implement LSTM model with more big company's stock,
# and implement other model with same dataset,
# also will do model tuning for improvement.