In [5]:
import yfinance as yf
import pandas as pd

# Define ticker and period
ticker = 'AAPL'
period = '5y'

data = pd.read_csv('AAPL_5years.csv', index_col=0)
data.head()

# Display the first few rows
print("Shape of data:", data.shape)
data.head()

Shape of data: (1257, 5)


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Price,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,,,,,
2020-07-08,92.67949676513672,92.71109019742121,91.46197784699565,91.549467922268,117092000
2020-07-09,93.07804107666016,93.62725502908617,92.02820475666121,93.57379093641129,125642800
2020-07-10,93.24087524414062,93.2992044388239,92.05981473407041,92.67221565599948,90257200


In [6]:
import pandas as pd

data = pd.read_csv('AAPL_5years.csv')
print(data.head())
print(data.columns)

        Price              Close               High                Low  \
0      Ticker               AAPL               AAPL               AAPL   
1        Date                NaN                NaN                NaN   
2  2020-07-08  92.67949676513672  92.71109019742121  91.46197784699565   
3  2020-07-09  93.07804107666016  93.62725502908617  92.02820475666121   
4  2020-07-10  93.24087524414062   93.2992044388239  92.05981473407041   

                Open     Volume  
0               AAPL       AAPL  
1                NaN        NaN  
2    91.549467922268  117092000  
3  93.57379093641129  125642800  
4  92.67221565599948   90257200  
Index(['Price', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')


In [7]:
# Remove rows where 'Close' is not a number
data = data[pd.to_numeric(data['Close'], errors='coerce').notnull()]
data['Close'] = data['Close'].astype(float)

In [8]:
from sklearn.preprocessing import MinMaxScaler

# Drop rows with missing values
data_clean = data.dropna()

# Set 'Date' as index if present
if 'Date' in data_clean.columns:
    data_clean['Date'] = pd.to_datetime(data_clean['Date'])
    data_clean.set_index('Date', inplace=True)

# Select only the 'Close' price for LSTM
data_prepared = data_clean[['Close']].copy()

# Normalize the 'Close' price to the range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
data_prepared['Close'] = scaler.fit_transform(data_prepared[['Close']])

print(data_prepared.head())

      Close
2  0.015747
3  0.018114
4  0.019081
5  0.016526
6  0.025649


In [9]:
import numpy as np

def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)

# Set sequence length (e.g., 60 days)
SEQ_LENGTH = 60
close_values = data_prepared['Close'].values

X, y = create_sequences(close_values, SEQ_LENGTH)

# Reshape X for LSTM [samples, time steps, features]
X = X.reshape((X.shape[0], X.shape[1], 1))

In [10]:
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [11]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential([
    LSTM(50, return_sequences=False, input_shape=(SEQ_LENGTH, 1)),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20


  super().__init__(**kwargs)


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 0.1086 - val_loss: 0.0412
Epoch 2/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0049 - val_loss: 0.0138
Epoch 3/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0025 - val_loss: 0.0044
Epoch 4/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0015 - val_loss: 0.0034
Epoch 5/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0012 - val_loss: 0.0034
Epoch 6/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0012 - val_loss: 0.0046
Epoch 7/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0011 - val_loss: 0.0036
Epoch 8/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 0.0011 - val_loss: 0.0046
Epoch 9/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m