In [1]:
import pandas as pd
import yfinance as yf

# Get the stock data
data = yf.download('AAPL', start='2020-01-01', end='2021-12-31')
data.to_csv('AAPL.csv')

[*********************100%***********************]  1 of 1 completed


In [3]:
import pandas as pd

# Load the data
data = pd.read_csv('AAPL.csv')

# Display the first few rows
print(data.head())

# Check for non-numeric values in the 'Close' column
print(data['Close'].unique())

                       Price          Adj Close              Close  \
0                     Ticker               AAPL               AAPL   
1                       Date                NaN                NaN   
2  2020-01-02 00:00:00+00:00  72.79601287841797   75.0875015258789   
3  2020-01-03 00:00:00+00:00  72.08828735351562  74.35749816894531   
4  2020-01-06 00:00:00+00:00  72.66272735595703  74.94999694824219   

                High                Low               Open     Volume  
0               AAPL               AAPL               AAPL       AAPL  
1                NaN                NaN                NaN        NaN  
2   75.1500015258789  73.79750061035156  74.05999755859375  135480400  
3   75.1449966430664             74.125   74.2874984741211  146322800  
4  74.98999786376953            73.1875  73.44750213623047  118387200  
['AAPL' nan '75.0875015258789' '74.35749816894531' '74.94999694824219'
 '74.59750366210938' '75.79750061035156' '77.40750122070312'
 '77.5824966430

In [4]:
# Convert 'Close' column to numeric, coercing errors
data['Close'] = pd.to_numeric(data['Close'], errors='coerce')

# Drop rows with NaN values in 'Close' column
data = data.dropna(subset=['Close'])

In [5]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1, 1))

# Split the data into training and test sets
training_data_len = int(np.ceil(len(scaled_data) * 0.8))

train_data = scaled_data[0:training_data_len]
test_data = scaled_data[training_data_len:]

# Create the datasets for the models
def create_dataset(data, time_step=1):
    X, Y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), 0])
        Y.append(data[i + time_step, 0])
    return np.array(X), np.array(Y)

time_step = 60
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(time_step, 1)))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=25))
model.add(Dense(units=1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Reshape the data to be 3-dimensional for LSTM
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Train the model
model.fit(X_train, y_train, batch_size=1, epochs=1)

  super().__init__(**kwargs)


[1m343/343[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - loss: 0.0108


<keras.src.callbacks.history.History at 0x7aa5528282b0>

In [7]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

# Calculate metrics
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
rmse = np.sqrt(mse)

print(f'MSE: {mse}')
print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186ms/step
MSE: 24076.755242381907
MAE: 154.99879186945387
RMSE: 155.16686257826413
