In [None]:
import yfinance as yf
import pandas as pd
import datetime
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Define the ticker symbols of DJI's constituent stocks
dji_stocks = [
    'MMM', 'AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CVX', 'CSCO',
    'KO', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM',
    'MCD', 'MRK', 'MSFT', 'NKE', 'PG', 'CRM', 'TRV', 'UNH',
    'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW'
]

start_date = '2023-01-01'  # Adjust the start date to a more recent date due to hourly data limitations
end_date = datetime.datetime.now().strftime('%Y-%m-%d')

# Collect one frame per ticker and concatenate once at the end.
# DataFrame.append is deprecated (and removed in pandas 2.0), and it copies
# the whole accumulated frame on every call.
frames = []

for symbol in dji_stocks:
    # Download the hourly historical data
    data = yf.download(symbol, start=start_date, end=end_date, interval='1h')

    # An empty frame contributes no rows; report it instead of appending it.
    if data.empty:
        print(f"No hourly data returned for {symbol}; skipping")
        continue

    # Add a 'Ticker' column to the data with the current symbol
    data['Ticker'] = symbol

    frames.append(data)

# Single concatenation of all per-ticker frames (empty frame if nothing was downloaded)
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Reset the index, creating a separate index column
all_data = all_data.reset_index(drop=False)

# Rename the new index column. This only has an effect when the downloaded
# index is unnamed (reset_index then calls the column 'index'); a named index
# keeps its own name as the column name.
all_data = all_data.rename(columns={'index': 'Timestamp'})

# Save the data to a CSV file
all_data.to_csv('dji_stocks_hourly_historical_data.csv')

print("Hourly historical data for DJI stocks downloaded and saved as dji_stocks_hourly_historical_data.csv")


# Function to convert string to datetime
def str_to_datetime(s):
    """Parse ``s`` with ``pandas.to_datetime`` and return the parsed value."""
    parsed = pd.to_datetime(s)
    return parsed

# Function to convert datetime to string
def datetime_to_str(d):
    """Format ``d`` as a ``YYYY-MM-DD`` string using its ``strftime`` method."""
    date_format = '%Y-%m-%d'
    return d.strftime(date_format)

# Function to create a dataset with a look_back window
def create_dataset(dataset, look_back=1):
    """Build sliding-window samples from the first column of a 2-D array.

    Sample ``i`` is the ``look_back`` consecutive values
    ``dataset[i:i + look_back, 0]`` and its target is the value that
    immediately follows the window, ``dataset[i + look_back, 0]``.

    Parameters
    ----------
    dataset : 2-D array indexable as ``dataset[rows, 0]``
    look_back : int, number of consecutive values in each window

    Returns
    -------
    (X, y) : tuple of numpy arrays with one entry per window. Both are
        empty when ``dataset`` has ``look_back`` rows or fewer.
    """
    # Every start index whose target row still exists is a valid sample, so
    # there are len(dataset) - look_back of them. (Subtracting one more, as
    # the previous version did, silently dropped the final sample.)
    n_samples = len(dataset) - look_back
    dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
    dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
    return np.array(dataX), np.array(dataY)

# Function to convert a windowed DataFrame to separate date, X, and y arrays
def windowed_df_to_date_X_y(windowed_dataframe):
    """Split a windowed DataFrame column-wise into (dates, X, y).

    The frame is converted to a NumPy array; the first column is returned as
    ``dates``, the last column as ``y``, and every column in between as ``X``.
    """
    values = windowed_dataframe.to_numpy()

    first_column = values[:, 0]
    middle_columns = values[:, 1:-1]
    last_column = values[:, -1]

    return first_column, middle_columns, last_column
    
# Function to train and evaluate a stock LSTM model
def train_and_evaluate_stock(stock_data, symbol):
    """Fit a small LSTM on one ticker's closing prices and print the training RMSE.

    The 'Close' column is min-max scaled to [0, 1], turned into lagged
    windows, and used to train a one-layer LSTM (4 units) followed by a
    single Dense output. The fitted model is saved to
    ``{symbol}_lstm_model.h5`` and returned.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Rows for one ticker; must contain a 'Close' column.
    symbol : str
        Ticker symbol, used in messages and in the saved model's file name.

    Returns
    -------
    The fitted Keras ``Sequential`` model.

    Raises
    ------
    ValueError
        If there are too few usable rows to build a single training window.
    """
    # Fixed seeds so every call starts from the same random state.
    np.random.seed(42)
    tf.random.set_seed(42)

    look_back = 10

    # Fail early with a clear message instead of deep inside sklearn/Keras.
    if len(stock_data) <= look_back:
        raise ValueError(
            f"{symbol}: need more than {look_back} rows to build a training "
            f"window, got {len(stock_data)}"
        )

    # Only the closing price is modelled. (The previous version also parsed
    # the 'Datetime' column row by row and then discarded the result.)
    close = stock_data[['Close']].copy()

    scaler = MinMaxScaler(feature_range=(0, 1))
    close['Close'] = scaler.fit_transform(close['Close'].values.reshape(-1, 1))

    # Columns are ordered from the oldest lag (look_back steps back) to the
    # current value (lag 0); rows without a full history are dropped.
    windowed = pd.concat(
        [close.shift(lag) for lag in range(look_back, -1, -1)], axis=1
    ).dropna()
    if windowed.empty:
        raise ValueError(f"{symbol}: no complete training windows after dropping NaN rows")

    # NOTE(review): windowed_df_to_date_X_y treats the first column as dates,
    # but this frame has no date column, so the oldest lag is what gets
    # discarded here. The model therefore sees look_back - 1 lags. That input
    # shape is kept so newly saved models stay compatible with existing ones;
    # confirm whether look_back lags were actually intended.
    _, train_X, train_y = windowed_df_to_date_X_y(windowed)
    n_features = train_X.shape[1]  # equals look_back - 1
    train_X = np.reshape(train_X, (train_X.shape[0], 1, n_features))

    model = Sequential()
    model.add(LSTM(4, input_shape=(1, n_features)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(train_X, train_y, epochs=100, batch_size=1, verbose=2)

    # Save the trained model
    model.save(f"{symbol}_lstm_model.h5")

    # Undo the scaling so the reported RMSE is in price units.
    train_predict = scaler.inverse_transform(model.predict(train_X))
    train_y = scaler.inverse_transform(train_y.reshape(-1, 1))

    trainScore = np.sqrt(mean_squared_error(train_y, train_predict[:, 0]))
    print(f"{symbol} Train Score: {trainScore:.2f} RMSE")

    return model

# Train one model per ticker, keyed by ticker symbol.
trained_models = {}

for symbol in dji_stocks:
    # Select only the rows that belong to the current ticker.
    ticker_mask = all_data['Ticker'] == symbol
    stock_data = all_data[ticker_mask]

    print(f"Training model for {symbol}...")
    model = train_and_evaluate_stock(stock_data, symbol)
    trained_models.update({symbol: model})
    print(f"Finished training model for {symbol}")



[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
494/494 - 1s - loss: 0.0011 - 839ms/epoch - 2ms/step
Epoch 51/100
494/494 - 1s - loss: 0.0011 - 783ms/epoch - 2ms/step
Epoch 52/100
494/494 - 1s - loss: 0.0011 - 807ms/epoch - 2ms/step
Epoch 53/100
494/494 - 1s - loss: 0.0012 - 810ms/epoch - 2ms/step
Epoch 54/100
494/494 - 1s - loss: 0.0011 - 812ms/epoch - 2ms/step
Epoch 55/100
494/494 - 1s - loss: 0.0011 - 808ms/epoch - 2ms/step
Epoch 56/100
494/494 - 1s - loss: 0.0012 - 799ms/epoch - 2ms/step
Epoch 57/100
494/494 - 1s - loss: 0.0011 - 820ms/epoch - 2ms/step
Epoch 58/100
494/494 - 1s - loss: 0.0011 - 819ms/epoch - 2ms/step
Epoch 59/100
494/494 - 1s - loss: 0.0011 - 804ms/epoch - 2ms/step
Epoch 60/100
494/494 - 1s - loss: 0.0011 - 1s/epoch - 2ms/step
Epoch 61/100
494/494 - 1s - loss: 0.0011 - 1s/epoch - 2ms/step
Epoch 62/100
494/494 - 1s - loss: 0.0011 - 935ms/epoch - 2ms/step
Epoch 63/100
494/494 - 1s - loss: 0.0011 - 789ms/epoch - 2ms/step
Epoch 64/100
494/494 - 1s - lo