In [1]:
import yfinance as yf
import pandas as pd
import datetime
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Define the ticker symbols of DJI's constituent stocks
dji_stocks = [
    'MMM', 'AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CVX', 'CSCO',
    'KO', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM',
    'MCD', 'MRK', 'MSFT', 'NKE', 'PG', 'CRM', 'TRV', 'UNH',
    'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW'
]

start_date = '2021-05-17'  # Adjust the start date to a more recent date due to hourly data limitations
end_date = datetime.datetime.now().strftime('%Y-%m-%d')

# Initialize an empty DataFrame to store historical data for all stocks
all_data = pd.DataFrame()

for symbol in dji_stocks:
    # Download the hourly historical data
    # data = yf.download(symbol, start=start_date, end=end_date, interval='1h')
    data = yf.download(symbol, start=start_date, end=end_date)

    # Add a 'Ticker' column to the data with the current symbol
    data['Ticker'] = symbol

    # Append the data to the all_data DataFrame
    all_data = all_data.append(data)

# Reset the index, creating a separate index column
all_data = all_data.reset_index(drop=False)

# Rename the new index column
all_data = all_data.rename(columns={'index': 'Timestamp'})

# Save the data to a CSV file
all_data.to_csv('dji_stocks_hourly_historical_data.csv')

print("Hourly historical data for DJI stocks downloaded and saved as dji_stocks_hourly_historical_data.csv")


# Function to convert string to datetime
def str_to_datetime(s):
    return pd.to_datetime(s)

# Function to convert datetime to string
def datetime_to_str(d):
    return d.strftime('%Y-%m-%d')

# Function to create a dataset with a look_back window
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

# Function to convert a windowed DataFrame to separate date, X, and y arrays
def windowed_df_to_date_X_y(windowed_dataframe):
    df_as_np = windowed_dataframe.to_numpy()

    dates = df_as_np[:, 0]
    X = df_as_np[:, 1:-1]
    y = df_as_np[:, -1]

    return dates, X, y
    
# Function to train and evaluate a stock LSTM model
def train_and_evaluate_stock(stock_data, symbol):
    np.random.seed(42)
    tf.random.set_seed(42)

    stock_data = stock_data[['Datet', 'Close']].copy()
    stock_data['Datetime'] = stock_data['Datetime'].apply(str_to_datetime)

    scaler = MinMaxScaler(feature_range=(0, 1))
    stock_data['Close'] = scaler.fit_transform(stock_data['Close'].values.reshape(-1, 1))

    look_back = 10

    stock_data = stock_data[['Close']].copy()
    stock_data = pd.concat([stock_data.shift(look_back - i) for i in range(look_back + 1)], axis=1)
    stock_data = stock_data.dropna()

    train_dates, train_X, train_y = windowed_df_to_date_X_y(stock_data)
    train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))

    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back - 1)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(train_X, train_y, epochs=100, batch_size=1, verbose=2)

    # Save the trained model
    model.save(f"{symbol}_lstm_model.h5")

    trainPredict = model.predict(train_X)
    trainPredict = scaler.inverse_transform(trainPredict)
    train_y = scaler.inverse_transform(train_y.reshape(-1, 1))

    trainScore = np.sqrt(mean_squared_error(train_y, trainPredict[:, 0]))
    print(f"{symbol} Train Score: {trainScore:.2f} RMSE")

    return model

trained_models = {}

for symbol in dji_stocks:
    stock_data = all_data[all_data['Ticker'] == symbol]
    print(f"Training model for {symbol}...")
    model = train_and_evaluate_stock(stock_data, symbol)
    trained_models[symbol] = model
    print(f"Finished training model for {symbol}")


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed


  all_data = all_data.append(data)


[*********************100%***********************]  1 of 1 completed
Hourly historical data for DJI stocks downloaded and saved as dji_stocks_hourly_historical_data.csv
Training model for MMM...


  all_data = all_data.append(data)


KeyError: ignored

In [2]:
from IPython.testing import test
import yfinance as yf
import pandas as pd
import datetime
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt


# Load the test data
test_data = pd.read_csv('dji.csv')

stock = 'AAPL'
# Load the trained model
model = tf.keras.models.load_model('AAPL_lstm_model.h5')

# Preprocess the test data
scaler = MinMaxScaler(feature_range=(0, 1))
look_back = 10

test_data = test_data[test_data['Ticker']==stock]
all_test_data = test_data

test_data['Close'] = scaler.fit_transform(test_data['Close'].values.reshape(-1, 1))
test_data = test_data[['Close']].copy()
test_data = pd.concat([test_data.shift(look_back - i) for i in range(look_back + 1)], axis=1)
test_data = test_data.dropna()
test_dates, test_X, test_y = windowed_df_to_date_X_y(test_data)
test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))

# Make predictions
testPredict = model.predict(test_X)
testPredict = scaler.inverse_transform(testPredict)
test_y = scaler.inverse_transform(test_y.reshape(-1, 1))

# Evaluate the model
testScore = np.sqrt(mean_squared_error(test_y, testPredict[:, 0]))
print(f"AAPL Test Score: {testScore:.2f} RMSE")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Close'] = scaler.fit_transform(test_data['Close'].values.reshape(-1, 1))


AAPL Test Score: 2.92 RMSE


In [6]:
testPredict[:,0]

array([124.95965 , 124.6598  , 125.30713 , 124.431786, 125.97969 ,
       126.320465, 126.83057 , 127.312195, 126.36477 , 127.1374  ,
       129.77089 , 129.66238 , 130.12756 , 131.44414 , 130.40019 ,
       131.8127  , 133.5574  , 133.53966 , 133.49869 , 133.05424 ,
       134.2812  , 135.98717 , 136.80931 , 137.28586 , 139.51532 ,
       141.59239 , 144.04645 , 143.31166 , 144.56038 , 144.3305  ,
       144.9695  , 148.55981 , 148.32094 , 146.38173 , 142.75043 ,
       145.16533 , 145.29425 , 146.37582 , 148.64435 , 148.84639 ,
       146.96861 , 144.94687 , 145.15451 , 145.54109 , 145.41496 ,
       147.19572 , 147.04623 , 146.95815 , 146.2172  , 145.81854 ,
       145.49487 , 145.63657 , 148.44514 , 149.04738 , 150.7656  ,
       150.29459 , 146.3766  , 146.30971 , 147.64478 , 149.1893  ,
       149.69096 , 148.49501 , 147.53604 , 148.23737 , 152.348   ,
       151.86789 , 152.23051 , 153.50299 , 153.77608 , 156.15857 ,
       155.0125  , 153.68599 , 149.18556 , 148.8155  , 148.014

In [None]:
testPredict[:,0].shape

(479,)

In [None]:
all_test_data['Date'][10:]

1477    2021-06-01
1478    2021-06-02
1479    2021-06-03
1480    2021-06-04
1481    2021-06-07
           ...    
1951    2023-04-19
1952    2023-04-20
1953    2023-04-21
1954    2023-04-24
1955    2023-04-25
Name: Date, Length: 479, dtype: object

In [None]:

fig = px.line(x=all_test_data['Date'][10:], y=testPredict[:,0], title='Time Series with Rangeslider')

fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
import plotly.express as px
import plotly.graph_objects as go


df = pd.read_csv('dji_stocks_hourly_historical_data.csv')
df

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
0,0,2021-05-17,204.600006,205.350006,203.690002,205.110001,189.772263,1532700,MMM
1,1,2021-05-18,204.990005,205.179993,202.809998,203.029999,187.847809,1771800,MMM
2,2,2021-05-19,201.910004,202.699997,199.330002,202.600006,187.449966,2220200,MMM
3,3,2021-05-20,201.089996,202.190002,200.500000,201.649994,187.943939,1412500,MMM
4,4,2021-05-21,201.710007,203.759995,201.080002,201.860001,188.139648,1537200,MMM
...,...,...,...,...,...,...,...,...,...
14665,14665,2023-04-19,56.400002,56.588001,55.840000,56.439999,56.439999,3828000,DOW
14666,14666,2023-04-20,55.799999,56.165001,55.365002,55.700001,55.700001,3334700,DOW
14667,14667,2023-04-21,55.360001,55.480000,54.705002,54.869999,54.869999,4179500,DOW
14668,14668,2023-04-24,55.139999,55.400002,54.779999,55.389999,55.389999,5036400,DOW


In [None]:
stock = 'CVX'

df_stock = df[df['Ticker']==stock]


fig = px.line(df_stock, x='Date', y='High', title='Time Series with Rangeslider')

fig.update_xaxes(rangeslider_visible=True)
fig.show()

In [None]:
df_stock

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Ticker
2934,2934,2021-05-17,109.089996,110.879997,108.970001,110.809998,102.163795,12692900,CVX
2935,2935,2021-05-18,108.940002,109.059998,105.970001,106.180000,99.093376,12759900,CVX
2936,2936,2021-05-19,103.820000,104.290001,101.970001,103.199997,96.312256,14762700,CVX
2937,2937,2021-05-20,103.070000,104.070000,101.970001,103.559998,96.648232,9706300,CVX
2938,2938,2021-05-21,104.099998,105.199997,103.680000,104.120003,97.170860,10352800,CVX
...,...,...,...,...,...,...,...,...,...
3418,3418,2023-04-19,169.460007,170.740005,169.100006,170.679993,170.679993,5506800,CVX
3419,3419,2023-04-20,168.759995,170.059998,168.020004,169.850006,169.850006,5851100,CVX
3420,3420,2023-04-21,169.830002,170.589996,168.839996,169.119995,169.119995,7178600,CVX
3421,3421,2023-04-24,169.100006,172.380005,168.580002,171.479996,171.479996,7177600,CVX
