In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import pandas_ta as ta
from tensorflow.keras.losses import Huber
from scipy import stats

# List of ticker symbols (example of 50 different stocks)
tickers = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'BRK-B', 'JNJ', 'V', 'WMT',
    'JPM', 'PG', 'MA', 'DIS', 'NVDA', 'HD', 'PYPL', 'BAC', 'VZ', 'ADBE',
    'NFLX', 'INTC', 'CMCSA', 'PFE', 'KO', 'PEP', 'T', 'CSCO', 'MRK', 'ABT',
    'XOM', 'CVX', 'NKE', 'CRM', 'MDT', 'MCD', 'UNH', 'WFC', 'BMY', 'ACN',
    'TXN', 'AMGN', 'COST', 'QCOM', 'NEE', 'LLY', 'TMO', 'IBM', 'HON', 'GE'
]

# Adjust the date range to the last 60 days
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=59)).strftime('%Y-%m-%d')

# Function to download and preprocess data for a given ticker
def preprocess_data(ticker):
    try:
        data = yf.download(ticker, start=start_date, end=end_date, interval='5m')
        data.index.name = 'Date'
        data = data[['Open', 'High', 'Low', 'Close', 'Volume']]
        data['SMA_50'] = data['Close'].rolling(window=50).mean()
        data['SMA_200'] = data['Close'].rolling(window=200).mean()
        data['RSI'] = ta.rsi(data['Close'], window=14)
        data.dropna(inplace=True)
        return data
    except Exception as e:
        print(f"Failed to download data for {ticker}: {e}")
        return None

# Combine all stocks' data into a single DataFrame
combined_data = {}
successful_tickers = []

for ticker in tickers:
    stock_data = preprocess_data(ticker)
    if stock_data is not None and len(stock_data) > 0:
        combined_data[ticker] = stock_data
        successful_tickers.append(ticker)

# Ensure all dataframes have the same length by trimming to the shortest one
min_length = min([len(data) for data in combined_data.values()])
for ticker in successful_tickers:
    combined_data[ticker] = combined_data[ticker].iloc[-min_length:]

# Combine all stocks' data into a single numpy array
data_array = np.array([combined_data[ticker].values for ticker in successful_tickers])

# Normalize the data
scaler = MinMaxScaler()
num_features = data_array.shape[-1]
data_array = data_array.reshape(-1, num_features)
scaled_data = scaler.fit_transform(data_array)
scaled_data = scaled_data.reshape(len(successful_tickers), min_length, num_features)




In [74]:
# Function to create sequences for predicting differences
def create_sequences(data, seq_length, pred_length):
    xs, ys = [], []
    for i in range(len(data[0]) - seq_length - pred_length):
        x = data[:, i:i+seq_length]
        y = data[:, i+seq_length:i+seq_length+pred_length, 3] - data[:, i+seq_length-1:i+seq_length, 3]  # Predicting the difference in 'Close' prices
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys), np.sign(np.array(ys))

# Parameters
SEQ_LENGTH = 200
PRED_LENGTH = 10

# Create sequences
X, y, dir = create_sequences(scaled_data, SEQ_LENGTH, PRED_LENGTH)

print(y.shape)
# y = y.reshape((y.shape[0], y.shape[1], PRED_LENGTH))

# Split the data into training and test sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
dir_train, dir_test = dir[:split], dir[split:]



(2741, 50, 10)


In [75]:
model = Sequential()
model.add(TimeDistributed(LSTM(100, return_sequences=True), input_shape=(SEQ_LENGTH, len(tickers), num_features)))
model.add(TimeDistributed(LSTM(50)))
model.add(Dropout(0.2))
model.add(TimeDistributed(Dense(PRED_LENGTH)))

model.compile(optimizer='adam', loss=Huber())

# Train the model
model.fit(X_train, dir_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

  super().__init__(**kwargs)


Epoch 1/20
[1m11/69[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m8:37[0m 9s/step - loss: 0.4966

In [None]:
# Make predictions
predictions = model.predict(X_test)



In [None]:
print(X.shape)