In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed, Input
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import pandas_ta as ta
from tensorflow.keras.losses import Huber, BinaryCrossentropy
from scipy import stats
from tensorflow.keras.optimizers import Adam

# List of ticker symbols (example of 50 different stocks)
tickers = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'BRK-B', 'JNJ', 'V', 'WMT',
    'JPM', 'PG', 'MA', 'DIS', 'NVDA', 'HD', 'PYPL', 'BAC', 'VZ', 'ADBE',
    'NFLX', 'INTC', 'CMCSA', 'PFE', 'KO', 'PEP', 'T', 'CSCO', 'MRK', 'ABT',
    'XOM', 'CVX', 'NKE', 'CRM', 'MDT', 'MCD', 'UNH', 'WFC', 'BMY', 'ACN',
    'TXN', 'AMGN', 'COST', 'QCOM', 'NEE', 'LLY', 'TMO', 'IBM', 'HON', 'GE'
]

# Adjust the date range to the last 60 days
end_date = datetime.now().strftime('%Y-%m-%d')
start_date = (datetime.now() - timedelta(days=59)).strftime('%Y-%m-%d')

# Function to download and preprocess data for a given ticker
def preprocess_data(ticker):
    try:
        data = yf.download(ticker, start=start_date, end=end_date, interval='5m')
        data.index.name = 'Date'
        data = data[['Open', 'High', 'Low', 'Close', 'Volume']]
        data['SMA_50'] = data['Close'].rolling(window=50).mean()
        data['SMA_200'] = data['Close'].rolling(window=200).mean()
        data['RSI'] = ta.rsi(data['Close'], window=14)
        data.dropna(inplace=True)
        return data
    except Exception as e:
        print(f"Failed to download data for {ticker}: {e}")
        return None

# Combine all stocks' data into a single DataFrame
combined_data = {}
successful_tickers = []

for ticker in tickers:
    stock_data = preprocess_data(ticker)
    if stock_data is not None and len(stock_data) > 0:
        combined_data[ticker] = stock_data
        successful_tickers.append(ticker)


# Ensure all dataframes have the same length by trimming to the shortest one
min_length = min([len(data) for data in combined_data.values()])
for ticker in successful_tickers:
    combined_data[ticker] = combined_data[ticker].iloc[-min_length:]

# Combine all stocks' data into a single numpy array
data_array = np.array([combined_data[ticker].values for ticker in successful_tickers])

# Normalize the data
scaler = MinMaxScaler()
num_features = data_array.shape[-1]
data_array = data_array.reshape(-1, num_features)
scaled_data = scaler.fit_transform(data_array)
scaled_data = scaled_data.reshape(len(successful_tickers), min_length, num_features)




In [None]:
# Function to create sequences for predicting differences
def create_sequences(data, seq_length, pred_length):
    xs, ys = [], []
    for i in range(len(data[0]) - seq_length - pred_length):
        x = data[:, i:i+seq_length]
        y = data[:, i+seq_length:i+seq_length+pred_length, 3] - data[:, i+seq_length-1:i+seq_length, 3]  # Predicting the difference in 'Close' prices
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys), np.sign(np.array(ys))

# Parameters
SEQ_LENGTH = 200
PRED_LENGTH = 10

# Create sequences
X, y, dir = create_sequences(scaled_data, SEQ_LENGTH, PRED_LENGTH)
# X = np.einsum('ijkl->ikjl', X)
# y = np.einsum('ijk->ikj', y)
print(y.shape)
# y = y.reshape((y.shape[0], y.shape[1], PRED_LENGTH))

# Split the data into training and test sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
dir_train, dir_test = dir[:split], dir[split:]



In [None]:
input_layer = Input(shape=(len(tickers), SEQ_LENGTH, num_features))


reshaped_input = tf.keras.layers.Reshape((len(tickers) * SEQ_LENGTH, num_features))(input_layer)


x = LSTM(100, return_sequences=True)(reshaped_input)
x = LSTM(50)(x)
x = Dropout(0.2)(x)


x = Dense(len(tickers) * PRED_LENGTH)(x)


dir_layer = tf.keras.layers.Reshape((len(tickers), PRED_LENGTH))(x)
pred_layer = tf.keras.layers.Reshape((len(tickers), PRED_LENGTH))(x)


model = Model(inputs=input_layer, outputs=pred_layer)

# Compile the model
model.compile(optimizer=Adam(), loss='binary_crossentropy')
model.summary()

In [None]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

In [81]:
predictions = model.predict(X_test)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 3s/step


In [85]:
success=0
attempts=0
trades = np.array([])
profit_over_time = np.array([])
for i in range(predictions.shape[0]):
    for j in range(50):
        for k in range(1):
            if(np.sign(predictions[i][j][k]) == np.sign(y_test[i][j][k])):
                success+=1
            trades = np.append(trades, y_test[i][j][k]*(scaler.data_max_[3] - scaler.data_min_[3]) * np.sign(predictions[i][j][k])) # long/short
            profit_over_time = np.append(profit_over_time, np.sum(trades))
            attempts+=1

per_success = success/attempts
print("percent success: " + str(per_success))
print("attempts: " + str(attempts))


baseline_accuracy = 0.5


SE = np.sqrt((baseline_accuracy * (1 - baseline_accuracy)) / attempts)

Z = (per_success - baseline_accuracy) / SE

p_value = 2 * (1 - stats.norm.cdf(np.abs(Z)))

print(f'Z-score: {Z:.4f}')
print(f'p-value: {p_value:.4f}')

print(f'profit: {np.sum(trades):.4f}')
print(np.min(profit_over_time))
a = np.arange(len(profit_over_time))
print(np.max(trades))
print(np.min(trades))


percent success: 0.49932457786116324
attempts: 26650
Z-score: -0.2205
p-value: 0.8255
profit: 315.8272
-15.499095916748258
24.784393310546903
-26.684997558593782
