In [6]:
! pip install yfinance
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

import math
import numpy as np
import statistics as stats
from sklearn.preprocessing import MinMaxScaler 
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

# import sys
# !{sys.executable} -m pip install pyportfolioopt
! pip install pyportfolioopt
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt.efficient_frontier import EfficientFrontier

#Helper function to generate input data of appropriate shape
def generate_time_series_data(data, batch_size):
  # Generate batches of data
  num_samples = data.shape[0]
  num_batches = num_samples // batch_size
  batchSet = []
  for i in range(num_batches):
    batchSet.append(data[i * batch_size:(i + 1) * batch_size])
  return np.asarray(batchSet)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [7]:
#Load data and create portfolios
Portfolio_tickers = ['AVGO', 'COST', 'FDS', 'FTNT', 'ORLY', 'REGN', 'TMO', 'TSLA', 'UNH', 'CPB', 'K']
Portfolio = yf.download(Portfolio_tickers, start='2021-05-03', end='2022-07-12')['Adj Close']
Portfolio.head()

[*********************100%***********************]  11 of 11 completed


Unnamed: 0_level_0,AVGO,COST,CPB,FDS,FTNT,K,ORLY,REGN,TMO,TSLA,UNH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-05-03,430.409454,375.486542,46.084919,337.009125,40.636002,59.18465,554.960022,488.619995,465.68454,228.300003,396.614746
2021-05-04,425.054901,371.497253,45.942089,340.496826,41.355999,58.798801,559.880005,485.179993,463.592163,224.53334,401.930267
2021-05-05,424.376038,368.735443,46.51339,331.580475,41.875999,59.354038,560.090027,482.420013,466.07309,223.646667,403.063751
2021-05-06,428.917816,378.891785,47.436996,333.068176,41.301998,63.551258,561.219971,498.679993,468.434509,221.179993,405.477234
2021-05-07,432.742523,380.436005,47.179909,332.073059,42.354,63.005432,562.320007,496.75,466.750641,224.123337,408.095947


In [8]:
batch_size = 60

#Data preprocessing
training_data_len = math.ceil(len(Portfolio)* 0.8)
temp = []
tickers = Portfolio.columns.values

scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(Portfolio)
data = scaled_data.reshape(-1,11)
print(f"data shape: {data.shape}")

#Training data
train_data = scaled_data[0: training_data_len, :]

x_train = []
y_train = []

for i in range(batch_size, len(train_data)):
    x_train.append(train_data[i-batch_size:i, :])
    y_train.append(train_data[i, :])

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], x_train.shape[2]))
print(f"train_data shape: {train_data.shape}")
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")

#Test data
test_data = scaled_data[training_data_len-batch_size: , : ]
y_test = data[training_data_len:]
x_test = []

for i in range(batch_size, len(test_data)):
  x_test.append(test_data[i-batch_size:i, :])

x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], x_test.shape[2]))

print(f"test data shape: {test_data.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_test shape: {y_test.shape}")

data shape: (300, 11)
train_data shape: (240, 11)
x_train shape: (180, 60, 11)
y_train shape: (180, 11)
test data shape: (120, 11)
x_test shape: (60, 60, 11)
y_test shape: (60, 11)


In [9]:
#set the hyperparameters
latent_dim = 32
learning_rate = 1e-2
num_epochs = 50
beta = 1 #information bottleneck coefficient
n_stocks = np.shape(x_train)[2]

#Split the data into training and test sets
# x_train, x_test, y_train, y_test = train_test_split(dataStocks, dataSPY, test_size=0.2)
# x_train = generate_time_series_data(x_train, batch_size)
# x_test = generate_time_series_data(x_test, batch_size)
# y_train = generate_time_series_data(y_train, batch_size)
# y_test = generate_time_series_data(y_test, batch_size)
# print(f"shapes 1: {np.shape(x_train)} and {np.shape(y_train)}")

#Reshape the data to be 3D [samples, timesteps, features]
# x_train = x_train.reshape((-1, 1, 1))
# x_test = x_test.reshape(-1, 1, 1)

#Build the model
inputs = tf.keras.Input(shape=(x_train.shape[1], x_train.shape[2]))
lstm_encoder = tf.keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = lstm_encoder(inputs)
encoder_states = [state_h, state_c]

z_mean = tf.keras.layers.Dense(latent_dim)(encoder_outputs)
z_log_var = tf.keras.layers.Dense(latent_dim)(encoder_outputs)

def sampling(args):
  z_mean, z_log_var = args
  epsilon = tf.random.normal(shape=tf.shape(z_mean))
  return z_mean + tf.exp(0.5*z_log_var)*epsilon

z = tf.keras.layers.Lambda(sampling)([z_mean, z_log_var])

lstm_decoder = tf.keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = lstm_decoder(z, initial_state=encoder_states)
decoder_outputs=tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(n_stocks))(decoder_outputs)

#Define the losses for IB and no-IB
def IBLoss(inputs, decoder_outputs):
  reconstruction_loss = tf.keras.losses.MeanSquaredError()(inputs, decoder_outputs)
  kl_loss = -0.5*tf.reduce_mean(z_log_var - tf.square(z_mean)-tf.exp(z_log_var) + 1)
  information_bottleneck_loss = beta*kl_loss
  loss = reconstruction_loss + information_bottleneck_loss
  return loss

def MSELoss(inputs, decoder_outputs):
  reconstruction_loss = tf.keras.losses.MeanSquaredError()(inputs, decoder_outputs)
  return reconstruction_loss

#Define the optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

#Compile the model
model = tf.keras.Model(inputs, decoder_outputs)
model.compile(optimizer=optimizer, loss=IBLoss)

model2 = tf.keras.Model(inputs, decoder_outputs)
model2.compile(optimizer=optimizer, loss=MSELoss)

#Train the model
model.fit(x_train, y_train, batch_size=1, epochs=num_epochs)
model2.fit(x_train, y_train, batch_size=1, epochs=num_epochs)

#Evaluate the model
predictions = np.mean(model.predict(x_test), axis=0)
predictions = scaler.inverse_transform(predictions)
rmse = np.sqrt(np.mean(predictions - y_test)**2)

predictions2 = np.mean(model2.predict(x_test), axis=0)
predictions2 = scaler.inverse_transform(predictions2)
rmse2 = np.sqrt(np.mean(predictions2 - y_test)**2)

print(f"IB rmse: {rmse}")
print(f"no IB rmse: {rmse2}")

# test_loss = model.evaluate(x_test, y_test)
# print(f'Test loss: {test_loss}')



Train on 180 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Train on 180 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoc

  updates=self.state_updates,
  updates=self.state_updates,


IB rmse: 377.39625257736736
no IB rmse: 377.4073726742098


In [10]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 5313116654802279898
 xla_global_id: -1, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 40231960576
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 13740885530245362191
 physical_device_desc: "device: 0, name: A100-SXM4-40GB, pci bus id: 0000:00:04.0, compute capability: 8.0"
 xla_global_id: 416903419]

In [24]:
#Predicted stock prices with two models
Port_1 = pd.DataFrame(predictions, columns = Portfolio_tickers)
Port_2 = pd.DataFrame(predictions, columns = Portfolio_tickers)

[*********************100%***********************]  11 of 11 completed
60


In [25]:
def calcSharpe(port):
  mu = expected_returns.capm_return(port)
  Sigma = risk_models.CovarianceShrinkage(port).ledoit_wolf()

  ef = EfficientFrontier(mu, Sigma)
  ef.max_sharpe()
  weights = ef.clean_weights()

  portfolio_mean = 0
  portfolio_var = 0

  for ticker in weights.keys():
      portfolio_mean += weights[ticker]*mu[ticker]

  for ticker1 in weights.keys():
      for ticker2 in weights.keys():
          portfolio_var += weights[ticker1]*weights[ticker2]*Sigma[ticker1][ticker2]

  portfolio_std = portfolio_var ** (1/2)

  portfolio_sharpe = portfolio_mean/portfolio_std
  return portfolio_sharpe
     

In [32]:
print(f"Sharpe Ratios\nPortfolio with IB: {calcSharpe(Port_1)}\nPortfolio without IB: {calcSharpe(Port_2)}")

Sharpe Ratios
Portfolio with IB: 9.497522368882235
Portfolio without IB: 9.497522368882235
