In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt

import math
import numpy as np
import statistics as stats
from sklearn.preprocessing import MinMaxScaler 
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# import sys
# !{sys.executable} -m pip install pyportfolioopt
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt.efficient_frontier import EfficientFrontier

#Helper function to generate input data of appropriate shape
def generate_time_series_data(data, batch_size):
  """Cut `data` along its first axis into consecutive, non-overlapping batches.

  Only complete batches are kept: trailing rows that do not fill a whole
  batch of `batch_size` rows are dropped.

  Args:
    data: object exposing `.shape` and slice indexing on its first axis.
    batch_size: number of rows per batch.

  Returns:
    np.ndarray built from the `data.shape[0] // batch_size` slices.
  """
  full_batches = data.shape[0] // batch_size
  # Walk over the start offset of every complete batch.
  chunks = [
      data[start:start + batch_size]
      for start in range(0, full_batches * batch_size, batch_size)
  ]
  return np.asarray(chunks)

In [None]:
#Load data and create portfolios
Portfolio_tickers = ['AVGO', 'COST', 'FDS', 'FTNT', 'ORLY', 'REGN', 'TMO', 'TSLA', 'UNH']
Long_tickers = ['CPB', 'K']
START_DATE = '2021-05-03'
END_DATE = '2022-07-12'

def download_adj_close(tickers, start=START_DATE, end=END_DATE):
  """Download price history for `tickers` and return its 'Adj Close' columns.

  `auto_adjust=False` is passed explicitly: yfinance releases whose default is
  `auto_adjust=True` fold the adjustment into 'Close' and omit 'Adj Close',
  which would make the column lookup below raise KeyError.
  """
  prices = yf.download(tickers, start=start, end=end, auto_adjust=False)
  return prices['Adj Close']

Portfolio = download_adj_close(Portfolio_tickers)
Long = download_adj_close(Long_tickers)

#Single-column frames (double brackets keep them as DataFrames for concat)
CPB = Long[['CPB']]
K = Long[['K']]

#Base portfolio plus CPB, plus K, and plus both
Portfolio1 = pd.concat([Portfolio, CPB], axis=1)
Portfolio2 = pd.concat([Portfolio, K], axis=1)
Portfolio3 = pd.concat([Portfolio, Long], axis=1)

In [None]:
batch_size = 60 #length of each look-back window (rows per input sequence)

#Data preprocessing
training_data_len = math.ceil(len(Portfolio3)* 0.8)
temp = []
tickers = Portfolio3.columns.values
n_features = Portfolio3.shape[1]

#Fail early with a clear message instead of an IndexError on an empty window array
assert training_data_len > batch_size, (
    f"need more than {batch_size} training rows, got {training_data_len}")

def make_windows(series, window):
  """Build sliding windows over a 2-D array.

  Args:
    series: 2-D array of shape (rows, features).
    window: number of consecutive rows in each input sequence.

  Returns:
    (x, y) where x[k] is series[k:k + window] and y[k] is the row that
    immediately follows that window, i.e. series[k + window].
  """
  x = np.array([series[end - window:end, :] for end in range(window, len(series))])
  y = series[window:]
  return x, y

#Fit the scaler on the training rows only so the min/max of the held-out
#period do not leak into training; scaled test values may fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(Portfolio3.iloc[:training_data_len])
scaled_data = scaler.transform(Portfolio3)
#Column count is taken from the frame instead of a hard-coded 11
data = scaled_data.reshape(-1, n_features)
print(f"data shape: {data.shape}")

#Training data
train_data = scaled_data[0: training_data_len, :]
x_train, y_train = make_windows(train_data, batch_size)
print(f"train_data shape: {train_data.shape}")
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")

#Test data
#Start batch_size rows early so the first test target has a full look-back window
test_data = scaled_data[training_data_len-batch_size: , : ]
x_test, y_test = make_windows(test_data, batch_size)

print(f"test data shape: {test_data.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_test shape: {y_test.shape}")

In [None]:
#set the hyperparameters
SEED = 42 #fixed seed so weight initialisation and latent sampling are repeatable
np.random.seed(SEED)
tf.random.set_seed(SEED)

latent_dim = 32 #unit count used for the LSTM and latent Dense layers
learning_rate = 1e-2
num_epochs = 50
beta = 1 #information bottleneck coefficient
n_stocks = np.shape(x_train)[2] #size of the last axis of x_train (one feature per ticker column)

#Split the data into training and test sets
# x_train, x_test, y_train, y_test = train_test_split(dataStocks, dataSPY, test_size=0.2)
# x_train = generate_time_series_data(x_train, batch_size)
# x_test = generate_time_series_data(x_test, batch_size)
# y_train = generate_time_series_data(y_train, batch_size)
# y_test = generate_time_series_data(y_test, batch_size)
# print(f"shapes 1: {np.shape(x_train)} and {np.shape(y_train)}")

#Reshape the data to be 3D [samples, timesteps, features]
# x_train = x_train.reshape((-1, 1, 1))
# x_test = x_test.reshape(-1, 1, 1)

#Build the model
def sampling(args):
  """Reparameterised draw: z_mean + exp(0.5*z_log_var) * standard normal noise.

  Args:
    args: pair (z_mean, z_log_var) of tensors with identical shape.

  Returns:
    Tensor with the same shape as z_mean.
  """
  z_mean, z_log_var = args
  epsilon = tf.random.normal(shape=tf.shape(z_mean))
  return z_mean + tf.exp(0.5*z_log_var)*epsilon


class KLRegularizer(tf.keras.layers.Layer):
  """Pass-through layer that adds a weighted KL term to the model loss.

  A compiled loss only receives (y_true, y_pred), so it cannot read the
  latent statistics. Registering the term with add_loss inside the layer
  lets Keras add it to the training objective.
  """

  def __init__(self, kl_weight=1.0, **kwargs):
    super().__init__(**kwargs)
    self.kl_weight = kl_weight

  def call(self, inputs):
    z_mean, z_log_var = inputs
    kl_loss = -0.5*tf.reduce_mean(z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
    self.add_loss(self.kl_weight*kl_loss)
    #Return z_mean unchanged so the layer stays on the forward path
    return z_mean

  def get_config(self):
    config = super().get_config()
    config.update({"kl_weight": self.kl_weight})
    return config


def build_forecaster(kl_weight):
  """Create a fresh LSTM encoder / latent sample / LSTM decoder network.

  Every call creates new layers, so two models built this way share no
  weights. The output has shape (batch, n_stocks), matching y_train.

  Args:
    kl_weight: coefficient of the KL term; a falsy value (0) builds the same
      architecture without any KL penalty.

  Returns:
    An uncompiled tf.keras.Model.
  """
  inputs = tf.keras.Input(shape=(x_train.shape[1], x_train.shape[2]))
  lstm_encoder = tf.keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True)
  encoder_outputs, state_h, state_c = lstm_encoder(inputs)
  encoder_states = [state_h, state_c]

  z_mean = tf.keras.layers.Dense(latent_dim)(encoder_outputs)
  z_log_var = tf.keras.layers.Dense(latent_dim)(encoder_outputs)
  if kl_weight:
    z_mean = KLRegularizer(kl_weight)([z_mean, z_log_var])

  z = tf.keras.layers.Lambda(sampling)([z_mean, z_log_var])

  #The decoder returns only its final step, giving one prediction per window
  lstm_decoder = tf.keras.layers.LSTM(latent_dim)
  decoder_state = lstm_decoder(z, initial_state=encoder_states)
  decoder_outputs = tf.keras.layers.Dense(n_stocks)(decoder_state)
  return tf.keras.Model(inputs, decoder_outputs)


#Define the losses for IB and no-IB
def MSELoss(inputs, decoder_outputs):
  """Mean squared error between targets (`inputs`) and predictions (`decoder_outputs`)."""
  reconstruction_loss = tf.keras.losses.MeanSquaredError()(inputs, decoder_outputs)
  return reconstruction_loss

def IBLoss(inputs, decoder_outputs):
  """Reconstruction part of the IB objective.

  The beta-weighted KL part is added by the KLRegularizer layer inside the
  IB model, so the total training loss of that model is this value plus
  beta * KL.
  """
  return MSELoss(inputs, decoder_outputs)

#Define the optimizers (one per model so no optimizer state is shared)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
optimizer2 = tf.keras.optimizers.Adam(learning_rate=learning_rate)

#Compile the models: independent networks with and without the KL term
model = build_forecaster(kl_weight=beta)
model.compile(optimizer=optimizer, loss=IBLoss)

model2 = build_forecaster(kl_weight=0)
model2.compile(optimizer=optimizer2, loss=MSELoss)

#Train the model
model.fit(x_train, y_train, batch_size=1, epochs=num_epochs)
model2.fit(x_train, y_train, batch_size=1, epochs=num_epochs)

#Evaluate the model
def evaluate_forecaster(fitted_model, x_windows, y_scaled):
  """Predict on `x_windows` and score against `y_scaled` in original units.

  Args:
    fitted_model: object with a Keras-style `predict` method.
    x_windows: model input, one look-back window per sample.
    y_scaled: targets in the scaler's transformed space, one row per sample.

  Returns:
    (forecast, rmse): forecast is the inverse-transformed prediction with one
    row per sample; rmse is the root of the mean squared error over all
    samples and columns, so columns with larger values weigh more.
  """
  forecast = fitted_model.predict(x_windows)
  if forecast.ndim == 3:
    #A sequence-output model gives (samples, steps, features); the final
    #step is used as the forecast for the row following each window.
    forecast = forecast[:, -1, :]
  #Bring predictions and targets back to the same (unscaled) units
  forecast = scaler.inverse_transform(forecast)
  actual = scaler.inverse_transform(y_scaled)
  #Square the errors before averaging; squaring the mean is not an RMSE
  error = np.sqrt(np.mean((forecast - actual)**2))
  return forecast, error

predictions, rmse = evaluate_forecaster(model, x_test, y_test)
predictions2, rmse2 = evaluate_forecaster(model2, x_test, y_test)

print(f"IB rmse: {rmse}")
print(f"no IB rmse: {rmse2}")

# test_loss = model.evaluate(x_test, y_test)
# print(f'Test loss: {test_loss}')

In [None]:
def calcSharpe(port):
  """Return expected return divided by volatility for the max-Sharpe weights of `port`.

  The expected returns come from `expected_returns.capm_return` and the
  covariance from `risk_models.CovarianceShrinkage(...).ledoit_wolf()`.
  Weights are the cleaned result of `EfficientFrontier.max_sharpe()`.
  No risk-free rate is subtracted in the returned ratio.

  Args:
    port: price DataFrame with one column per ticker.

  Returns:
    portfolio_mean / portfolio_std for the optimised weights.
  """
  mu = expected_returns.capm_return(port)
  Sigma = risk_models.CovarianceShrinkage(port).ledoit_wolf()

  ef = EfficientFrontier(mu, Sigma)
  ef.max_sharpe()
  weights = ef.clean_weights()

  #Index everything by the weight tickers so the dot products stay aligned
  w = pd.Series(weights, dtype=float)
  portfolio_mean = w @ mu.loc[w.index]
  #w' Sigma w (the original referenced an undefined lowercase `sigma` here)
  portfolio_var = w @ Sigma.loc[w.index, w.index] @ w

  portfolio_std = portfolio_var ** (1/2)

  portfolio_sharpe = portfolio_mean/portfolio_std
  return portfolio_sharpe

In [None]:
#The frames built in the data-loading cell are named Portfolio1..3 (Port1..3 are never defined)
print(f"Sharpe Ratios\nPortfolio 1: {calcSharpe(Portfolio1)}\nPortfolio 2: {calcSharpe(Portfolio2)}\nPortfolio 3: {calcSharpe(Portfolio3)}")