# Self-contained Robo-advisor

In [None]:
import pandas as pd
import plotly.graph_objects as go

from PortfolioPredictor import evaluate_portfolios_over_time, extract_asset_returns, chain_portfolio_performance
from fetchData import fetch_raw_data_yf, getSNP500, getNasdaq_comp, fetch_raw_data_yf_all
from LearningRBA import MLRBA_V2
from PortfolioFunction import get_matrices

In [None]:
# Asset universe and date windows. Data is fetched from `start_date` up to
# `investment_end_date`, i.e. `investment_period` calendar days past `end_date`.
assets = getSNP500()  # presumably the S&P 500 ticker list -- confirm in fetchData
start_date = pd.Timestamp("2014-01-01")
end_date = pd.Timestamp("2016-01-01")
investment_period = 910  # days, consumed by pd.Timedelta(days=...) below
investment_end_date = end_date + pd.Timedelta(days=investment_period)
raw_data, _, _ = fetch_raw_data_yf(assets, start_date, investment_end_date)

# Training window: everything up to and including `end_date`.
raw_data_train = raw_data.loc[:end_date]
# Label slices with `.loc` include BOTH endpoints, so `raw_data.loc[end_date:]`
# would put a row dated exactly `end_date` in the training AND the test set.
# Start the test window strictly after `end_date` to keep the split disjoint.
raw_data_test = raw_data.loc[raw_data.index > end_date]

In [None]:
# Derive the per-asset statistics from the training window only
# (`raw_data_train` is the same `raw_data` slice up to `end_date`), then ask
# MLRBA_V2 for a portfolio; only its second return value is kept.
names, annualized_returns, x, x1, _, cov, correlation_matrix = get_matrices(
    raw_data_train
)
_, initial_portfolio, _, _, _ = MLRBA_V2(
    names,
    cov,
    annualized_returns,
    correlation_matrix,
)

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go

class PortfolioPredictorDirectMultiStep:
    """Direct multi-step LSTM forecaster for a weighted portfolio series.

    The portfolio's columns are min-max scaled, combined into one series with
    the portfolio weights, cut into sliding windows of ``lookback`` points and
    fed to a stacked LSTM whose dense head emits ``n_steps`` future values at
    once.

    Typical call order: ``preprocess_data`` -> ``build_model`` ->
    ``train_model`` -> ``predict`` -> plotting / ``compute_performance``.

    Args:
        raw_data_train: DataFrame used for fitting; indexed by date, one
            column per ticker (presumably prices -- confirm against the
            data loader).
        raw_data_test: DataFrame with the same columns for the evaluation
            period.
        best_portfolio: Mapping with a ``'tickers'`` entry (column labels to
            select) and a ``'weights'`` entry (one weight per ticker).
        lookback: Number of past points in each input window.
        n_steps: Number of future points predicted per window.
        epochs: Training epochs passed to ``model.fit``.
        batch_size: Batch size passed to ``model.fit``.
        activation_function: Activation used by both LSTM layers.
    """

    def __init__(self, raw_data_train, raw_data_test, best_portfolio,
                 lookback=10, n_steps=5, epochs=50, batch_size=32, activation_function='tanh'):
        self.raw_data_train = raw_data_train
        self.raw_data_test = raw_data_test
        self.best_portfolio = best_portfolio
        self.lookback = lookback
        self.n_steps = n_steps
        self.epochs = epochs
        self.batch_size = batch_size
        self.activation_function = activation_function
        self.model = None
        self.history = None
        # Populated by preprocess_data() / predict(); declared here so the
        # object's full state is visible in one place.
        self.scaler = None
        self.weighted_returns_train = None
        self.weighted_returns_test = None
        self.predictions = None
        self.y_test = None

    def preprocess_data(self):
        """Scale the portfolio columns and collapse them into weighted series.

        Sets ``self.scaler``, ``self.weighted_returns_train`` and
        ``self.weighted_returns_test``. The test series is prefixed with the
        last ``lookback`` training points so that the very first test date
        already has a full input window.
        """
        tickers = self.best_portfolio['tickers']
        train_frame = self.raw_data_train[tickers]
        test_frame = self.raw_data_test[tickers]
        weights = np.array(self.best_portfolio['weights'])

        # Fit the scaler on the training window ONLY. Fitting on train + test
        # would leak the test period's min/max into preprocessing (look-ahead
        # bias). As a consequence scaled test values may fall outside [0, 1].
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.scaler.fit(train_frame)

        scaled_train = self.scaler.transform(train_frame)
        scaled_test = self.scaler.transform(test_frame)

        # Seed the test series with the tail of the training data.
        scaled_test = np.concatenate([scaled_train[-self.lookback:], scaled_test], axis=0)

        # NOTE: despite the attribute names these are weighted sums of the
        # scaled input columns, not period-over-period returns.
        self.weighted_returns_train = np.dot(scaled_train, weights)
        self.weighted_returns_test = np.dot(scaled_test, weights)

    def create_datasets(self, data):
        """Cut a 2-D array into (input window, multi-step target) pairs.

        Args:
            data: Array of shape ``(n_samples, n_features)``.

        Returns:
            ``(X, y)`` where ``X[i]`` is ``data[i : i + lookback, :]`` and
            ``y[i]`` holds the next ``n_steps`` values of column 0. Both are
            empty arrays when ``data`` is too short for a single window.
        """
        n_windows = len(data) - self.lookback - self.n_steps + 1
        X, y = [], []
        for start in range(n_windows):
            split = start + self.lookback
            X.append(data[start:split, :])
            y.append(data[split:split + self.n_steps, 0])
        return np.array(X), np.array(y)

    def build_model(self):
        """Create and compile the network (LSTM 250 -> LSTM 50 -> Dense n_steps).

        Each LSTM layer is followed by 20% dropout; the model is compiled with
        the Adam optimizer and mean-squared-error loss.
        """
        self.model = Sequential([
            LSTM(250, activation=self.activation_function, return_sequences=True),
            Dropout(0.2),
            LSTM(50, activation=self.activation_function, return_sequences=False),
            Dropout(0.2),
            Dense(self.n_steps)
        ])
        self.model.compile(optimizer='adam', loss='mse')

    def train_model(self):
        """Fit the model on windows of the weighted training series.

        Builds the model first if ``build_model`` has not been called. The
        Keras ``History`` object is stored in ``self.history``.
        """
        if self.model is None:
            self.build_model()
        X_train, y_train = self.create_datasets(self.weighted_returns_train.reshape(-1, 1))
        self.history = self.model.fit(X_train, y_train,
                                      epochs=self.epochs,
                                      batch_size=self.batch_size,
                                      # Only a very small fraction is held out
                                      # for validation.
                                      validation_split=0.001,
                                      # Keep chronological order.
                                      shuffle=False,
                                      verbose=0)

    def predict(self):
        """Predict ``n_steps`` values for every window of the test series.

        Stores the predictions in ``self.predictions`` and the matching
        targets in ``self.y_test``.

        Returns:
            ``(predictions, prediction_dates)``. ``prediction_dates[i]`` is
            the test date of the FIRST forecast step of window ``i``.
        """
        X_test, y_test = self.create_datasets(self.weighted_returns_test.reshape(-1, 1))
        self.predictions = self.model.predict(X_test)
        self.y_test = y_test

        # The test series starts with `lookback` training points, so window i
        # targets test rows i .. i + n_steps - 1. There is therefore exactly
        # one prediction per leading test date; slicing the index from
        # `lookback + n_steps - 1` (as before) gave the wrong length/offset.
        prediction_dates = self.raw_data_test.index[:len(self.predictions)]
        return self.predictions, prediction_dates

    def compute_cumulative_returns(self, data, baseline):
        """Rebase a series so that its first value equals ``baseline``.

        Args:
            data: NumPy array; it is flattened before use.
            baseline: Value the first element is mapped to.

        Returns:
            ``pandas.Series`` equal to ``data / data[0] * baseline``.
        """
        data_series = pd.Series(data.flatten())
        cumulative_returns = data_series / data_series.iloc[0] * baseline
        return cumulative_returns

    def plot_loss(self):
        """Plot training loss (and validation loss, when recorded) per epoch."""
        losses = self.history.history
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=np.arange(1, len(losses['loss']) + 1),
            y=losses['loss'],
            mode='lines',
            name='Training Loss'
        ))
        # 'val_loss' only exists when Keras actually evaluated a validation
        # set, so do not assume the key is present.
        if 'val_loss' in losses:
            fig.add_trace(go.Scatter(
                x=np.arange(1, len(losses['val_loss']) + 1),
                y=losses['val_loss'],
                mode='lines',
                name='Validation Loss'
            ))
        fig.update_layout(
            title='Training and Validation Loss Over Epochs',
            xaxis_title='Epoch',
            yaxis_title='Loss',
            legend_title='Type of Loss',
            font=dict(family="Cambria", size=18)
        )
        fig.show()

    def plot_predictions(self):
        """Plot the rebased training series and actual vs. predicted test series.

        The training curve starts at 100. The actual and predicted test curves
        are each rebased to start at the training curve's final value, and
        only the first forecast step of every window is drawn.
        """
        normalized_train = self.compute_cumulative_returns(self.weighted_returns_train, 100)
        training_end_value = normalized_train.iloc[-1]
        test_first_day = self.y_test[:, 0]
        normalized_test = self.compute_cumulative_returns(test_first_day, training_end_value)
        predicted_first_day = self.predictions[:, 0]
        normalized_predicted = self.compute_cumulative_returns(predicted_first_day, training_end_value)

        # One point per window: window i's first step falls on test date i, so
        # the x-axis must be cut to the number of windows (it is shorter than
        # the test index by n_steps - 1).
        test_dates = self.raw_data_test.index[:len(normalized_test)]

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            # The training series has one value per training row, so it is
            # plotted against the full training index (no lookback offset).
            x=self.raw_data_train.index,
            y=normalized_train,
            mode='lines',
            name='Actual Training Returns'
        ))
        fig.add_trace(go.Scatter(
            x=test_dates,
            y=normalized_test,
            mode='lines',
            name='Actual Test Returns'
        ))
        fig.add_trace(go.Scatter(
            x=test_dates,
            y=normalized_predicted,
            mode='lines',
            name='Predicted Returns'
        ))
        fig.update_layout(
            title='Actual vs Predicted Returns',
            xaxis_title='Date',
            yaxis_title='Cumulative Returns',
            legend_title='Portfolio',
            font=dict(family="Cambria", size=18)
        )
        fig.show()

    def compute_performance(self):
        """Print and return the mean absolute percentage error and ``100 - MAPE``.

        Targets that are exactly zero are skipped because the percentage error
        is undefined there (they previously turned the whole mean into
        inf/NaN). If every target is zero, both results are NaN.

        Returns:
            ``(mean_percentage_error, accuracy)``, both in percent.
        """
        actual = self.y_test.flatten()
        predicted = self.predictions.flatten()
        usable = actual != 0
        if usable.any():
            percentage_diff = np.abs((predicted[usable] - actual[usable]) / actual[usable]) * 100
            mean_percentage_error = np.mean(percentage_diff)
        else:
            mean_percentage_error = float('nan')
        accuracy = 100 - mean_percentage_error
        print(f"Mean Percentage Error: {mean_percentage_error}%, Accuracy: {accuracy}%")
        return mean_percentage_error, accuracy

In [None]:
# Set up the multi-step predictor for the portfolio chosen above.
portfolio_predictor = PortfolioPredictorDirectMultiStep(
    raw_data_train,
    raw_data_test,
    initial_portfolio,
    n_steps=5,
    epochs=50,
    activation_function='tanh',
)

# Pipeline: scale/weight the data, build the network, fit it.
portfolio_predictor.preprocess_data()
portfolio_predictor.build_model()
portfolio_predictor.train_model()

# predict() returns a (predictions, prediction_dates) tuple.
prediction = portfolio_predictor.predict()

# Diagnostics: loss curves, actual-vs-predicted chart, error summary.
portfolio_predictor.plot_loss()
portfolio_predictor.plot_predictions()
portfolio_predictor.compute_performance()

In [None]:
import numpy as np

# Toy input: the integers 0..29 as a single-feature column, shape (30, 1).
data = np.arange(30).reshape(-1, 1)

def create_datasets(lookback, n_steps, data):
    """Slice ``data`` into sliding input windows and multi-step targets.

    Args:
        lookback: Number of consecutive rows in each input window.
        n_steps: Number of rows after the window used as the target.
        data: 2-D array of shape ``(n_samples, n_features)``.

    Returns:
        ``(X, y)``: ``X[i]`` is ``data[i : i + lookback, :]`` and ``y[i]`` is
        column 0 of the ``n_steps`` rows that follow it. Both arrays are
        empty when ``data`` is too short to form a single pair.
    """
    # range() of a non-positive count is empty, so short inputs yield no pairs.
    starts = range(len(data) - lookback - n_steps + 1)
    windows = [data[s:s + lookback, :] for s in starts]
    targets = [data[s + lookback:s + lookback + n_steps, 0] for s in starts]
    return np.array(windows), np.array(targets)

# With 30 rows, lookback=10 and n_steps=5 there are 30 - 10 - 5 + 1 = 16
# windows: x has shape (16, 10, 1) and y has shape (16, 5).
x, y = create_datasets(lookback=10, n_steps=5, data=data)