<h3>Module 1: Generate ML Training Data</h3>
<br />
<p> Description: 
This module generates the input data used to train our neural network machine learning model.
It achieves this by simulating several thousand randomly generated portfolios with the following characteristics:
    - Randomly draws between 6 and 10 stocks to include within the portfolio
    - Randomly weights each asset within the portfolio
    - Backtests the performance of the portfolio
    - Saves the performance metrics of the portfolio to a dictionary
       
 </p>

In [1]:
%matplotlib inline
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
random.seed(42)
import statsmodels.api as sm
from apidata import (
    closing_prices,
    exp_portfolio_return,
    exp_portfolio_variance,
    mod_sharpe_ratio
)

from scipy import stats

In [2]:
def generate_random_portfolio(tickers_csv="StockTickers.csv", min_stocks=6, max_stocks=10):
    """Pick a random basket of distinct tickers for one simulated portfolio.

    Parameters
    ----------
    tickers_csv : str, optional
        Path to a CSV file with a ``Tickers`` column listing candidate symbols.
    min_stocks, max_stocks : int, optional
        Inclusive bounds on the number of tickers to draw.

    Returns
    -------
    list
        Distinct tickers in random order. The length is drawn uniformly from
        ``[min_stocks, max_stocks]`` and capped at the number of distinct
        tickers available in the file.
    """
    stocklist_df = pd.read_csv(tickers_csv)
    # Drop blanks and repeated symbols so every candidate is equally likely.
    stocklist = stocklist_df["Tickers"].dropna().unique().tolist()

    # Sample without replacement: the earlier choice()-and-skip-duplicates loop
    # silently returned fewer stocks whenever the same ticker was drawn twice.
    n_stocks = min(random.randint(min_stocks, max_stocks), len(stocklist))
    random_portfolio = random.sample(stocklist, n_stocks)

    print("\nRandom Generated Portfolio", random_portfolio)
    return random_portfolio

In [None]:
def compile_random_portfolio(p_stocks):
    """Assemble a closing-price table for the given tickers plus the S&P 500.

    Each ticker's prices are fetched with ``closing_prices`` and inner-joined
    on ``Date``; the ``Close`` column of ``S&P500.csv`` is then joined in as
    ``SP500``. Because every join is an inner join, only dates present for all
    tickers and the benchmark survive.

    Parameters
    ----------
    p_stocks : sequence of str
        Tickers to include; must contain at least one element.

    Returns
    -------
    pandas.DataFrame
        The merged closing prices, indexed by ``Date``.
    """
    lead_ticker = p_stocks[0]
    print(f"\n<Quandl API> Stock Data: {lead_ticker}")
    merged_prices = closing_prices(lead_ticker)

    # Fold the remaining tickers into the running table one at a time.
    for ticker in p_stocks[1:]:
        print(f"<Quandl API> Stock Data: {ticker}")
        merged_prices = pd.merge(
            merged_prices, closing_prices(ticker), on="Date", how="inner"
        )

    # Benchmark: keep only Date/Close, parse the dates, and label the column.
    sp500_close = (
        pd.read_csv("S&P500.csv")[["Date", "Close"]]
        .rename(columns={"Close": "SP500"})
        .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    )

    combined = pd.merge(merged_prices, sp500_close, on="Date", how="inner")

    print("\n[Output] Portfolio Closing Prices\n", combined.head())
    return combined.set_index("Date")

In [None]:
def generate_ml_training_data(sim_portfolio):
    """Backtest one randomly weighted portfolio and return its training record.

    Parameters
    ----------
    sim_portfolio : pandas.DataFrame
        Closing prices with one column per ticker plus an ``SP500`` benchmark
        column (the shape produced by ``compile_random_portfolio``). Rows are
        assumed to be in chronological order -- first row oldest, last newest.

    Returns
    -------
    dict
        ``CTp`` (number of stocks), ``RTNp``/``VARp``/``SHRp`` (values from the
        ``apidata`` helpers, rounded to 4 places), ``BETAp`` (placeholder
        ``"N/A"``), ``SP500`` (benchmark total return over the window) and
        ``PvSP`` (True when the portfolio's return/variance ratio exceeds the
        benchmark's).
    """
    p_stocks = list(sim_portfolio.columns)
    p_stocks.remove("SP500")
    benchmark_portfolio = sim_portfolio["SP500"]
    stock_portfolio = sim_portfolio[p_stocks]

    print('\n[Training] Stock Portfolio\n', stock_portfolio.head())

    # Random long-only weights normalised to sum to 1.
    weights = np.random.random(len(p_stocks))
    weights /= np.sum(weights)

    p_allocation = [(ticker, round(weight, 4)) for ticker, weight in zip(p_stocks, weights)]
    print("\n[Portfolio] Asset Allocation:\n", p_allocation)

    pct_returns = round(stock_portfolio.pct_change().iloc[1:], 4)
    # The portfolio's daily return is the weight-averaged asset return; a plain
    # row sum ignores the allocation and overstates it.
    pct_returns["RTNp"] = pct_returns[p_stocks].dot(weights)
    print("\n[Portfolio] Daily Returns:")
    print(pct_returns.head())

    # Calculate Portfolio Statistics (Return, Variance, Sharpe)
    p_rtn = exp_portfolio_return(stock_portfolio, weights)
    p_var = exp_portfolio_variance(stock_portfolio, weights)

    # Benchmark risk must come from daily *returns*, not raw index levels, to be
    # comparable with the portfolio variance (250 trading days per year).
    # NOTE(review): assumes exp_portfolio_variance is an annualised variance of
    # returns -- confirm against apidata.
    benchmark_returns = benchmark_portfolio.pct_change().iloc[1:]
    sp500_var = benchmark_returns.var() * 250

    mod_sharpe = mod_sharpe_ratio(p_rtn, p_var)

    unweighted_perform = round((stock_portfolio.iloc[-1]/stock_portfolio.iloc[0]) -1, 4)
    print("\n[Portfolio] Unweighted Returns")
    print(unweighted_perform.head())

    weighted_perform = round(np.sum(unweighted_perform[p_stocks] * weights), 4)
    print(f"\n[Portfolio] Weighted Return: {weighted_perform}")

    # .iloc is required: the Series is indexed by Date, so [-1]/[0] are label
    # lookups (deprecated positional fallback in pandas 2.x).
    benchmark_perform = round((benchmark_portfolio.iloc[-1]/benchmark_portfolio.iloc[0])-1, 4)

    # Portfolio Regression Beta Calculation
    # TODO: beta is not computed yet; the placeholder string is kept so the
    # output schema stays unchanged for downstream consumers.
    p_beta = "N/A"
    print(f"\n[Benchmark | S&P500] Perfomance: {benchmark_perform}")

    # Label: did the portfolio beat the benchmark on return per unit of variance?
    pfolio_radj_perform = ((weighted_perform/p_var) > (benchmark_perform/sp500_var))
    pfolio_stats = {
        "CTp":len(p_stocks),
        "RTNp":round(p_rtn, 4),
        "VARp":round(p_var, 4),
        "SHRp":round(mod_sharpe, 4),
        "BETAp":p_beta,
        "SP500":round(benchmark_perform, 4),
        "PvSP":pfolio_radj_perform
    }

    return pfolio_stats

In [None]:
# Initialize an empty list that accumulates the simulated ML training data
# (one stats dict per portfolio). Re-running this cell discards prior results.
ml_training_portfolios = []

In [None]:
# Generate & Store ML Training Portfolios (Loop)
# Number of random portfolios simulated per execution of this cell. Results are
# appended, so running the cell again adds another batch to the list.
N_PORTFOLIOS_PER_RUN = 25

for i in range(N_PORTFOLIOS_PER_RUN):
    # Pipeline: pick tickers -> fetch and merge prices -> compute the stats record.
    mc_pfolio_random = generate_random_portfolio()
    mc_pfolio_compile = compile_random_portfolio(mc_pfolio_random)
    mc_pfolio_stats = generate_ml_training_data(mc_pfolio_compile)
    ml_training_portfolios.append(mc_pfolio_stats)

    print(f"\nRandom Portfolio Generation: {i}\n")
    print(mc_pfolio_stats)

In [None]:
# [View] Total number of training portfolios collected so far in ml_training_portfolios
number_of_training_portfolios = len(ml_training_portfolios)
print("# of Portfolios:", number_of_training_portfolios)

In [None]:
# [View] First five ML training records -- [Output] list of stats dictionaries
ml_training_portfolios[:5]

In [None]:
# Turn the collected stats dicts into a DataFrame with a fixed column order,
# then preview the first rows.
df_headers = ["CTp", "SHRp", "BETAp", "VARp", "RTNp", "SP500", "PvSP"]
df_portfolio = pd.DataFrame(ml_training_portfolios).loc[:, df_headers]
df_portfolio.head()

In [None]:
# Class balance of the PvSP label: how many portfolios did (True) vs. did not
# (False) beat the S&P 500 on the return/variance comparison.
df_portfolio["PvSP"].value_counts()

In [None]:
# Save ML Training Data to Excel (.xlsx) Format
# index=False leaves the DataFrame's row index out of the sheet; an existing
# file with the same name is overwritten.
df_portfolio.to_excel("Portfolio_ML_Training_Data.xlsx", index=False)

In [None]:
# Select portfolios whose recorded S&P 500 return is below 0.3857.
# NOTE(review): 0.3857 is presumably the benchmark's return over the full date
# range, so lower values would flag portfolios whose inner-joined price history
# covers a shorter window -- confirm and promote to a named constant.
errors_df = df_portfolio.loc[df_portfolio["SP500"] < .3857]

In [None]:
# Non-null value count per column for the flagged rows
errors_df.count()

In [None]:
# Preview the first flagged rows
errors_df.head()