<h3>Module 1: Generate ML Training Data</h3>
<br />
<p> Desription: 
This module generates the input data used to train our neural network machine learning model.
It achieves this by simulating several thousand randomly generated portfolios with the following characteristics:
    - Randomly between 5-10 stocks to include w/n the portfolio
    - Randomly weights each asset within the portfolio
    - Backtests the performace of the portfolio
    - Saves the performance metrics of the portfolio to a dicitionary
       
 </p>

In [1]:
%matplotlib inline
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
random.seed(42)
import statsmodels.api as sm
from apidata import (
    closing_prices,
    exp_portfolio_return,
    exp_portfolio_variance,
    mod_sharpe_ratio
)

from scipy import stats

In [2]:
def generate_random_portfolio():
    stocklist_df = pd.read_csv("StockTickers.csv")
    stocklist = list(dict(stocklist_df)["Tickers"])

    random_portfolio = []
    for i in range(random.randint(6, 10)):
        add_stock = random.choice(stocklist)
        if add_stock not in random_portfolio:
            random_portfolio.append(add_stock)
            del add_stock
    
    print("\nRandom Generated Portfolio", random_portfolio)
    return random_portfolio

In [3]:
def compile_random_portfolio(p_stocks):
    print(f"\n<Quandl API> Stock Data: {p_stocks[0]}")
    sim_portfolio = closing_prices(p_stocks[0])
    for stock in p_stocks[1:]:
        print(f"<Quandl API> Stock Data: {stock}")
        add_stock = closing_prices(stock)
        sim_portfolio = pd.merge(sim_portfolio, add_stock, on="Date", how="inner")
        del add_stock
    
    benchmark = pd.read_csv("S&P500.csv")[["Date", "Close"]]
    benchmark["Date"] = pd.to_datetime(benchmark["Date"])
    benchmark = benchmark.rename(columns={"Close":"SP500"})
    
    output_portfolio = pd.merge(sim_portfolio, benchmark, on="Date", how="inner")
    
    print("\n[Output] Portfolio Closing Prices\n", output_portfolio.head())
    return output_portfolio.set_index("Date")

In [4]:
def generate_ml_training_data(sim_portfolio):    
    p_stocks = list(sim_portfolio.columns)
    p_stocks.remove("SP500")
    benchmark_portfolio = sim_portfolio["SP500"]
    stock_portfolio = sim_portfolio[p_stocks]
    
    print('\n[Training] Stock Portfolio\n', stock_portfolio.head())

    weights = np.random.random(len(p_stocks))
    weights /= np.sum(weights)
    
    p_allocation = [(pos[0],round(pos[1], 4)) for pos in zip(p_stocks, weights)]
    print("\n[Portfolio] Asset Allocation:\n", p_allocation)
    
    pct_returns = round(stock_portfolio.pct_change().iloc[1:], 4)
    pct_returns["RTNp"] = np.sum(pct_returns, axis=1)
    print("\n[Portfolio] Daily Returns:")
    print(pct_returns.head())
    
    # Calculate Portfolio Statistics (Return, Variance, Sharpe)
    p_rtn = exp_portfolio_return(stock_portfolio, weights)
    p_var = exp_portfolio_variance(stock_portfolio, weights)
    sp500_rtn = benchmark_portfolio.mean() * 250
    sp500_var = benchmark_portfolio.var() * 250
    
    mod_sharpe = mod_sharpe_ratio(p_rtn, p_var)
    
    unweighted_perform = round((stock_portfolio.iloc[-1]/stock_portfolio.iloc[0]) -1, 4)
    print("\n[Portfolio] Unweighted Returns")
    print(unweighted_perform.head())
    
    weighted_perform = round(np.sum(unweighted_perform[p_stocks] * weights), 4)
    print(f"\n[Portfolio] Weighted Return: {weighted_perform}")
    
    benchmark_perform = round((benchmark_portfolio[-1]/benchmark_portfolio[0])-1, 4)
    
    # Portfolio Regression Beta Calculation
    p_beta = "N/A"
    print(f"\n[Benchmark | S&P500] Perfomance: {benchmark_perform}")
    
    pfolio_radj_perform = ((weighted_perform/p_var) > (benchmark_perform/sp500_var))
    pfolio_stats = {
        "CTp":len(p_stocks),
        "RTNp":round(p_rtn, 4),
        "VARp":round(p_var, 4),
        "SHRp":round(mod_sharpe, 4),
        "BETAp":p_beta,
        "SP500":round(benchmark_perform, 4),
        "PvSP":pfolio_radj_perform
    }
    
    return pfolio_stats

In [5]:
# Initialize Empty List - Save Simulate Ml Training Data 
ml_training_portfolios = []

In [6]:
# Generate & Store ML Training Portfolios (Loop)
for i in range(25):
    mc_pfolio_random = generate_random_portfolio()
    mc_pfolio_compile = compile_random_portfolio(mc_pfolio_random)
    mc_pfolio_stats = generate_ml_training_data(mc_pfolio_compile)    
    ml_training_portfolios.append(mc_pfolio_stats)    
    
    print(f"\nRandom Portfolio Generation: {i}\n")
    print(mc_pfolio_stats)


Random Generated Portfolio ['JPM', 'F', 'T', 'DIS', 'BAC']

<Quandl API> Stock Data: JPM
<Quandl API> Stock Data: F
<Quandl API> Stock Data: T
<Quandl API> Stock Data: DIS
<Quandl API> Stock Data: BAC

[Output] Portfolio Closing Prices
         Date    JPM      F      T    DIS    BAC      SP500
0 2014-01-02  58.21  15.44  34.95  76.27  16.10  25.799999
1 2014-01-03  58.66  15.51  34.80  76.11  16.41  25.930000
2 2014-01-06  59.00  15.58  34.96  75.82  16.66  25.379999
3 2014-01-07  58.32  15.38  34.95  76.34  16.50  25.510000
4 2014-01-08  58.87  15.54  34.24  75.22  16.58  25.360001

[Training] Stock Portfolio
               JPM      F      T    DIS    BAC
Date                                         
2014-01-02  58.21  15.44  34.95  76.27  16.10
2014-01-03  58.66  15.51  34.80  76.11  16.41
2014-01-06  59.00  15.58  34.96  75.82  16.66
2014-01-07  58.32  15.38  34.95  76.34  16.50
2014-01-08  58.87  15.54  34.24  75.22  16.58

[Portfolio] Asset Allocation:
 [('JPM', 0.3552), ('F', 0

<Quandl API> Stock Data: S
<Quandl API> Stock Data: MGM
<Quandl API> Stock Data: JPM
<Quandl API> Stock Data: HAL
<Quandl API> Stock Data: MSFT
<Quandl API> Stock Data: TSLA
<Quandl API> Stock Data: PFE
<Quandl API> Stock Data: VZ

[Output] Portfolio Closing Prices
         Date      T      S    MGM    JPM    HAL   MSFT    TSLA    PFE     VZ  \
0 2014-01-02  34.95  10.40  23.72  58.21  50.01  37.16  150.10  30.46  49.00   
1 2014-01-03  34.80   9.94  23.45  58.66  50.13  36.91  149.56  30.52  48.42   
2 2014-01-06  34.96   9.77  23.48  59.00  50.32  36.13  147.00  30.55  48.69   
3 2014-01-07  34.95   9.87  24.51  58.32  50.20  36.41  149.36  30.74  49.30   
4 2014-01-08  34.24   9.98  24.73  58.87  49.50  35.76  151.28  30.95  48.50   

       SP500  
0  25.799999  
1  25.930000  
2  25.379999  
3  25.510000  
4  25.360001  

[Training] Stock Portfolio
                 T      S    MGM    JPM    HAL   MSFT    TSLA    PFE     VZ
Date                                                      

<Quandl API> Stock Data: CVX
<Quandl API> Stock Data: GLW
<Quandl API> Stock Data: TXN
<Quandl API> Stock Data: USB
<Quandl API> Stock Data: DVA

[Output] Portfolio Closing Prices
         Date    NKE     CVX    GLW    TXN    USB    DVA      SP500
0 2014-01-02  78.24  124.14  17.77  43.10  39.89  64.78  25.799999
1 2014-01-03  78.03  124.35  17.89  43.29  40.06  64.47  25.930000
2 2014-01-06  77.43  124.02  17.73  42.93  40.59  64.80  25.379999
3 2014-01-07  77.49  125.07  17.84  42.70  40.93  65.20  25.510000
4 2014-01-08  77.09  123.29  17.98  43.29  41.09  65.44  25.360001

[Training] Stock Portfolio
               NKE     CVX    GLW    TXN    USB    DVA
Date                                                 
2014-01-02  78.24  124.14  17.77  43.10  39.89  64.78
2014-01-03  78.03  124.35  17.89  43.29  40.06  64.47
2014-01-06  77.43  124.02  17.73  42.93  40.59  64.80
2014-01-07  77.49  125.07  17.84  42.70  40.93  65.20
2014-01-08  77.09  123.29  17.98  43.29  41.09  65.44

[Portfoli

<Quandl API> Stock Data: TXN
<Quandl API> Stock Data: MSFT
<Quandl API> Stock Data: NKE
<Quandl API> Stock Data: F
<Quandl API> Stock Data: DIS

[Output] Portfolio Closing Prices
         Date    USB    TXN   MSFT    NKE      F    DIS      SP500
0 2014-01-02  39.89  43.10  37.16  78.24  15.44  76.27  25.799999
1 2014-01-03  40.06  43.29  36.91  78.03  15.51  76.11  25.930000
2 2014-01-06  40.59  42.93  36.13  77.43  15.58  75.82  25.379999
3 2014-01-07  40.93  42.70  36.41  77.49  15.38  76.34  25.510000
4 2014-01-08  41.09  43.29  35.76  77.09  15.54  75.22  25.360001

[Training] Stock Portfolio
               USB    TXN   MSFT    NKE      F    DIS
Date                                                
2014-01-02  39.89  43.10  37.16  78.24  15.44  76.27
2014-01-03  40.06  43.29  36.91  78.03  15.51  76.11
2014-01-06  40.59  42.93  36.13  77.43  15.58  75.82
2014-01-07  40.93  42.70  36.41  77.49  15.38  76.34
2014-01-08  41.09  43.29  35.76  77.09  15.54  75.22

[Portfolio] Asset Alloc

<Quandl API> Stock Data: TSLA
<Quandl API> Stock Data: VZ
<Quandl API> Stock Data: PG
<Quandl API> Stock Data: TXN
<Quandl API> Stock Data: PM
<Quandl API> Stock Data: KO
<Quandl API> Stock Data: X

[Output] Portfolio Closing Prices
         Date    DVA    TSLA     VZ     PG    TXN     PM     KO      X  \
0 2014-01-02  64.78  150.10  49.00  80.54  43.10  86.02  40.66  30.28   
1 2014-01-03  64.47  149.56  48.42  80.45  43.29  85.53  40.46  29.90   
2 2014-01-06  64.80  147.00  48.69  80.64  42.93  84.93  40.27  29.57   
3 2014-01-07  65.20  149.36  49.30  81.42  42.70  84.68  40.39  29.44   
4 2014-01-08  65.44  151.28  48.50  80.24  43.29  83.27  39.94  29.60   

       SP500  
0  25.799999  
1  25.930000  
2  25.379999  
3  25.510000  
4  25.360001  

[Training] Stock Portfolio
               DVA    TSLA     VZ     PG    TXN     PM     KO      X
Date                                                               
2014-01-02  64.78  150.10  49.00  80.54  43.10  86.02  40.66  30.28
2014

<Quandl API> Stock Data: AMZN
<Quandl API> Stock Data: KO
<Quandl API> Stock Data: ORCL
<Quandl API> Stock Data: IBM
<Quandl API> Stock Data: T
<Quandl API> Stock Data: NKE

[Output] Portfolio Closing Prices
         Date    USB    AMZN     KO   ORCL     IBM      T    NKE      SP500
0 2014-01-02  39.89  397.97  40.66  37.84  185.53  34.95  78.24  25.799999
1 2014-01-03  40.06  396.44  40.46  37.62  186.64  34.80  78.03  25.930000
2 2014-01-06  40.59  393.63  40.27  37.47  186.00  34.96  77.43  25.379999
3 2014-01-07  40.93  398.03  40.39  37.85  189.71  34.95  77.49  25.510000
4 2014-01-08  41.09  401.92  39.94  37.72  187.97  34.24  77.09  25.360001

[Training] Stock Portfolio
               USB    AMZN     KO   ORCL     IBM      T    NKE
Date                                                         
2014-01-02  39.89  397.97  40.66  37.84  185.53  34.95  78.24
2014-01-03  40.06  396.44  40.46  37.62  186.64  34.80  78.03
2014-01-06  40.59  393.63  40.27  37.47  186.00  34.96  77.43
20

<Quandl API> Stock Data: X
<Quandl API> Stock Data: C
<Quandl API> Stock Data: MGM
<Quandl API> Stock Data: TXN
<Quandl API> Stock Data: CVX
<Quandl API> Stock Data: GLW

[Output] Portfolio Closing Prices
         Date     IBM      X      C    MGM    TXN     CVX    GLW      SP500
0 2014-01-02  185.53  30.28  52.27  23.72  43.10  124.14  17.77  25.799999
1 2014-01-03  186.64  29.90  53.40  23.45  43.29  124.35  17.89  25.930000
2 2014-01-06  186.00  29.57  53.81  23.48  42.93  124.02  17.73  25.379999
3 2014-01-07  189.71  29.44  54.18  24.51  42.70  125.07  17.84  25.510000
4 2014-01-08  187.97  29.60  54.81  24.73  43.29  123.29  17.98  25.360001

[Training] Stock Portfolio
                IBM      X      C    MGM    TXN     CVX    GLW
Date                                                         
2014-01-02  185.53  30.28  52.27  23.72  43.10  124.14  17.77
2014-01-03  186.64  29.90  53.40  23.45  43.29  124.35  17.89
2014-01-06  186.00  29.57  53.81  23.48  42.93  124.02  17.73
2014-

In [7]:
# [View] Total Number of Training Portfolio in ml_training_portfolios
number_of_training_portfolios = len(ml_training_portfolios)
print("# of Portfolios:", number_of_training_portfolios)

# of Portfolios: 25


In [8]:
# [View] ML Training Portfolio Stats -- [Output] Dictionary
ml_training_portfolios[:5]

[{'CTp': 5,
  'RTNp': 0.0723,
  'VARp': 0.1829,
  'SHRp': -0.1405,
  'BETAp': 'N/A',
  'SP500': 0.3857,
  'PvSP': True},
 {'CTp': 6,
  'RTNp': -0.0012,
  'VARp': 0.1539,
  'SHRp': -0.6446,
  'BETAp': 'N/A',
  'SP500': 0.3857,
  'PvSP': True},
 {'CTp': 8,
  'RTNp': 0.0814,
  'VARp': 0.156,
  'SHRp': -0.1064,
  'BETAp': 'N/A',
  'SP500': 0.3857,
  'PvSP': True},
 {'CTp': 6,
  'RTNp': 0.0562,
  'VARp': 0.1722,
  'SHRp': -0.2427,
  'BETAp': 'N/A',
  'SP500': 0.3857,
  'PvSP': True},
 {'CTp': 9,
  'RTNp': 0.0729,
  'VARp': 0.1682,
  'SHRp': -0.1492,
  'BETAp': 'N/A',
  'SP500': 0.3857,
  'PvSP': True}]

In [9]:
# Convert & View ML Training Portfolios as DataFrame
df_headers = "CTp,SHRp,BETAp,VARp,RTNp,SP500,PvSP".split(",")
df_portfolio = pd.DataFrame(ml_training_portfolios)[df_headers]
df_portfolio.head()

Unnamed: 0,CTp,SHRp,BETAp,VARp,RTNp,SP500,PvSP
0,5,-0.1405,,0.1829,0.0723,0.3857,True
1,6,-0.6446,,0.1539,-0.0012,0.3857,True
2,8,-0.1064,,0.156,0.0814,0.3857,True
3,6,-0.2427,,0.1722,0.0562,0.3857,True
4,9,-0.1492,,0.1682,0.0729,0.3857,True


In [None]:
df_portfolio["PvSP"].value_counts()

In [None]:
# Save ML Training Data to Excel (.xlsx) Format
df_portfolio.to_excel("Portfolio_ML_Training_Data.xlsx", index=False)

In [None]:
errors_df = df_portfolio.loc[df_portfolio["SP500"] < .3857]

In [None]:
errors_df.count()

In [None]:
errors_df.head()