In [24]:
import pandas as pd 
import numpy as np
import yfinance as yf
import ta

import pyfolio as pf
import pypfopt # pip install PyPortfolioOpt



import wrds
import copy
import datetime as dt
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

In [3]:
# Universe of tickers analysed throughout the notebook (15 names).
# NOTE(review): 'FB' may no longer resolve on Yahoo Finance (Meta now trades
# as 'META') — confirm before re-running.
tickers = [
    'AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA',
    'BRK-B', 'NVDA', 'FB', 'UNH', 'V',
    'JNJ', 'WMT', 'JPM', 'PG', 'MA',
]

# Sample window: fixed start date, open-ended up to the moment this cell runs,
# so the downloaded history grows each time the notebook is re-executed.
start = '2012-07-01'
end = dt.datetime.now()

# Data Import

In [4]:
# Fetch the whole price/volume panel once and slice the individual fields out
# of it, instead of issuing four identical requests. One download is faster
# and guarantees every field comes from the same snapshot.
# auto_adjust=False is passed explicitly so that the 'Adj Close' column is
# returned next to the raw High/Low/Volume fields regardless of the installed
# yfinance version's default.
prices_raw = yf.download(tickers, start=start, end=end, auto_adjust=False)

# Each slice is a DataFrame with one column per ticker.
close_original = prices_raw['Adj Close']
volume_original = prices_raw['Volume']
high_original = prices_raw['High']
low_original = prices_raw['Low']

[*********************100%***********************]  15 of 15 completed
[*********************100%***********************]  15 of 15 completed
[*********************100%***********************]  15 of 15 completed
[*********************100%***********************]  15 of 15 completed


# Data Smoothing

In [6]:
# Smooth prices and volumes with an exponentially weighted mean (alpha = 0.2).
# New frames are produced; the *_original inputs are not modified.
close, volume = (
    raw_frame.ewm(alpha=0.2).mean()
    for raw_frame in (close_original, volume_original)
)

# Direction forecasting
## Features
### On-Balance Volume (OBV)

In [7]:
# On-balance volume: running total of (smoothed) volume signed by the move in
# the smoothed close: +volume when the close rises, -volume when it falls,
# 0 when it is unchanged.
#
# Fix: the previous implementation multiplied by volume a second time when
# applying the "down" rule, so up-days contributed volume**2 rather than
# volume.
#
# Sign of the one-period change per ticker; rows with no comparable prior
# close (e.g. the first row) produce NaN and are treated as "no move".
direction = np.sign(close[tickers].diff()).fillna(0)

# Columns are kept in `tickers` order, one OBV series per ticker.
obv = (direction * volume[tickers]).cumsum()

### Stochastic Oscillator %K

In [8]:
# Stochastic oscillator %K for every ticker, computed by `ta` from the
# unsmoothed close/high/low series with the library's default settings.
# NOTE(review): the close series is the 'Adj Close' field while high/low are
# the raw fields — confirm that mixing adjusted and unadjusted prices is intended.
so = pd.DataFrame()
for symbol in tickers:
    oscillator = ta.momentum.StochasticOscillator(
        close=close_original[symbol],
        high=high_original[symbol],
        low=low_original[symbol],
    )
    so[symbol] = oscillator.stoch()

### Moving Average Convergence Divergence

In [9]:
# MACD line and its signal line for every ticker, from the unsmoothed close.
# Columns: "<ticker>" holds the MACD line, "<ticker>_signal" the signal line.
#
# The indicator object is built once per ticker and both series are read from
# it; previously it was constructed twice, repeating the same computation.
macd = pd.DataFrame()
for ticker in tickers:
    indicator = ta.trend.MACD(close=close_original[ticker])
    macd[ticker] = indicator.macd()
    macd[ticker + "_signal"] = indicator.macd_signal()

### Organize features into dictionary

In [10]:
# Per-ticker feature tables: features[ticker] is a DataFrame with one column
# per indicator ("obv", "so", "macd", "macd_signal"), aligned on the shared
# index of the indicator series.
#
# Fix: pd.DataFrame(a, b, c, d) interprets its 2nd-4th positional arguments
# as `index`, `columns` and `dtype`, not as further data columns, so the
# previous call did not assemble the four series into one table.
features = {
    ticker: pd.concat(
        [obv[ticker], so[ticker], macd[ticker], macd[ticker + "_signal"]],
        axis=1,
        keys=["obv", "so", "macd", "macd_signal"],
    )
    for ticker in tickers
}

In [11]:
# One-period returns from the unsmoothed adjusted close.
previous_close = close_original.shift(1)

# Log return: ln(P_t / P_{t-1})
log_ret = np.log(close_original.div(previous_close))

# Simple return: P_t / P_{t-1} - 1
ret = close_original.div(previous_close).sub(1)

# Compare strategies
## Equally weighted portfolio

In [12]:
# Equal-weight benchmark: every ticker receives the same 1/N share.
n_assets = len(tickers)
weight_equal = [1 / n_assets] * n_assets

# Portfolio return per row = weighted sum across the ticker columns.
strat_equal = ret.mul(weight_equal).sum(axis=1)
# pf.create_full_tear_sheet(strat_equal)

## Classic mean variance - no short

In [13]:
# Price table for the optimiser.
# Reuse the adjusted closes already fetched in the data-import cell instead of
# downloading the same tickers and date range again: this avoids a redundant
# network request and keeps the optimiser on exactly the same data as the
# return and feature calculations. A copy is taken so later changes to `df`
# cannot affect `close_original`.
df = close_original.copy()
df.head()

[*********************100%***********************]  15 of 15 completed


Unnamed: 0_level_0,AAPL,AMZN,BRK-B,FB,GOOG,JNJ,JPM,MA,MSFT,NVDA,PG,TSLA,UNH,V,WMT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2012-07-02,18.119987,229.320007,83.650002,30.77,289.151184,51.882904,27.545603,41.542068,25.009995,3.088514,45.750225,6.08,48.115578,29.577492,55.298279
2012-07-03,18.330696,229.529999,83.779999,31.200001,292.817444,51.913441,27.469049,41.718674,25.173679,3.168884,45.877327,6.132,46.935345,29.512087,56.4146
2012-07-05,18.65271,227.059998,83.220001,31.469999,296.847351,51.715057,26.320684,41.962463,25.124577,3.136735,45.86985,6.246,47.730728,29.668587,56.677738
2012-07-06,18.528547,225.050003,82.540001,31.73,291.895905,51.608257,25.953196,41.733776,24.707205,3.077032,45.817505,6.198,47.739273,29.264473,56.901009
2012-07-09,18.773514,225.050003,83.389999,32.169998,291.910858,51.715057,25.999132,40.754768,24.551701,3.035699,46.019379,6.298,47.970165,28.883717,57.219944


In [14]:
# Mean-variance optimisation on the price table `df` using PyPortfolioOpt.
# These names are imported here rather than in the notebook's import cell.
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage
from pypfopt.efficient_frontier import EfficientFrontier

# Optimiser inputs, both estimated from the same price history:
# mu - expected returns, S - Ledoit-Wolf shrunk covariance matrix.
mu = mean_historical_return(df)
S = CovarianceShrinkage(df).ledoit_wolf()

# No weight_bounds are passed, so the library defaults apply; per the section
# heading this is the "no short" case.
ef = EfficientFrontier(mu, S)
# max_sharpe() must run before portfolio_performance(): the latter reports on
# the weights found by the optimisation.
weights = ef.max_sharpe()

# Last expression of the cell: prints expected annual return, annual volatility
# and Sharpe ratio, and displays the returned 3-tuple.
ef.portfolio_performance(verbose=True)

Expected annual return: 42.2%
Annual volatility: 24.8%
Sharpe Ratio: 1.62


(0.4218156066182406, 0.24833392918143843, 1.6180455403041802)

## Classic mean variance - short allowed

In [15]:
# Same inputs (mu, S) as the long-only optimisation, but each weight is bounded
# to [-1, 1], so negative (short) positions are permitted.
ef_short = EfficientFrontier(mu, S, weight_bounds=(-1,1))
# Optimise first; portfolio_performance() reports on the resulting weights.
weights_short = ef_short.max_sharpe()
# Last expression of the cell: prints expected annual return, annual volatility
# and Sharpe ratio, and displays the returned 3-tuple.
ef_short.portfolio_performance(verbose=True)

Expected annual return: 46.4%
Annual volatility: 26.9%
Sharpe Ratio: 1.65


(0.4638490685432127, 0.26867866615065256, 1.6519698973581296)