In [1]:
import pandas_datareader.data as web
import datetime
import pandas as pd
from functools import reduce
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt.expected_returns import mean_historical_return
from pypfopt.risk_models import CovarianceShrinkage

In [2]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Start and ending times for dataset
startTrain = datetime.datetime(2009, 1, 1)
endTrain = datetime.datetime(2017, 1, 1)

# testing dataset
startTest = datetime.datetime(2017, 1, 2)
endTest = datetime.datetime(2019, 1, 1)

# List of stocks that are in the portfolio
# TODO: Let the user input a stock and create this array based on the inputs
# !: Removed gs-pj and fb as their data is lacking earlier years
stocks = ["AAPL", "CAH", "CMCSA", "DISH", "GOOG", "HSY", "JNJ", "JPM", "K", "MA", "NFLX", "UL", "WBA"]


In [3]:
def get_stock(ticker, start=startTrain, end=endTrain):
    data = web.DataReader(f"{ticker}","yahoo",start,end)
    # print(data)
    data[f'{ticker}'] = data["Close"]#(data["Close"] - data["Open"])/data["Open"]
    data = data[[f'{ticker}']] 
    # print(data.head())
    return data 

pfizer = get_stock("AAPL")
jnj = get_stock("NFLX")

In [4]:
def combine_stocks(tickers, testing=False):
    data_frames = []
    for i in tickers:
        if (testing):
            data_frames.append(get_stock(i, startTest, endTest))
        else:
            data_frames.append(get_stock(i))
        
    df_merged = reduce(lambda  left,right: pd.merge(left,right,on=['Date'],
                                            how='outer'), data_frames)

    # print(df_merged.head())
    return df_merged

In [5]:
import yfinance as yf
def get_historical_data(ticker, start_date, end_date):
    # Pull Historical Data
    data = yf.download(ticker, start=start_date, end=end_date)
    # Calculate Daily Returns
    data['Daily Return'] = data['Adj Close'].pct_change()   
    return data.dropna()

In [6]:
def std_dev(data):
    # Get number of observations
    n = len(data)
    # Calculate mean
    mean = sum(data) / n
    # Calculate deviations from the mean
    deviations = sum([(x - mean)**2 for x in data])
    # Calculate Variance & Standard Deviation
    variance = deviations / (n - 1)
    s = variance**(1/2)
    return s

In [7]:
# Sharpe Ratio From Scratch
def sharpe_ratio(data, risk_free_rate=0.02): # risk_free_rate refers to 10 year treasury bond rate(2% is used)
    # Calculate Average Daily Return
    mean_daily_return = sum(data) / len(data)
    # Calculate Standard Deviation
    s = std_dev(data)
    # Calculate Daily Sharpe Ratio
    daily_sharpe_ratio = (mean_daily_return - risk_free_rate/252) / s #TODO: check if risk_free_rate should be divided by 252 trading days
    # Annualize Daily Sharpe Ratio
    sharpe_ratio = 252**(1/2) * daily_sharpe_ratio
    
    return sharpe_ratio

In [8]:
def get_stock_return(ticker, start=startTrain, end=endTrain):
    data = web.DataReader(f"{ticker}","yahoo",start,end)
    print(data)
    data[f'{ticker}'] = data["Close"]#(data["Close"] - data["Open"])/data["Open"]
    data = data[[f'{ticker}']] 
    print(data.head())
    return data 

In [9]:
aapl = get_historical_data("AAPL", start_date="2009-01-01", end_date="2019-01-01")
aaplSharpe = sharpe_ratio(aapl['Daily Return'])
aapl['AAPL'] = aapl['Daily Return']
aapl.drop(columns=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Daily Return'], inplace=True)
# aapl

[*********************100%***********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aapl['AAPL'] = aapl['Daily Return']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [10]:
portfolio_daily_changes = aapl
# data.drop(columns=['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Daily Return'], inplace=True)
for i in stocks:
    data = get_historical_data(i, start_date="2009-01-01", end_date="2019-01-01") # train + test time frame
    portfolio_daily_changes[i] = data['Daily Return']
    print(i)

portfolio_daily_changes

[*********************100%***********************]  1 of 1 completed
AAPL
[*********************100%***********************]  1 of 1 completed
CAH
[*********************100%***********************]  1 of 1 completed
CMCSA
[*********************100%***********************]  1 of 1 completed
DISH
[*********************100%***********************]  1 of 1 completed
GOOG
[*********************100%***********************]  1 of 1 completed
HSY
[*********************100%***********************]  1 of 1 completed
JNJ
[*********************100%***********************]  1 of 1 completed
JPM
[*********************100%***********************]  1 of 1 completed
K
[*********************100%***********************]  1 of 1 completed
MA
[*********************100%***********************]  1 of 1 completed
NFLX
[*********************100%***********************]  1 of 1 completed
UL
[*********************100%***********************]  1 of 1 completed
WBA


Unnamed: 0_level_0,AAPL,CAH,CMCSA,DISH,GOOG,HSY,JNJ,JPM,K,MA,NFLX,UL,WBA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2009-01-02,0.063269,0.021758,0.065759,0.058611,0.044434,0.0308,0.01370571,0.00642,0.027366,0.047926,-0.000669,0.029974,0.03567
2009-01-05,0.042204,0.01448,-0.050063,-0.021295,0.020945,-0.008936,-0.009893125,-0.066985,-0.005771,0.023234,0.0693,-0.009279,0.050489
2009-01-06,-0.016494,-0.002799,0.011744,0.042646,0.01832,0.013525,-0.00599513,0.021538,-0.003126,0.057158,0.031309,0.017028,-0.004843
2009-01-07,-0.021608,0.0087,-0.034243,0.003339,-0.036071,-0.015291,-0.009381846,-0.059907,-0.004031,-0.060542,-0.006982,-0.018418,0.01198
2009-01-08,0.018569,0.021425,0.016226,0.12396,0.009875,-0.007905,-0.001860212,-0.030972,-0.002249,0.004669,0.013452,0.026013,-0.007029
2009-01-09,-0.022869,0.004358,-0.034299,-0.034049,-0.03112,-0.02049,0.0005082412,-0.045922,-0.018706,-0.017935,-0.026848,-0.018288,-0.025708
2009-01-12,-0.021197,0.000542,-0.038579,-0.027586,-0.007554,0.013074,-0.01202325,-0.040816,0.003675,-0.013329,-0.032548,-0.0326,-0.00956
2009-01-13,-0.010715,0.02765,-0.023567,-0.009456,0.005213,0.0,0.008570126,0.057808,0.004577,0.016212,-0.008651,0.014879,0.006564
2009-01-14,-0.027135,-0.014508,-0.03392,-0.019093,-0.042473,-0.021795,-0.01512571,-0.016698,-0.028473,-0.034167,-0.045249,-0.026735,0.002685
2009-01-15,-0.022852,0.014721,0.013504,0.010543,-0.006579,0.009968,-0.005694561,-0.060594,0.002344,-0.103235,0.070413,0.025698,0.029074


In [11]:
# str(portfolio_daily_changes.index[0].date())
# for i in portfolio_daily_changes.columns:
#     print(i)

portfolio = combine_stocks(stocks) #?: By default it's the training time frame which is from start 2009 to end 2016

# training dataset is used to create the portfolio model
mu = mean_historical_return(portfolio)
S = CovarianceShrinkage(portfolio).ledoit_wolf()
# portfolio

In [12]:
# PORTFOLIO BASED ON TRAIN DATA
ef = EfficientFrontier(mu, S)
weights = ef.max_sharpe()
cleaned_weights_best_sharpe = ef.clean_weights()
# print(dict(cleaned_weights_best_sharpe))
print("\nMax Sharpe (Best reward/risk ratio)")
ef.portfolio_performance(verbose=True)

print("\nPortfolio percentages for BEST SHARPE")
# adds up to roughly 100%. Might be slightly off due to the decimals
for key in cleaned_weights_best_sharpe:
    print(key, str(round(cleaned_weights_best_sharpe[key] * 100, 2)) + '%')


Max Sharpe (Best reward/risk ratio)
Expected annual return: 31.2%
Annual volatility: 20.7%
Sharpe Ratio: 1.41

Portfolio percentages for BEST SHARPE
AAPL 41.71%
CAH 0.0%
CMCSA 0.7%
DISH 1.73%
GOOG 0.76%
HSY 19.68%
JNJ 0.0%
JPM 0.0%
K 0.0%
MA 17.02%
NFLX 17.27%
UL 0.0%
WBA 1.13%


In [13]:
# cleaned_weights_best_sharpe: orderedDict of ratios of each stock
# portfolio_dailychanges.columns: AAPL, CAH, etc
# print(portfolio_daily_changes)
# portfolio_daily_changes.drop(columns=['netDailyChange'], inplace=True)
# for i in portfolio_daily_changes.columns:
#     print(i, cleaned_weights_best_sharpe[i])

In [14]:
portfolio_daily_changes['netDailyChange'] = 0

# TODO: WHY is the for loop not working??
# for i in portfolio_daily_changes.columns:
#     portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe[i] * portfolio_daily_changes[i]
    
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['AAPL'] * portfolio_daily_changes['AAPL']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['CAH'] * portfolio_daily_changes['CAH']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['CMCSA'] * portfolio_daily_changes['CMCSA']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['DISH'] * portfolio_daily_changes['DISH']
# portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['FB'] * portfolio_daily_changes['FB']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['GOOG'] * portfolio_daily_changes['GOOG']
# portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['GS-PJ'] * portfolio_daily_changes['GS-PJ']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['HSY'] * portfolio_daily_changes['HSY']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['JNJ'] * portfolio_daily_changes['JNJ']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['JPM'] * portfolio_daily_changes['JPM']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['K'] * portfolio_daily_changes['K']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['MA'] * portfolio_daily_changes['MA']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['NFLX'] * portfolio_daily_changes['NFLX']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['UL'] * portfolio_daily_changes['UL']
portfolio_daily_changes['netDailyChange'] += cleaned_weights_best_sharpe['WBA'] * portfolio_daily_changes['WBA']
# portfolio_daily_changes
# portfolio_daily_changes.drop(columns=['netDailyChange'], inplace=True)

In [15]:
def total_return(data):
    initial = 1
    for index, row in data.items():
        # ?: ignore the first row because the portfolio starts at $100,000 on that day
        if index == portfolio_daily_changes.index[0]:
            continue

        row += 1
        initial *= row
    return initial

In [16]:
# TOTAL RETURNS AND SHARPE RATIO FOR TRAINING TIMEFRAME(2009-2016)
sharpe = sharpe_ratio(portfolio_daily_changes.loc['2009-01-01':'2017-01-01']['netDailyChange'])
totalReturns = total_return(portfolio_daily_changes.loc['2009-01-01':'2017-01-01']['netDailyChange'])
totalReturns, sharpe
# portfolio_daily_changes.loc['2009-01-01':'2017-01-01']['netDailyChange']

(11.622745175621118, 1.5092158446913992)

In [17]:
# TOTAL RETURNS AND SHARPE RATIO FOR TESTING TIMEFRAME(2017-2018)
sharpe = sharpe_ratio(portfolio_daily_changes.loc['2017-01-01':'2019-01-01']['netDailyChange'])
totalReturns = total_return(portfolio_daily_changes.loc['2017-01-01':'2019-01-01']['netDailyChange'])
totalReturns, sharpe

(1.5140307395534125, 1.1038960049632405)

In [18]:
# TOTAL RETURNS AND SHARPE RATIO FOR ENTIRE TIMEFRAME(2009-2018)
sharpe = sharpe_ratio(portfolio_daily_changes['netDailyChange'])
totalReturns = total_return(portfolio_daily_changes['netDailyChange'])
totalReturns, sharpe

(17.59719347388649, 1.4340750045445065)

In [19]:
# min vol: (1.2510565075695712, 0.4156715453255877)
# best sharpe: (2.370754712244849, 1.1054096304284171)
mutated_data = portfolio_daily_changes.drop(columns=["AAPL", "CAH", "CMCSA", "DISH", "GOOG", "HSY", "JNJ", "JPM", "K", "MA", "NFLX", "UL", "WBA"])

# !: The $100,000 is put into the portfolio after day 0 close so it is not affected by price changes on day 0
mutated_data.loc[mutated_data.index[0], 'Portfolio Value'] = 100000
for i in range(1, len(mutated_data)):
    mutated_data.loc[mutated_data.index[i], 'Portfolio Value'] = mutated_data.loc[mutated_data.index[i-1], 'Portfolio Value'] * mutated_data.loc[mutated_data.index[i], 'netDailyChange'] + mutated_data.loc[mutated_data.index[i-1], 'Portfolio Value']

mutated_data.tail()

Unnamed: 0_level_0,netDailyChange,Portfolio Value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-24,-0.026028,1631418.0
2018-12-26,0.061571,1731866.0
2018-12-27,0.001434,1734349.0
2018-12-28,-0.001122,1732404.0
2018-12-31,0.015768,1759719.0


In [20]:
mutated_data.to_csv('portfolioBestSharpe.csv', index=True)

### Sharpe and Total Returns for max sharpe portfolio
- Training Timeframe: (11.62274549145041, 1.5092151864491001)
- Testing Timeframe: (1.5140309299877148, 1.1038966069959586)
- Total Timeframe: (17.597196165431157, 1.4340746076171493)

### Sharpe and Total Returns for min volatility portfolio
- Training Timeframe: (2.9372727423958414, 0.9941734735774026)
- Testing Timeframe: (1.0944363130248105, 0.25940771449832484)
- Total Timeframe: (3.2146579505359787, 0.8440027044260568)