<a href="https://colab.research.google.com/github/AWHochman/cds_sp23/blob/main/Optimal_Portfolio_with_Backtest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Imports
from pandas_datareader import data as web
!pip install yfinance
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime 
from matplotlib import pyplot as plt
plt.style.use('fivethirtyeight')
from statistics import mean 

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yfinance
  Downloading yfinance-0.2.12-py2.py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 KB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting cryptography>=3.3.2
  Downloading cryptography-39.0.2-cp36-abi3-manylinux_2_28_x86_64.whl (4.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
Collecting frozendict>=2.3.4
  Downloading frozendict-2.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (112 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m112.8/112.8 KB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting appdirs>=1.4.4
  Downloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: appdirs, frozendict, cryptography, yfinance
Successfully installed appdirs-1.4.4 cryptography-39.0.2 frozendict

In [None]:
pip install PyPortfolioOpt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting PyPortfolioOpt
  Downloading pyportfolioopt-1.5.4-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.9/61.9 KB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: PyPortfolioOpt
Successfully installed PyPortfolioOpt-1.5.4


In [None]:
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models 
from pypfopt import expected_returns
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices

In [None]:
# Get today's date
def today_date():
  """Return the current local date as a 'YYYY-MM-DD' string."""
  # The format spec is forwarded to datetime.strftime.
  return format(datetime.today(), '%Y-%m-%d')

In [None]:
# Pull adjusted closing prices from yfinance, one column per ticker.
def get_data(start, end, assets):
  """Download the 'Adj Close' series of every ticker into one data frame.

  start  -- start date handed to yf.download
  end    -- end date handed to yf.download
  assets -- list of stock tickers (as strings)

  Returns a DataFrame with one column per ticker, in the order given.
  An empty `assets` list yields an empty DataFrame.
  """
  prices = pd.DataFrame()
  for ticker in assets:
    history = yf.download(ticker, start=start, end=end)
    prices[ticker] = history['Adj Close']
  return prices


In [None]:
# Portfolio Optimization
# Finds the max-Sharpe weights for `assets` from prices downloaded between start and end.
def get_weights(start,end,assets):
  """Return the optimal (max Sharpe ratio) weight of each asset as a list.

  start, end -- date range forwarded to get_data to download the prices
  assets     -- list of stock tickers (as strings)

  The returned list holds the values of ef.clean_weights() in the order
  that dictionary yields them.
  NOTE(review): callers index the result positionally against `assets`,
  so this presumably follows the ticker order -- confirm against PyPortfolioOpt.
  """
  dfSubset = get_data(start, end, assets)

  # Expected returns and the sample covariance matrix of the asset returns.
  # (Named `mu` rather than `mean` so it does not shadow statistics.mean.)
  mu = expected_returns.mean_historical_return(dfSubset)
  S = risk_models.sample_cov(dfSubset)

  # Optimize for max Sharpe ratio: how much excess return you receive for
  # the extra volatility you take on.
  ef = EfficientFrontier(mu, S)
  ef.max_sharpe()  # raw result is not needed; clean_weights() reads it from ef
  cleaned_weights = ef.clean_weights()

  return list(cleaned_weights.values())




In [None]:
# Returns how many whole shares of each stock should be bought.
def find_discrete_allocation(df, weights, money,date):
  """Convert portfolio weights into whole-share counts at the prices on `date`.

  df      -- data frame of daily stock prices (one column per stock)
  weights -- weight of each stock, in the same order as the columns of df
  money   -- total amount of money available to invest
  date    -- 'YYYY-MM-DD' trading date whose prices are used

  Returns a list with the (floored) number of shares per stock.
  Raises ValueError if `date` is not present in df.
  """
  prices = get_prices_at_valid_date(df,date)
  if prices is False:
    # Fail with a clear message instead of a cryptic error on `False[i]`.
    raise ValueError('No prices available for date: ' + str(date))

  spend = np.dot(money,weights) # how much you can spend on each stock

  # Use positions explicitly: integer keys on a label-indexed Series are
  # deprecated in recent pandas. Only the first len(weights) prices are used,
  # matching the weights one-to-one.
  unit_prices = np.asarray(prices, dtype=float)[:len(weights)]
  return list(np.floor(spend / unit_prices))


In [None]:
# Finds the money left over after investing in the portfolio.
def find_money_left_over(allocation, df, cash,date):
  """Return the cash remaining after buying `allocation` at the prices on `date`.

  allocation -- number of shares of each stock bought (same order as df columns)
  df         -- data frame of daily stock prices; must contain `date`
  cash       -- money available before buying
  date       -- 'YYYY-MM-DD' trading date of the purchase

  Raises ValueError if `date` is not present in df.
  """
  prices = get_prices_at_valid_date(df,date)
  if prices is False:
    # Without this check np.dot would treat False as the scalar 0 and the
    # purchase would silently cost nothing.
    raise ValueError('No prices available for date: ' + str(date))

  total_spent = np.dot(allocation, prices).sum()
  return cash - total_spent

In [None]:
# Meant to be run after a cycle, when rebalancing.
# Gets the total amount from selling the portfolio plus our left over money.
def get_new_total(allocation, df, extra_cash,date):
  """Return the proceeds of selling `allocation` on `date` plus `extra_cash`.

  allocation -- number of shares of each stock held (same order as df columns)
  df         -- data frame of daily stock prices; must contain `date`
  extra_cash -- uninvested money carried alongside the portfolio
  date       -- 'YYYY-MM-DD' trading date of the sale

  Raises ValueError if `date` is not present in df.
  """
  prices = get_prices_at_valid_date(df,date)
  if prices is False:
    # Without this check np.dot would treat False as the scalar 0 and the
    # whole portfolio would silently be valued at zero.
    raise ValueError('No prices available for date: ' + str(date))

  sales = np.dot(allocation, prices).sum()
  return sales + extra_cash


In [None]:
# Compares the initial amount (start) with the final amount (end).
def compare(start,end):
  """Return the growth factor from `start` to `end`.

  A 20% increase gives 1.2, a 10% decrease gives 0.9.
  """
  relative_change = (end - start)/start
  return 1 + relative_change

In [None]:
# Return the prices of all the assets on a given date.
# date must be a valid trading date or else the function will return False.
def get_prices_at_valid_date(df,date):
  """Return the forward-filled row of `df` whose index label matches `date`.

  df   -- data frame of daily prices indexed by timestamps
  date -- date string 'YYYY-MM-DD'

  Returns the matching row (a Series) of df.ffill(), or False when no index
  label prints as date + ' 00:00:00'.
  """
  target = date + ' 00:00:00'
  # Scan the index labels directly; the forward fill is only computed once,
  # and only when a matching row actually exists.
  for pos, stamp in enumerate(df.index):
    if str(stamp) == target:
      return df.ffill().iloc[pos]

  return False


In [None]:
# Return a valid trading date: date + 'date_range' number of trading days.
# date_range can be negative.
def return_date_after_range(df,date,date_range):
  """Return the index date lying `date_range` rows after `date`.

  df         -- data frame of daily prices indexed by timestamps
  date       -- date string 'YYYY-MM-DD'; must match an index label of df
  date_range -- number of rows (trading days) to move; may be negative

  Returns the new date as 'YYYY-MM-DD', the string 'OutOfRange' when the
  shifted position falls before the first or after the last row, or False
  when `date` is not in the index.
  """
  target = date + ' 00:00:00'
  for pos, stamp in enumerate(df.index):
    if str(stamp) == target:
      shifted = pos + date_range
      # Explicit bounds check: a negative position must not wrap around to
      # the end of the frame the way negative iloc indices do.
      if shifted < 0 or shifted >= len(df.index):
        return 'OutOfRange'
      return str(df.index[shifted]).split()[0]

  return False

In [None]:
# Back-tests one trade and compares the input and output.
def backTest(df,assets,startMoney,startDate,dataRange,shiftRange):
  """Run a single buy/sell cycle and return its growth factor.

  df         -- data frame of daily prices covering the whole back-test
  assets     -- list of stock tickers (as strings)
  startMoney -- money available to invest
  startDate  -- date where data collection starts (must be a trading date in df)
  dataRange  -- number of trading days of data collected before allocating
                weights and buying
  shiftRange -- number of trading days the portfolio is held

  The stocks are bought on startDate + dataRange and sold on
  startDate + dataRange + shiftRange (in trading days).
  Returns compare(startMoney, total): e.g. 1.2 for a 20% gain.
  Raises ValueError if the buy or sell date cannot be resolved within df.
  """
  endDate = return_date_after_range(df,startDate,dataRange)
  if endDate is False or endDate == 'OutOfRange':
    raise ValueError('Cannot resolve buy date from startDate ' + str(startDate)
                     + ' and dataRange ' + str(dataRange))

  # Resolve the sell date before downloading anything so that a window that
  # does not fit in df fails fast instead of producing a bogus result.
  sellDate = return_date_after_range(df,endDate,shiftRange)
  if sellDate is False or sellDate == 'OutOfRange':
    raise ValueError('Cannot resolve sell date from buy date ' + str(endDate)
                     + ' and shiftRange ' + str(shiftRange))

  weights = get_weights(startDate, endDate, assets)

  allocation = find_discrete_allocation(df, weights, startMoney,endDate)

  cash = find_money_left_over(allocation, df, startMoney,endDate)

  total = get_new_total(allocation, df, cash,sellDate)

  return compare(startMoney,total)

In [None]:
# Loops the backTest from 'startDate' to the last trading day in df that is
# consistent with the rebalancing frequency.
def backTestLoop(df,assets,startMoney,startDate,dataRange,shiftRange):
  """Chain back-tests, rebalancing every `shiftRange` trading days.

  df         -- data frame of daily prices covering the whole back-test
  assets     -- list of stock tickers (as strings)
  startMoney -- money invested in every cycle
  startDate  -- first date of the first data window (must be a trading date)
  dataRange  -- number of trading days used to determine the weights
  shiftRange -- rebalancing frequency in trading days (must be >= 1)

  Returns '<growth> => Last Sold On : <date>', where growth is the product of
  the per-cycle growth factors (2 means the value doubled in total) and date
  is the sell date of the last cycle that was run.
  Prints the sell date and cumulative growth after each rebalancing cycle.
  Raises ValueError if shiftRange is smaller than 1.
  """
  if shiftRange < 1:
    # The window would never advance and the loop below would never end.
    raise ValueError('shiftRange must be at least 1 trading day')

  returnValue = backTest(df,assets,startMoney,startDate,dataRange,shiftRange)
  # Sell date of the first cycle; also keeps sellDate defined when the loop
  # below never runs.
  sellDate = return_date_after_range(df,startDate,dataRange + shiftRange)

  # Continue while one more full cycle still fits inside df.
  while return_date_after_range(df,startDate, dataRange + shiftRange + shiftRange) != 'OutOfRange':
    startDate = return_date_after_range(df,startDate,shiftRange)

    returnValue = returnValue * backTest(df,assets,startMoney,startDate,dataRange, shiftRange)

    # Report the sell date of the cycle just completed so it matches the
    # cumulative growth printed next to it.
    sellDate = return_date_after_range(df,startDate,dataRange + shiftRange)
    print(sellDate)
    print(returnValue)

  return str(returnValue) + ' => Last Sold On : ' + str(sellDate)


In [None]:
# Back-tests by keeping the weights equal and constant.
# This is used to compare results with the optimization program.
def equal_weights_test(df,assets,startDate,endDate):
  """Return the average price ratio (end / start) over the assets.

  df        -- data frame of daily prices, one column per asset
  assets    -- list of stock tickers; only its length is used, the prices
               are taken from the first len(assets) columns of df
  startDate -- 'YYYY-MM-DD' trading date of the purchase
  endDate   -- 'YYYY-MM-DD' trading date of the sale

  Raises ValueError if either date is not present in df.
  """
  # Look each date up once instead of twice per asset.
  start_prices = get_prices_at_valid_date(df,startDate)
  end_prices = get_prices_at_valid_date(df,endDate)
  if start_prices is False or end_prices is False:
    raise ValueError('startDate and endDate must both be trading dates in df')

  # .iloc keeps the lookup positional; plain integer keys on a label-indexed
  # Series are deprecated in recent pandas.
  change_list = [end_prices.iloc[x]/start_prices.iloc[x] for x in range(len(assets))]

  return mean(change_list)




In [None]:
# Back-test configuration: tickers, start date, capital and window sizes.
assets = ['META', 'AMZN', 'AAPL','NFLX','GOOG']
startDate = '2013-01-02' # Date to start collecting data. Must be a valid trading date or there will be an error.

# Money invested in each back-test cycle.
startMoney = 100000

dataRange = 1000 # Range of data that will be used to determine the weights (in number of trading days)
shiftRange = 20 # Portfolio rebalancing frequency (in number of trading days)

# Downloads prices over the network; the result depends on the day the cell is run.
df = get_data(startDate, today_date(), assets) # Create a large data frame of all stock info from start date ~ today.



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [None]:
# Run the rebalancing back-test; every cycle downloads its own data window
# through get_weights, hence the repeated download progress bars in the output.
solution = backTestLoop(df,assets,startMoney,startDate,dataRange,shiftRange)
print(solution)


# Baseline for comparison: average price ratio of the assets between two fixed dates.
# NOTE(review): the dates are hard-coded; presumably the first and last sell
# dates of an earlier run -- confirm and update when the data range changes.
equal_weights_solution = equal_weights_test(df,assets,'2017-01-20','2023-02-03')
print('With Equal Weights: ' + str(equal_weights_solution))


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
2017-01-20
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
1.1387428296183373
2017-02-17
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 com