In [3]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns, base_optimizer
import random
from datetime import datetime

## Group Assignment
### Team Number: 02
### Team Member Names: Jason, Patrick, Gateek
### Team Strategy Chosen: Market Beat

Disclose any use of AI for this assignment below (detail where and how you used it).  Please see the course outline for acceptable uses of AI.


### STEP 1: FILTER STOCKS FOR VALID TICKERS BASED ON SET REQUIREMENTS

In [None]:
# valid_stocks(tickers_file) reads in a given tickers file and produces a list of tickers
#                            that are valid according to restrictions such as currency and 
#                            average monthly volume.
# tickers_file: csv file with tickers 
def valid_stocks(tickers_file):
    # Read CSV and get tickers
    tickers_df = pd.read_csv(tickers_file)

    if tickers_df.empty:
        return

    tickers_df.columns = (['Tickers'])
    tickers_list = tickers_df['Tickers'].tolist()

    # Start and end dates
    start = '2023-10-01'
    end = '2024-09-30'

    valid_tickers = []

    for ticker in tickers_list:
        # Loads in ticker info from yfinance
        stock = yf.Ticker(ticker)
        info = stock.fast_info 

        # filter ticker by currency
        try:
            currency = info['currency']
        except:
            continue

        if currency != 'USD' and currency != 'CAD':
            continue

        #filter ticker by average monthly volume
        try:
            hist = stock.history(start=start, end=end, interval='1d')
            hist.index = pd.to_datetime(hist.index)
        except:
            continue
        monthly_volume = pd.DataFrame()
        monthly_volume['volume'] = hist['Volume'].resample('ME').sum()
        monthly_volume['count'] = hist['Volume'].resample('ME').count()
        monthly_volume['avg monthly volume'] = monthly_volume['volume'] / monthly_volume['count']
        invalid_trading_days = monthly_volume[monthly_volume['count'] < 18]
        invalid_monthly_vol = monthly_volume[monthly_volume['avg monthly volume'] < 100000]

        if len(invalid_monthly_vol) > 0 or len(invalid_trading_days) > 0:
            continue

        valid_tickers.append(ticker)

    return valid_tickers

valid_tickers = valid_stocks('Tickers_Example.csv')

$AGN: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
Failed to get ticker 'CELG' reason: Expecting value: line 1 column 1 (char 0)
$CELG: possibly delisted; no price data found  (period=5d)
$CL: possibly delisted; no price data found  (1d 2023-10-01 -> 2024-09-30)
$KO: possibly delisted; no price data found  (1d 2023-10-01 -> 2024-09-30)
$LLY: possibly delisted; no price data found  (period=5d)
$MO: possibly delisted; no price data found  (period=5d)
Failed to get ticker 'MON' reason: Expecting value: line 1 column 1 (char 0)
$MON: possibly delisted; no price data found  (period=5d)
$PFE: possibly delisted; no price data found  (1d 2023-10-01 -> 2024-09-30)
$PM: possibly delisted; no price data found  (1d 2023-10-01 -> 2024-09-30)
$RTN: possibly delisted; no price data found  (period=5d)


### STEP 2: GET CLOSE PRICES FOR ALL VALID STOCKS

In [1]:
#get_close_prices(start, end, tickers, cutoff) retrieves all close prices for stocks in tickers from a start date
#                                              to an end date. It also takes in a cutoff that excludes all stocks
#                                              that do not have close price data before this cutoff date. Function 
#                                              will return all close prices for the valid stocks in CAD starting from the 
#                                              date at which the youngest valid stock began tracking close prices.
# Example: get_close_prices('2020-01-01', '2024-01-01', ['AAPL', 'NVDA'], '2022-01-01')
# Restrictions:
#       * start < cutoff < end
def get_close_prices(start, end, tickers, cutoff):

    multi_data = pd.DataFrame()
    df = []
    appended_tickers = []

    # loop through tickers 
    for ticker in tickers:
        # get all data and put into a series
        data = yf.download(ticker, start=start, end=end, interval='1d')
        close = data['Close']
        close = close.rename(ticker)

        # if the first close price is less than cutoff
        if close.index.min() < pd.Timestamp(cutoff):
            # add stock close prices to df
            df.append(close)
            appended_tickers.append(ticker)

    # create df with all the data
    multi_data = pd.concat(df, axis=1)
    #drop all values so that there are valid data points for each date in the index
    multi_data.dropna(subset=appended_tickers, inplace=True)

    # Get CAD->USD exchange rate
    cadusd = yf.download('CAD=x', start=start, end=end, interval='1d')

    # convert everything to CAD
    for ticker in appended_tickers:
        stock = yf.Ticker(ticker)
        info = stock.fast_info

        currency = info['currency']
        if currency == 'USD':
            multi_data[ticker] = multi_data[ticker] * cadusd['Close']
    
    return multi_data

start = '2015-01-01'
end = '2024-11-22'
cutoff = '2019-01-01'
close_prices = get_close_prices(start, end, valid_tickers, cutoff)


NameError: name 'valid_tickers' is not defined

### STEP 3: REMOVES POOR PERFORMING STOCKS

In [None]:
def ticker_prices(ticker_list, start, end, interval):
    """
        This function, when given a list of tickers, a start date, end date, and interval
        will return a dataframe of the prices of the tickers from the start date to the end
        date at the set interval

        :param ticker_list: int
        :param start: datetime
        :param end: datetime
        :return: dataframe
        """

    prices = pd.DataFrame()

    hist_ticker = yf.Ticker(ticker_list[0])
    prices[ticker_list[0]] = hist_ticker.history(start=start, end=end, interval=interval).Close

    ticker_list.pop(0)

    for i in ticker_list:
        hist_ticker = yf.Ticker(i)
        prices[i] = hist_ticker.history(start=start, end=end, interval=interval).Close

    return prices

#feed pct_change() data
def sort_by_sharpe(price_pct, min_sharpe, min_return, max_std):
    """
        This function, when given a dataframe of
        price percent change, a dataframe of prices, a minimum sharpe ratio, a minimum return
        and maximum standard deviation will produce a dataframe with the returns, standard deviation
        sharpe ratio and prices of all stocks in the given dataframe that meets the set criteria

        :param price_pct: dataframe
        :param min_sharpe: float
        :param min_return: float
        :param: max_std: float
        :return: dataframe
        """

    sharpe_df = pd.DataFrame(columns=['Returns', 'Std', 'Sharpe'])
    returns = 0
    std = 0
    ticker = ""

    stock_info = {}#fixed length list of Ticker, Returns, Std, Sharpe

    tick_list = price_pct.columns

    for i in range(len(tick_list)):
        ticker = tick_list[i]

        #work out equations - pct_change() on all,
        returns = price_pct[ticker].mean()

        #work out equations
        std = price_pct[ticker].std()

        #multiply sharpe_ratio by sqrt(15)
        sharpe = returns/std * (50 ** 0.5)

        #print(ticker, sharpe, returns, std)

        if sharpe > min_sharpe and returns > min_return and std < max_std:
            stock_info = {
                'Returns': returns,
                'Std': std,
                'Sharpe': sharpe
            }

            # stock_info = pd.DataFrame(stock_info)

            sharpe_df.loc[ticker] = stock_info
        elif tick_list.size - sharpe_df.index.size < 12:
            stock_info = {
                'Returns': returns,
                'Std': std,
                'Sharpe': sharpe
            }

            # stock_info = pd.DataFrame(stock_info)

            sharpe_df.loc[ticker] = stock_info

    sharpe_df = sharpe_df.sort_values('Sharpe', ascending = False)

    return sharpe_df

def keep_tickers(dataframe, list):
    """
    This function, given a dataframe and list of tickers, will keep
    all items in the dataframe with a ticker in the list. Tickers must be in
    the Dataframe

    :param dataframe: pd.DataFrame
    :param list: list[Str]
    :return: pd.DataFrame
    """

    newframe = pd.DataFrame()

    for i in list:
        newframe[i] = dataframe[i]

    return newframe

def stock_df_to_ticker(dataframe):
    """
        This function, when given a dataframe of stocks, will return a list of ticker strings

        :param dataframe: dataframe
        :return: list[str]
        """

    ticker_list = []

    for i in dataframe.index:
        ticker_list.append(i)

    return ticker_list

def filtering(list_len, stock_correlation_tiers):
    """
    This function, when given how long the list will be and categorization of stocks,
    will return a list of stocks that we want to craft the portfolio from.
    :param list_len: int
    :param stock_correlation_tiers: list[list[str]]
    :return: list[str]
    """
    ticker_list = []
    '''
    while 0 < len(stock_correlation_tiers) and len(ticker_list) < list_len:
        sub_list = stock_correlation_tiers[0]
        while 0 < len(sub_list) and len(ticker_list) < list_len:
            print(sub_list[0])
            ticker_list.append(sub_list[0])
            sub_list.pop(0)

        stock_correlation_tiers.pop(0)
    '''

    while len(ticker_list) < list_len:
        for i in range(len(stock_correlation_tiers)):
            sub_list = stock_correlation_tiers[i]

            if len(sub_list) > 0:
                ticker_list.append(sub_list[0])
                sub_list.pop(0)
                stock_correlation_tiers[i] = sub_list

    return ticker_list

def arrange_by_sharpe(prices:pd.DataFrame, sharpe:pd.DataFrame):
    """
    This function, when given how long the list will be and categorization of stocks,
    will return a list of stocks that we want to craft the portfolio from.
    :param prices: pd.DataFrame
    :param sharpe: pd.DataFrame
    :return: pd.DataFrame
    """
    new_prices = pd.DataFrame()

    for i in sharpe.index:
        #print(i)
        #print(prices[i])
        new_prices[i] = prices[i]

    #print(new_prices)

    return new_prices

def correlation_filter(prices: pd.DataFrame, max_corr: float):
    """
    This function, when given a dataframe of prices and

    :param prices: pd.DataFrame
    :param max_corr: float
    :return: list[Str]
    """
    correlations = prices.corr()

    tickers = []

    corr_list = correlations.index

    for i in range(len(corr_list)):
        tick = corr_list[i]

        if correlations.loc[tick].mean() <= max_corr:
            tickers.append(tick)
        elif corr_list.size - len(tickers) < 12:
            tickers.append(tick)

    return tickers

def categorize(df:pd.DataFrame, pivot:str, categories:int):
    """
    Categorizes stocks into different categories depending on how correlated they are
    :param df: pd.DataFrame, must have pct data
    :param pivot: string that is in df
    :param categories: int
    :return: list[list[str]]
    """
    correlations = df.corr()
    lin_space = np.linspace(0, 0.9999999, categories + 1)[1:]
    # The 0.999999 prevents adding the stock itself into the list
    stock_categories = []

    for i in range(categories):
        stock_categories.append([])

    for stock in correlations.index:

        correlation = correlations.at[pivot, stock]
        inserted = False
        i = 0
        while i < categories and not inserted:
            if correlation <= lin_space[i]:
                stock_categories[i].append(stock)
                inserted = True
            i = i + 1
    return stock_categories

In [None]:
stock_pct_change = close_prices.pct_change()

stock_pct_change = keep_tickers(stock_pct_change, correlation_filter(stock_pct_change, 0.75))

stock_sharpe = sort_by_sharpe(stock_pct_change, 0.1, 0, 1)

#print(stock_sharpe)

best_stock = stock_sharpe.index[0]

stock_prices = arrange_by_sharpe(close_prices, stock_sharpe)

stock_correlation_tiers = categorize(stock_prices, best_stock, 10)

#print(stock_sharpe)
#print(stock_correlation_tiers)

ticker_lst = [best_stock]

ticker_lst += filtering(24, stock_correlation_tiers)

stock_close_prices = keep_tickers(close_prices, ticker_lst)
stock_close_prices

  stock_pct_change = close_prices.pct_change()


Unnamed: 0_level_0,SHOP.TO,PM,BK,LLY,UNH,PG,ACN,AMZN,C,LMT,...,ABBV,AXP,UPS,UNP,ABT,USB,MRK,RY.TO,QCOM,PYPL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-06,3.846000,102.426488,52.411584,108.922751,154.093850,100.975874,122.785645,27.501261,69.478465,237.132472,...,86.053392,97.872797,121.927895,121.398096,62.742535,54.997474,69.269224,76.370003,79.607586,46.306360
2015-07-07,3.901000,104.363277,52.586736,112.325978,152.797754,103.451806,124.921978,27.642849,68.993188,240.729264,...,86.792162,98.172880,123.428181,123.732001,63.562347,55.029981,70.048935,76.449997,79.500407,46.358357
2015-07-08,3.705000,103.730937,51.553908,110.954080,151.050149,102.993360,124.141405,27.322046,67.144703,241.682787,...,86.041873,96.558663,121.432720,121.890526,62.630240,54.262583,69.590503,75.599998,78.729708,44.127297
2015-07-09,3.725000,103.410392,52.176173,111.353295,151.907880,102.672117,124.425974,27.646751,68.367457,244.562280,...,86.519014,97.173180,121.880174,122.516624,62.613949,54.683782,69.693702,75.480003,78.741594,43.915049
2015-07-10,3.748000,104.422076,53.011739,112.213057,155.247763,102.884206,125.545425,28.184174,69.343578,246.807720,...,87.988563,98.257912,123.232278,124.325304,63.459032,55.210499,70.278752,76.580002,79.727319,44.089600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-12,152.259995,172.932880,108.518092,1139.975965,855.712844,230.874158,502.023207,290.834071,96.086191,787.901270,...,238.182946,401.649223,181.174406,332.598572,162.101955,70.665534,137.238154,172.750000,227.491244,120.281766
2024-11-13,161.199997,174.653435,108.231024,1132.109604,844.915981,232.304136,516.290290,298.573156,96.070546,777.782325,...,237.561595,401.128373,186.423429,335.263762,161.405210,71.359119,137.363172,172.429993,223.713703,121.758154
2024-11-14,153.429993,179.386417,108.541378,1100.148070,829.977069,233.790044,506.633720,295.917626,95.360257,754.192555,...,237.358187,403.409541,185.599180,329.640043,160.958033,70.397279,137.632204,172.050003,229.438313,120.043380
2024-11-15,152.869995,180.816821,110.087483,1049.269135,832.764177,238.398659,497.172460,284.900049,96.686876,752.051220,...,232.000693,403.382239,188.522539,331.246762,162.972785,70.166886,135.426301,171.130005,225.687072,120.689856


### LAST STEP: BUY SHARES AND GENERATE PORTFOLIO

##### The following function determines the currency of each stock in our portfolio 

In [None]:
def get_currency(tickers):
    currencies = []

    for ticker in tickers:
        stock = yf.Ticker(ticker)
        info = stock.fast_info
        currency = info['currency']

        currencies.append({'Ticker': ticker, 'Currency': currency})
        
    df = pd.DataFrame(currencies)
    df.set_index('Ticker', inplace=True)

    return df

currencies = get_currency(valid_tickers)

##### The following function buys our stocks based on the determined weightings 

In [None]:
def buy_shares(weightings_df, prices_df, currencies_df):

    cash = 1000000
    flat_fee = 3.95
    fee_per_share = 0.001

    weightings_df['Close Price'] = prices_df.reindex(weightings_df.index)

    # 1: Calculate the initial investment of each stock and the amount of shares
    weightings_df['Investment Amt'] = cash * (weightings_df['Weight'] / 100)
    weightings_df['Shares'] = weightings_df['Investment Amt'] / weightings_df['Close Price']

    # 2: Calculate the fees based on what kind of fee structure is cheaper
    weightings_df['fees'] = np.minimum(weightings_df['Shares'] * fee_per_share, flat_fee)

    # 3: Calculate total investment with fees added
    weightings_df['Investment with fees'] = weightings_df['Shares'] * weightings_df['Close Price'] + weightings_df['fees']
    total_with_fees = weightings_df['Investment with fees'].sum()

    # 4: Adjust investment to keep the total under the budget
    adjustment_factor = cash / total_with_fees
    weightings_df['Adjusted Investment Amt'] = weightings_df['Investment Amt'] * adjustment_factor
    weightings_df['Adjusted Shares'] = weightings_df['Adjusted Investment Amt'] / weightings_df['Close Price']

    # 5: Recalculate fees
    weightings_df['Adjusted fees'] = np.minimum(weightings_df['Adjusted Shares'] * fee_per_share, flat_fee)

    # 6: Final investment for each stock
    weightings_df['Final Investment'] = weightings_df['Adjusted Shares'] * weightings_df['Close Price'] + weightings_df['Adjusted fees']

    # Create Final Portfolio
    Portfolio_Final = pd.DataFrame()
    Portfolio_Final['Ticker'] = weightings_df.index
    Portfolio_Final.index = Portfolio_Final['Ticker']
    Portfolio_Final['Price'] = weightings_df['Close Price']
    Portfolio_Final['Currency'] = currencies_df.reindex(Portfolio_Final.index)['Currency'] # NEED TO FIGURE OUT A WAY TO GET ACCURATE CURRENCY DATA
    Portfolio_Final['Shares'] = weightings_df['Adjusted Shares']
    Portfolio_Final['Value'] = weightings_df['Adjusted Investment Amt']
    Portfolio_Final['Weight'] = weightings_df['Weight']

    Portfolio_Final.index = range(1, len(Portfolio_Final) + 1)

    return Portfolio_Final

weightings_df = pd.DataFrame()
weightings_df.index = valid_tickers
weights = [1.4999, 4.45, 1.34, 4.26, 4.23, 1.54, 1.45, 2.70, 1.85, 3.54, 4.43, 3.19, 1.39, 2.51, 3.72, 3.44, 4.43, 1.34, 3.98, 1.37, 4.14, 3.80, 1.38, 4.17, 1.61, 2.51, 2.39, 1.34, 2.27, 1.34, 1.34, 1.65, 4.21, 3.63, 4.46, 3.10]
weightings_df['Weight'] = weights
Portfolio_Final = buy_shares(weightings_df, close_prices.iloc[-1], currencies)

### TEST

In [None]:
#tests
total = Portfolio_Final['Value'].sum()
total_weight = Portfolio_Final['Weight'].sum()
print(total, total_weight)

999989.1663796143 99.99989999999997


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Gateek, Jason, Patrick.