In [1]:
#These are the libraries you can use.  You may add any libraries directy related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 02
### Team Member Names: Jason, Patrick, Gateek
### Team Strategy Chosen: Market Beat

Disclose any use of AI for this assignment below (detail where and how you used it).  Please see the course outline for acceptable uses of AI.


### STEP 1: FILTER STOCKS FOR VALID TICKERS BASED ON SET REQUIREMENTS

In [3]:
# valid_stocks(tickers_file) reads in a given tickers file and produces a list of tickers
#                            that are valid according to restrictions such as currency and 
#                            average monthly volume.
# tickers_file: csv file with tickers 
def valid_stocks(tickers_file):
    # Read CSV and get tickers
    tickers_df = pd.read_csv(tickers_file)

    if tickers_df.empty:
        return

    tickers_df.columns = (['Tickers'])
    tickers_list = tickers_df['Tickers'].tolist()

    # Start and end dates
    start = '2023-10-01'
    end = '2024-09-30'

    valid_tickers = []

    for ticker in tickers_list:
        # Loads in ticker info from yfinance
        stock = yf.Ticker(ticker)
        info = stock.fast_info 

        # filter ticker by currency
        try:
            currency = info['currency']
        except:
            continue

        if currency != 'USD' and currency != 'CAD':
            continue

        #filter ticker by average monthly volume
        try:
            hist = stock.history(start=start, end=end, interval='1d')
        except:
            continue
        monthly_volume = pd.DataFrame()
        monthly_volume['volume'] = hist['Volume'].resample('ME').sum()
        monthly_volume['count'] = hist['Volume'].resample('ME').count()
        monthly_volume['avg monthly volume'] = monthly_volume['volume'] / monthly_volume['count']
        invalid_trading_days = monthly_volume[monthly_volume['count'] < 18]
        invalid_monthly_vol = monthly_volume[monthly_volume['avg monthly volume'] < 100000]

        if len(invalid_monthly_vol) > 0 or len(invalid_trading_days) > 0:
            continue

        valid_tickers.append(ticker)

    return valid_tickers

valid_tickers = valid_stocks('Tickers_Example.csv')

$AGN: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$CELG: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$MON: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$RTN: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")


### STEP 2: GET CLOSE PRICES FOR ALL VALID STOCKS

In [4]:
#get_close_prices(start, end, tickers, cutoff) retrieves all close prices for stocks in tickers from a start date
#                                              to an end date. It also takes in a cutoff that excludes all stocks
#                                              that do not have close price data before this cutoff date. Function 
#                                              will return all close prices for the valid stocks in CAD starting from the 
#                                              date at which the youngest valid stock began tracking close prices.
# Example: get_close_prices('2020-01-01', '2024-01-01', ['AAPL', 'NVDA'], '2022-01-01')
# Restrictions:
#       * start < cutoff < end
def get_close_prices(start, end, tickers, cutoff):

    multi_data = pd.DataFrame()
    df = []
    appended_tickers = []

    # loop through tickers 
    for ticker in tickers:
        # get all data and put into a series
        data = yf.download(ticker, start=start, end=end, interval='1d')
        close = data['Close']
        close = close.rename(ticker)

        # if the first close price is less than cutoff
        if close.index.min() < pd.Timestamp(cutoff):
            # add stock close prices to df
            df.append(close)
            appended_tickers.append(ticker)

    # create df with all the data
    multi_data = pd.concat(df, axis=1)
    #drop all values so that there are valid data points for each date in the index
    multi_data.dropna(subset=appended_tickers, inplace=True)

    # Get CAD->USD exchange rate
    cadusd = yf.download('CAD=x', start=start, end=end, interval='1d')

    # convert everything to CAD
    for ticker in appended_tickers:
        stock = yf.Ticker(ticker)
        info = stock.fast_info

        currency = info['currency']
        if currency == 'USD':
            multi_data[ticker] = multi_data[ticker] * cadusd['Close']
    
    return multi_data

start = '2015-01-01'
end = '2024-11-22'
cutoff = '2019-01-01'
close_prices = get_close_prices(start, end, valid_tickers, cutoff)

display(close_prices)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Unnamed: 0_level_0,ABBV,ABT,ACN,AIG,AMZN,AXP,BA,BAC,BB.TO,BIIB,...,QCOM,RY.TO,SHOP.TO,T.TO,TD.TO,TXN,UNH,UNP,UPS,USB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-06,86.053392,62.742535,122.785645,78.018206,27.501261,97.872797,177.253338,21.368286,10.08,510.404334,...,79.607586,76.370003,3.846000,21.870001,53.230000,64.823859,154.093850,121.398096,121.927895,54.997474
2015-07-07,86.792162,63.562347,124.921978,78.753508,27.642849,98.172880,181.217880,21.128373,10.13,516.575416,...,79.500407,76.449997,3.901000,21.990000,52.910000,64.828277,152.797754,123.732001,123.428181,55.029981
2015-07-08,86.041873,62.630240,124.141405,77.775948,27.322046,96.558663,180.476823,20.664800,9.79,505.721692,...,78.729708,75.599998,3.705000,21.834999,52.220001,62.986309,151.050149,121.890526,121.432720,54.262583
2015-07-09,86.519014,62.613949,124.425974,78.614301,27.646751,97.173180,181.770122,20.977391,9.88,508.256246,...,78.741594,75.480003,3.725000,21.750000,51.799999,61.239219,151.907880,122.516624,121.880174,54.683782
2015-07-10,87.988563,63.459032,125.545425,79.587512,28.184174,98.257912,183.628292,21.225033,9.75,493.424804,...,79.727319,76.580002,3.748000,21.990000,52.360001,62.289746,155.247763,124.325304,123.232278,55.210499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-13,237.561595,161.405210,516.290290,106.111302,298.573156,401.128373,195.195160,63.968005,3.43,231.202429,...,223.713703,172.429993,161.199997,21.809999,78.559998,286.886821,844.915981,335.263762,186.423429,71.359119
2024-11-14,237.358187,160.958033,506.633720,105.994707,295.917626,403.409541,193.295165,64.226498,3.39,230.725639,...,229.438313,172.050003,153.429993,21.340000,79.639999,288.249632,829.977069,329.640043,185.599180,70.397279
2024-11-15,232.000693,162.972785,497.172460,106.543980,284.900049,403.382239,197.128170,65.737512,3.33,224.969944,...,225.687072,171.130005,152.869995,21.389999,78.800003,282.804878,832.764177,331.246762,188.522539,70.166886
2024-11-18,234.220337,165.312118,496.936464,107.489496,284.112590,402.208789,202.653831,65.809325,3.37,222.472702,...,231.713048,171.600006,148.490005,21.629999,78.870003,291.831657,830.575104,330.187577,190.342768,71.288739


### STEP 3: REMOVES POOR PERFORMING STOCKS

In [5]:
def keep_tickers(dataframe, list):
    """
    This function, given a dataframe and list of tickers, will keep
    all items in the dataframe with a ticker in the list. Tickers must be in
    the Dataframe

    :param dataframe: pd.DataFrame
    :param list: list[Str]
    :return: pd.DataFrame
    """

    newframe = pd.DataFrame()

    for i in list:
        newframe[i] = dataframe[i]

    return newframe

In [6]:
def stock_df_to_ticker(dataframe):
    """
        This function, when given a dataframe of stocks, will return a list of ticker strings

        :param dataframe: dataframe
        :return: list[str]
        """

    ticker_list = []

    for i in dataframe.index:
        ticker_list.append(i)

    return ticker_list

In [8]:
def correlation_filter(prices: pd.DataFrame, max_corr: float):
    """
    This function, when given a dataframe of prices and

    :param prices: pd.DataFrame
    :param max_corr: float
    :return: list[Str]
    """
    correlations = prices.corr()

    tickers = []

    corr_list = correlations.index

    avg_corr_df = pd.DataFrame(columns = ['Correlation'])

     #add to a dataframe

    for i in corr_list:
        avg_corr_df.loc[i, 'Correlation'] = correlations.loc[i].mean()

    avg_corr_df = avg_corr_df.sort_values('Correlation', ascending = False)

    #display(avg_corr_df)

    corr_list = avg_corr_df.index

    i = 0
    
    #filter - checks the stocks near the beginning, so 
    while i < len(avg_corr_df.index):
        index = avg_corr_df.index[i]
        tick = corr_list[i]
        
        if avg_corr_df.loc[tick, 'Correlation'] <= max_corr or len(corr_list) - i <= 12:
            tickers.append(tick)
            i += 1
        else:
            print(tick + " was removed since its correlation with other stocks was too high. (" 
                  + str(np.round(avg_corr_df.loc[tick, 'Correlation'], 2)) + ")")
            i += 1
    return tickers

stock_pct_change = close_prices.pct_change(fill_method=None)
stock_pct_change.drop(index=stock_pct_change.index[0], inplace=True)

stock_pct_change = keep_tickers(stock_pct_change, correlation_filter(stock_pct_change, 0.5))

display(stock_pct_change)

BLK was removed since its correlation with other stocks was too high. (0.52)


Unnamed: 0_level_0,BAC,C,ACN,BK,USB,AXP,TXN,UNP,AIG,KO,...,QCOM,MO,ABBV,T.TO,BMY,AMZN,LLY,BIIB,SHOP.TO,BB.TO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-07,-0.011228,-0.006985,0.017399,0.003342,0.000591,0.003066,0.000068,0.019225,0.009425,0.024714,...,-0.001346,0.040008,0.008585,0.005487,0.014437,0.005148,0.031244,0.012091,0.014301,0.004960
2015-07-08,-0.021941,-0.026792,-0.006248,-0.019640,-0.013945,-0.016443,-0.028413,-0.014883,-0.012413,-0.005191,...,-0.009694,-0.008092,-0.008645,-0.007049,-0.002672,-0.011605,-0.012214,-0.021011,-0.050244,-0.033564
2015-07-09,0.015127,0.018211,0.002292,0.012070,0.007762,0.006364,-0.027738,0.005137,0.010779,0.002466,...,0.000151,-0.000610,0.005545,-0.003893,0.003176,0.011884,0.003598,0.005012,0.005398,0.009193
2015-07-10,0.011805,0.014278,0.008997,0.016014,0.009632,0.011163,0.017154,0.014763,0.012380,0.010232,...,0.012518,0.005531,0.016985,0.011034,0.018772,0.019439,0.007721,-0.029181,0.006174,-0.013158
2015-07-13,0.018071,0.016873,0.016829,0.014976,0.007898,0.014694,0.005452,-0.008627,0.011054,0.005608,...,0.010077,-0.000681,0.003836,0.004320,0.004554,0.026093,0.004926,0.006649,0.033885,0.008205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-13,0.001942,-0.000163,0.028419,-0.002645,0.009815,-0.001297,-0.031513,0.008013,0.018183,-0.001446,...,-0.016605,0.016628,-0.002609,-0.003199,-0.008629,0.026610,-0.006900,-0.013098,0.058715,0.014793
2024-11-14,0.004041,-0.007393,-0.018704,0.002868,-0.013479,0.005687,0.004750,-0.016774,-0.001099,-0.003782,...,0.025589,0.005927,-0.000856,-0.021550,0.004931,-0.008894,-0.028232,-0.002062,-0.048201,-0.011662
2024-11-15,0.023526,0.013912,-0.018675,0.014244,-0.003273,-0.000068,-0.018889,0.004874,0.005182,-0.008096,...,-0.016350,0.014714,-0.022571,0.002343,-0.034249,-0.037232,-0.046247,-0.024946,-0.003650,-0.017699
2024-11-18,0.001092,0.005232,-0.000475,0.006341,0.015988,-0.002909,0.031919,-0.003198,0.008874,0.003682,...,0.026701,0.009079,0.009567,0.011220,0.012070,-0.002764,-0.023771,-0.011100,-0.028652,0.012012


    The first round of elimination is identifying the stocks that have too high of an average correlation with the other stocks in the portfolio. So, as discussed in our Portfolio unit, we want stocks that are diversified, so we do not want to hold stocks that have too similar of a correlation with the rest of the portfolio. If the stocks in our portfolio are too closely correlated, it could result in one event causing our entire portfolio to lose value, so by having lowly correlated stocks, we can prevent that. 

In [10]:
#feed pct_change() data
def sort_by_sharpe(price_pct, min_sharpe, min_return, max_std):
    """
        This function, when given a dataframe of
        price percent change, a dataframe of prices, a minimum sharpe ratio, a minimum return
        and maximum standard deviation will produce a dataframe with the returns, standard deviation
        sharpe ratio and prices of all stocks in the given dataframe that meets the set criteria

        :param price_pct: dataframe
        :param min_sharpe: float
        :param min_return: float
        :param: max_std: float
        :return: dataframe
        """

    sharpe_df = pd.DataFrame(columns=['Returns', 'Std', 'Sharpe'])
    returns = 0
    std = 0
    ticker = ""

    stock_info = {}#fixed length list of Ticker, Returns, Std, Sharpe

    tick_list = price_pct.columns

    for i in range(len(tick_list)):
        ticker = tick_list[i]

        #work out equations - pct_change() on all,
        returns = price_pct[ticker].mean()

        #work out equations
        std = price_pct[ticker].std()

        #multiply sharpe_ratio by sqrt(15), or the number of weeks
        sharpe = returns/std * (50 ** 0.5)

        #print(ticker, sharpe, returns, std)
        stock_info = {
            'Returns': returns,
            'Std': std,
            'Sharpe': sharpe
        }

        # stock_info = pd.DataFrame(stock_info)

        sharpe_df.loc[ticker] = stock_info

    sharpe_df = sharpe_df.sort_values('Sharpe', ascending = False)

    #print(sharpe_df)

    #filter out the bad stocks from after the last 12 stocks, this ensures that only the worse stocks get removed. 

    i = 12

    while i < len(sharpe_df.index):
        index = sharpe_df.index[i]
        
        if sharpe_df.loc[index, 'Returns'] < min_return :
            print(index + " was removed because its returns were too low. (", np.round(sharpe_df.loc[index, 'Returns'], 4), ")")
            sharpe_df.drop(index=index, inplace=True)
        elif sharpe_df.loc[index, 'Std'] > max_std:
            print(index + " was removed because its risk was too high. (", np.round(sharpe_df.loc[index, 'Std'], 4), ")")
            sharpe_df.drop(index=index, inplace=True)
        elif sharpe_df.loc[index, 'Sharpe'] < min_sharpe:
            print(index + " was removed because its Sharpe ratio was too low. (", np.round(sharpe_df.loc[index, 'Sharpe'], 4), ")")
            sharpe_df.drop(index=index, inplace=True)
            
        else:
            i+= 1
        
        

    #print(sharpe_df)

    return sharpe_df

stock_sharpe = sort_by_sharpe(stock_pct_change, 0.1, 0, 1)
display(stock_sharpe)

MO was removed because its Sharpe ratio was too low. ( 0.0897 )
BMY was removed because its Sharpe ratio was too low. ( 0.053 )
T.TO was removed because its Sharpe ratio was too low. ( 0.0366 )
PFE was removed because its Sharpe ratio was too low. ( 0.0316 )
BB.TO was removed because its Sharpe ratio was too low. ( 0.0298 )
BIIB was removed because its returns were too low. ( -0.0 )


Unnamed: 0,Returns,Std,Sharpe
SHOP.TO,0.00223,0.036354,0.43384
LLY,0.001131,0.018653,0.428692
AMZN,0.001247,0.021353,0.412932
UNH,0.000871,0.01717,0.358869
CAT,0.000894,0.019493,0.324416
ACN,0.000735,0.016549,0.314095
TXN,0.000805,0.018924,0.30086
LMT,0.000614,0.015029,0.289059
AXP,0.000815,0.020286,0.28416
RY.TO,0.000415,0.011339,0.258513


    At this step of algorithm, we sort a dataframe by their sharpe values and remove any stock whose returns or sharpe values are too low. This is to ensure that the stocks that we have are a good investment. Additionally, stocks that have too high of a risk might be removed as well. Essentially, we want to have stocks that provide positive returns and meet our criteria for our stocks.

    In the event that there at not 12 stocks that meet our criteria, we take the 12 that best meet our criteria.

In [11]:

def arrange_by_sharpe(prices:pd.DataFrame, sharpe:pd.DataFrame):
    """
    This function, when given how long the list will be and categorization of stocks,
    will return a list of stocks that we want to craft the portfolio from.
    :param prices: pd.DataFrame
    :param sharpe: pd.DataFrame
    :return: pd.DataFrame
    """
    new_prices = pd.DataFrame()

    for i in sharpe.index:
        #print(i)
        #print(prices[i])
        new_prices[i] = prices[i]

    #print(new_prices)

    return new_prices

best_stock = stock_sharpe.index[0]

print("Our best stock is " + best_stock)

stock_prices = arrange_by_sharpe(close_prices, stock_sharpe)

Our best stock is SHOP.TO


    No actual filtering occurs here, we only just shrink the list of prices to only contain the list of stocks that we wish to use.

In [27]:
def categorize(df:pd.DataFrame, pivot:str, categories:int):
    """
    Categorizes stocks into different categories depending on how correlated they are
    :param df: pd.DataFrame, must have pct data
    :param pivot: string that is in df
    :param categories: int
    :return: list[list[str]]
    """
    correlations = df.corr()
    lin_space = np.linspace(0, 0.9999999, categories + 1)[1:]
    # The 0.999999 prevents adding the stock itself into the list
    stock_categories = []

    for i in range(categories):
        stock_categories.append([])

    for stock in correlations.index:

        correlation = correlations.at[pivot, stock]
        inserted = False
        i = 0
        while i < categories and not inserted:
            if correlation <= lin_space[i]:
                stock_categories[i].append(stock)
                inserted = True
            i = i + 1
    return stock_categories

stock_correlation_tiers = categorize(stock_prices, best_stock, 10)
display(stock_correlation_tiers)

[['PM', 'C', 'AIG', 'USB', 'BA'],
 ['BK'],
 [],
 [],
 ['LLY', 'LMT', 'ABBV', 'MRK', 'KO', 'CL', 'TD.TO'],
 ['UNH', 'CAT', 'AXP', 'RY.TO', 'PEP'],
 ['PG', 'BAC', 'UPS'],
 ['ACN', 'TXN', 'UNP', 'QCOM'],
 ['AMZN', 'ABT', 'PYPL'],
 []]

Next, we categorize the stocks we wish to build our portfolio from based on how closely correlated they are to our best stock. Additionally, each subcategory is sorted by Sharpe ratio. This prepares our program to select the best stocks so that the stocks contained in the portfolio are not too highly correlated with the other stocks, and that the stocks with higher sharpe ratios are chosen before stocks with lower sharpe ratios.

In [28]:
def filtering(list_len, stock_correlation_tiers):
    """
    This function, when given how long the list will be and categorization of stocks,
    will return a list of stocks that we want to craft the portfolio from.
    :param list_len: int
    :param stock_correlation_tiers: list[list[str]]
    :return: list[str]
    """
    ticker_list = []

    while 0 < len(stock_correlation_tiers) and len(ticker_list) < list_len:
        sub_list = stock_correlation_tiers[0]
        while 0 < len(sub_list) and len(ticker_list) < list_len:
            #print(sub_list[0])
            ticker_list.append(sub_list[0])
            sub_list.pop(0)

        stock_correlation_tiers.pop(0)
    
    '''
    while len(ticker_list) < list_len:
        for i in range(len(stock_correlation_tiers)):
            sub_list = stock_correlation_tiers[i]

            if len(sub_list) > 0:
                ticker_list.append(sub_list[0])
                sub_list.pop(0)
                stock_correlation_tiers[i] = sub_list
    '''

    return ticker_list


ticker_lst = [best_stock]

ticker_lst += filtering(24, stock_correlation_tiers)

print(ticker_lst)

['SHOP.TO', 'PM', 'C', 'AIG', 'USB', 'BA', 'BK', 'LLY', 'LMT', 'ABBV', 'MRK', 'KO', 'CL', 'TD.TO', 'UNH', 'CAT', 'AXP', 'RY.TO', 'PEP', 'PG', 'BAC', 'UPS', 'ACN', 'TXN', 'UNP']


In the filtering step, we cycle through each tier of stock correlation and add them to the list of stocks we wish to buy. This continues until we have 24 stocks (or less, if stocks we have are weak). 

In [30]:


#print(stock_sharpe)
#print(stock_correlation_tiers)


stock_close_prices = keep_tickers(close_prices, ticker_lst)
display(stock_close_prices)

Unnamed: 0_level_0,SHOP.TO,PM,C,AIG,USB,BA,BK,LLY,LMT,ABBV,...,CAT,AXP,RY.TO,PEP,PG,BAC,UPS,ACN,TXN,UNP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-06,3.846000,102.426488,69.478465,78.018206,54.997474,177.253338,52.411584,108.922751,237.132472,86.053392,...,104.886243,97.872797,76.370003,119.077106,100.975874,21.368286,121.927895,122.785645,64.823859,121.398096
2015-07-07,3.901000,104.363277,68.993188,78.753508,55.029981,181.217880,52.586736,112.325978,240.729264,86.792162,...,106.325463,98.172880,76.449997,122.035659,103.451806,21.128373,123.428181,124.921978,64.828277,123.732001
2015-07-08,3.705000,103.730937,67.144703,77.775948,54.262583,180.476823,51.553908,110.954080,241.682787,86.041873,...,104.621109,96.558663,75.599998,121.585325,102.993360,20.664800,121.432720,124.141405,62.986309,121.890526
2015-07-09,3.725000,103.410392,68.367457,78.614301,54.683782,181.770122,52.176173,111.353295,244.562280,86.519014,...,103.983203,97.173180,75.480003,120.403605,102.672117,20.977391,121.880174,124.425974,61.239219,122.516624
2015-07-10,3.748000,104.422076,69.343578,79.587512,55.210499,183.628292,53.011739,112.213057,246.807720,87.988563,...,104.409364,98.257912,76.580002,121.440229,102.884206,21.225033,123.232278,125.545425,62.289746,124.325304
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-13,161.199997,174.653435,96.070546,106.111302,71.359119,195.195160,108.231024,1132.109604,777.782325,237.561595,...,539.788465,401.128373,172.429993,229.738169,232.304136,63.968005,186.423429,516.290290,286.886821,335.263762
2024-11-14,153.429993,179.386417,95.360257,105.994707,70.397279,193.295165,108.541378,1100.148070,754.192555,237.358187,...,542.021229,403.409541,172.050003,231.089441,233.790044,64.226498,185.599180,506.633720,288.249632,329.640043
2024-11-15,152.869995,180.816821,96.686876,106.543980,70.166886,197.128170,110.087483,1049.269135,752.051220,232.000693,...,540.060034,403.382239,171.130005,223.043504,238.398659,65.737512,188.522539,497.172460,282.804878,331.246762
2024-11-18,148.490005,185.539471,97.192707,107.489496,71.288739,202.653831,110.785602,1024.326635,747.904956,234.220337,...,541.546484,402.208789,171.600006,223.022051,240.516736,65.809325,190.342768,496.936464,291.831657,330.187577


Finally, reduce the number of stock prices that we have in our dataframe to contain the close prices of only the stocks we wish to keep.

### LAST STEP: BUY SHARES AND GENERATE PORTFOLIO

##### The following function determines the currency of each stock in our portfolio 

In [None]:
def get_currency(tickers):
    currencies = []

    for ticker in tickers:
        stock = yf.Ticker(ticker)
        info = stock.fast_info
        currency = info['currency']

        currencies.append({'Ticker': ticker, 'Currency': currency})
        
    df = pd.DataFrame(currencies)
    df.set_index('Ticker', inplace=True)

    return df

currencies = get_currency(valid_tickers)

##### The following function buys our stocks based on the determined weightings 

In [None]:
def buy_shares(weightings_df, prices_df, currencies_df):

    cash = 1000000
    flat_fee = 3.95
    fee_per_share = 0.001

    weightings_df['Close Price'] = prices_df.reindex(weightings_df.index)

    # 1: Calculate the initial investment of each stock and the amount of shares
    weightings_df['Investment Amt'] = cash * (weightings_df['Weight'] / 100)
    weightings_df['Shares'] = weightings_df['Investment Amt'] / weightings_df['Close Price']

    # 2: Calculate the fees based on what kind of fee structure is cheaper
    weightings_df['fees'] = np.minimum(weightings_df['Shares'] * fee_per_share, flat_fee)

    # 3: Calculate total investment with fees added
    weightings_df['Investment with fees'] = weightings_df['Shares'] * weightings_df['Close Price'] + weightings_df['fees']
    total_with_fees = weightings_df['Investment with fees'].sum()

    # 4: Adjust investment to keep the total under the budget
    adjustment_factor = cash / total_with_fees
    weightings_df['Adjusted Investment Amt'] = weightings_df['Investment Amt'] * adjustment_factor
    weightings_df['Adjusted Shares'] = weightings_df['Adjusted Investment Amt'] / weightings_df['Close Price']

    # 5: Recalculate fees
    weightings_df['Adjusted fees'] = np.minimum(weightings_df['Adjusted Shares'] * fee_per_share, flat_fee)

    # 6: Final investment for each stock
    weightings_df['Final Investment'] = weightings_df['Adjusted Shares'] * weightings_df['Close Price'] + weightings_df['Adjusted fees']

    # Create Final Portfolio
    Portfolio_Final = pd.DataFrame()
    Portfolio_Final['Ticker'] = weightings_df.index
    Portfolio_Final.index = Portfolio_Final['Ticker']
    Portfolio_Final['Price'] = weightings_df['Close Price']
    Portfolio_Final['Currency'] = currencies_df.reindex(Portfolio_Final.index)['Currency'] # NEED TO FIGURE OUT A WAY TO GET ACCURATE CURRENCY DATA
    Portfolio_Final['Shares'] = weightings_df['Adjusted Shares']
    Portfolio_Final['Value'] = weightings_df['Adjusted Investment Amt']
    Portfolio_Final['Weight'] = weightings_df['Weight']

    Portfolio_Final.index = range(1, len(Portfolio_Final) + 1)

    return Portfolio_Final

weightings_df = pd.DataFrame()
weightings_df.index = valid_tickers
weights = [1.4999, 4.45, 1.34, 4.26, 4.23, 1.54, 1.45, 2.70, 1.85, 3.54, 4.43, 3.19, 1.39, 2.51, 3.72, 3.44, 4.43, 1.34, 3.98, 1.37, 4.14, 3.80, 1.38, 4.17, 1.61, 2.51, 2.39, 1.34, 2.27, 1.34, 1.34, 1.65, 4.21, 3.63, 4.46, 3.10]
weightings_df['Weight'] = weights
Portfolio_Final = buy_shares(weightings_df, close_prices.iloc[-1], currencies)

### TEST

In [None]:
#tests
total = Portfolio_Final['Value'].sum()
total_weight = Portfolio_Final['Weight'].sum()
print(total, total_weight)

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Gateek, Jason, Patrick.