In [1]:
#These are the libraries you can use.  You may add any libraries directly related to threading if this is a direction
#you wish to go (this is not from the course, so it's entirely on you if you wish to use threading).  Any
#further libraries you wish to use you must email me, james@uwaterloo.ca, for permission.

from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

## Group Assignment
### Team Number: 17
### Team Member Names: Jack, Jennifer, Justus, Precious
### Team Strategy Chosen: Market Meet (Market Beat or Market Meet)

Disclose any use of AI for this assignment below (detail where and how you used it).  Please see the course outline for acceptable uses of AI.


In [10]:
def get_random_stocks(us_ca_count=35, global_count=5, seed=None):
    """Build a shuffled sample of test tickers as a one-column DataFrame.

    Parameters
    ----------
    us_ca_count : int, default 35
        How many tickers to draw from the combined US + Canadian list.
    global_count : int, default 5
        How many tickers to draw from the "global" list.
    seed : int or None, default None
        When given, the draw uses a private ``random.Random(seed)`` so the
        same sample is produced on every run. When None, the module-level
        ``random`` functions are used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        A single ``stock`` column with ``us_ca_count + global_count`` tickers
        in random order.

    Raises
    ------
    ValueError
        Raised by ``sample`` if a count is negative or larger than its pool.
    """
    # NOTE(review): some symbols below (e.g. "BRK.B", "TOT", "RDS.A", "DAI")
    # may not resolve on Yahoo Finance -- presumably intentional so the
    # filtering step has something to reject; confirm.
    us_stocks = [
        "AAPL", "MSFT", "AMZN", "TSLA", "GOOGL", "META", "NVDA", "BRK.B", "JNJ", "V",
        "WMT", "PG", "XOM", "DIS", "MA", "HD", "NFLX", "PEP", "COST", "PFE",
        "ABT", "T", "MRK", "CVX", "IBM", "ORCL", "BAC", "GE", "KO", "INTC",
        "CSCO", "CRM", "TXN", "AMD", "UPS", "CAT", "LOW", "FDX", "MMM", "GILD"
    ]

    canada_stocks = [
        "RY.TO", "TD.TO", "ENB.TO", "BNS.TO", "CNR.TO", "BMO.TO", "TRP.TO", "CP.TO", "T.TO", "SLF.TO",
        "BAM.A.TO", "FTS.TO", "MG.TO", "SHOP.TO", "WCN.TO", "CNQ.TO", "MFC.TO", "POW.TO", "ATD.TO", "GIB.A.TO"
    ]

    global_stocks = [
        "BABA", "TSM", "HDB", "SIEGY", "ADBE", "ASML", "NVO", "SAP", "SONY", "NSRGY",
        "TOT", "RDS.A", "BP", "TM", "VOD", "BIDU", "AXP", "LVMUY", "UL", "DAI"
    ]

    # A private generator makes a seeded draw repeatable without disturbing
    # the global random state; without a seed we fall back to the module.
    rng = random if seed is None else random.Random(seed)

    us_ca_random = rng.sample(us_stocks + canada_stocks, us_ca_count)
    global_random = rng.sample(global_stocks, global_count)

    all_tickers = us_ca_random + global_random
    rng.shuffle(all_tickers)

    return pd.DataFrame({"stock": all_tickers})

# Draw one random sample of test tickers and show it as the cell output.
random_stocks_df = get_random_stocks()
random_stocks_df

40
20
20


Unnamed: 0,stock
0,DAI
1,BAC
2,MA
3,FDX
4,AMZN
5,COST
6,ABT
7,UPS
8,RY.TO
9,META


In [6]:
## Read in portfolio of tickers and create a dataframe
# Date window passed to the price-history request when measuring trading volume.
start_date_required = '2023-10-01'
end_date_required = '2024-09-30'
# NOTE(review): count_days is initialised here but is not read in any visible cell.
count_days = 0
# Module-level list for monthly volume data.
monthly_vol = []

# This function fetches a stock's monthly trading volume and returns its average monthly volume
def count_volume(ticker, start=None, end=None):
    """Return the mean monthly trading volume of one ticker.

    Parameters
    ----------
    ticker : object
        Anything with a yfinance-style ``history(start=, end=, interval=)``
        method that returns a DataFrame containing a ``Volume`` column.
    start, end : str or None
        Date window for the history request. When omitted, the module-level
        ``start_date_required`` / ``end_date_required`` values are used.

    Returns
    -------
    float
        Average of the monthly ``Volume`` values, or 0.0 when no volume data
        comes back (so the ticker fails any minimum-volume check).
    """
    if start is None:
        start = start_date_required
    if end is None:
        end = end_date_required

    ticker_hist = ticker.history(start=start, end=end, interval='1mo')
    if ticker_hist is None or ticker_hist.empty or 'Volume' not in ticker_hist.columns:
        return 0.0

    # Average this ticker's own months only; nothing is accumulated between
    # calls, so one stock's volume can never leak into another's average.
    volumes = ticker_hist['Volume'].dropna()
    if volumes.empty:
        return 0.0
    return float(volumes.mean())
    
## Takes a dataframe of stock tickers and removes unlisted or low-volume stocks
## and stocks not denominated in USD or CAD
def remove_unwanted(df, min_volume=100000, allowed_currencies=('USD', 'CAD'),
                    ticker_factory=None, volume_fn=None):
    """Return only the rows of ``df`` whose ticker passes the screening rules.

    A ticker is kept when its reported currency is one of
    ``allowed_currencies`` and its average monthly volume is greater than
    ``min_volume``. A ticker whose lookup fails, or that reports no currency,
    is treated as unlisted and dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Ticker symbols in the first column.
    min_volume : float, default 100000
        Average monthly volume a ticker must exceed.
    allowed_currencies : iterable of str, default ('USD', 'CAD')
        Accepted values of the ticker's ``info['currency']``.
    ticker_factory : callable or None
        Maps a symbol to a ticker object exposing ``info``; defaults to
        ``yf.Ticker``.
    volume_fn : callable or None
        Maps a ticker object to its average monthly volume; defaults to
        ``count_volume``.

    Returns
    -------
    pandas.DataFrame
        A new frame containing the surviving rows, re-indexed from 0.
        The input frame is not modified.
    """
    if ticker_factory is None:
        ticker_factory = yf.Ticker
    if volume_fn is None:
        volume_fn = count_volume

    keep = []
    for symbol in df.iloc[:, 0]:
        try:
            ticker = ticker_factory(symbol)
            currency = (ticker.info or {}).get('currency')
            # Membership test: `currency == 'USD' or 'CAD'` would always be true.
            eligible = bool(currency in allowed_currencies
                            and volume_fn(ticker) > min_volume)
        except Exception:
            # Lookup failures are how unlisted/delisted symbols show up.
            eligible = False
        keep.append(eligible)

    # An index-aligned boolean Series also works for an empty frame, where a
    # bare empty list would be read as a column selection.
    mask = pd.Series(keep, index=df.index, dtype=bool)
    return df.loc[mask].reset_index(drop=True)
    

## Choosing our Time Frame
We decided to build our portfolio using a multi-year window of just under four years, from 2021-01-01 to 2024-11-02. Choosing a multi-year period allows us to mitigate short-term market volatility that could distort the relationship between the portfolio and the S&P 500 and TSX 60 indices. For instance, a shorter time period such as one year may include anomalies from recent market trends that do not accurately reflect the portfolio's long-term behaviour. This is especially important when calculating volatility, as it can be inflated by short-term price swings. Furthermore, a multi-year period is long enough to capture different market cycles, providing a more accurate representation of how the portfolio will perform under different market conditions. This ensures the portfolio is not biased towards a single market trend, allowing us to better evaluate its consistency.

In [6]:
# S&P 500 benchmark: monthly closes for ^GSPC over the analysis window,
# converted to month-over-month percentage changes.
start_date, end_date = '2021-01-01', '2024-11-02'

sp500_ticker = yf.Ticker('^GSPC')
sp500 = sp500_ticker.history(interval='1mo', start=start_date, end=end_date).loc[:, ['Close']]
# Plain 'YYYY-MM-DD' strings as the index labels.
sp500.index = sp500.index.strftime('%Y-%m-%d')

sp500_returns = (
    sp500.ffill()
    .pct_change()
    .dropna()
    .rename(columns={'Close': 'S&P500 Returns'})
)
sp500_returns.head()

Unnamed: 0_level_0,S&P500 Returns
Date,Unnamed: 1_level_1
2021-02-01,0.026091
2021-03-01,0.042439
2021-04-01,0.052425
2021-05-01,0.005486
2021-06-01,0.022214


In [7]:
# TSX 60 benchmark: monthly closes for XIU.TO (the ticker used here for the
# TSX 60 series), converted to month-over-month percentage changes.
tsx_ticker = yf.Ticker('XIU.TO')
tsx = tsx_ticker.history(interval='1mo', start=start_date, end=end_date).loc[:, ['Close']]
# Plain 'YYYY-MM-DD' strings as the index labels.
tsx.index = tsx.index.strftime('%Y-%m-%d')

tsx_returns = (
    tsx.ffill()
    .pct_change()
    .dropna()
    .rename(columns={'Close': 'TSX60 Returns'})
)
tsx_returns.head()

Unnamed: 0_level_0,TSX60 Returns
Date,Unnamed: 1_level_1
2021-02-01,0.03651
2021-03-01,0.052981
2021-04-01,0.02305
2021-05-01,0.030849
2021-06-01,0.033423


In [8]:
# Join the two benchmark return series on their date index and add the
# row-wise mean of the two as the blended 'Total % Change' column.
sp500tsx60 = sp500_returns.join(tsx_returns).assign(
    **{'Total % Change': lambda joined: joined.mean(axis=1)}
)
sp500tsx60.head()

Unnamed: 0_level_0,S&P500 Returns,TSX60 Returns,Total % Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-02-01,0.026091,0.03651,0.031301
2021-03-01,0.042439,0.052981,0.04771
2021-04-01,0.052425,0.02305,0.037737
2021-05-01,0.005486,0.030849,0.018168
2021-06-01,0.022214,0.033423,0.027818


In [13]:
# Default weights for the rating factors (the three values below sum to 1)
market_value_weight = 0.25 #Decide if we need this later
returns_weight = 0.5
error_weight = 0.25

# Sum of the blended monthly index returns over the whole window
sp_index = sp500tsx60['Total % Change'].sum()
# NOTE(review): both placeholders are set to 0 and are not updated in any
# visible cell, so dividing by either one is not meaningful.
total_market_value = 0
market_returns = 0

# Analysis window (same values as assigned in the S&P 500 cell)
start_date = '2021-01-01'
end_date = '2024-11-02'

# rating gives every stock a rating and sorts them in non-increasing order
# Factors that can be combined into the rating:
    # 1. Returns        (default weight: returns_weight)
    # 2. Tracking error (default weight: error_weight)
    # 3. Volatility     (only used when a 'volatility' weight is passed in)
# The market-value factor is left out: its calculation was disabled and
# total_market_value is never populated.
# The rating is between 0 and 1. A higher rating means the stock aligns better with the true market index.
def _monthly_returns(symbol):
    """Download monthly closes for ``symbol`` and return month-over-month returns."""
    history = yf.Ticker(symbol).history(start=start_date, end=end_date, interval='1mo')
    if history.empty or 'Close' not in history.columns:
        # No price data came back: hand the caller an empty series.
        return pd.Series(dtype=float)
    closes = history['Close'].copy()
    closes.index = closes.index.strftime('%Y-%m-%d')
    return closes.ffill().pct_change().dropna()


def _closeness(value, target):
    """Score in [0, 1]: 1 when ``value`` equals ``target``, falling with the relative gap."""
    if pd.isna(value) or pd.isna(target) or target == 0:
        # The relative gap is undefined, so award no credit.
        return 0.0
    return float(max(0.0, 1 - abs((value - target) / target)))


def rating(dataframe, benchmark_returns=None, weights=None, fetch_returns=None):
    """Rate every stock by how closely it tracks the benchmark, best first.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Ticker symbols in the first column.
    benchmark_returns : pandas.Series or None
        Periodic benchmark returns indexed by 'YYYY-MM-DD' strings. Defaults
        to ``sp500tsx60['Total % Change']``.
    weights : dict or None
        Factor weights keyed by 'returns', 'volatility' and/or 'error'.
        Defaults to ``{'returns': returns_weight, 'error': error_weight}``.
        The rating is divided by the total of the recognised weights so it
        stays in [0, 1].
    fetch_returns : callable or None
        Maps a symbol to a Series of its periodic returns, indexed like the
        benchmark. Defaults to a yfinance download over
        ``start_date``..``end_date``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``dataframe`` with a ``rating`` column, sorted by rating in
        descending order. The input frame is not modified.

    Raises
    ------
    ValueError
        If the recognised weights do not sum to a positive number.

    Notes
    -----
    NOTE(review): the tracking-error score ``1 - tracking_error / benchmark
    volatility`` (floored at 0) is a design choice made here to turn the error
    into a "higher is better" value; confirm it matches the intended model.
    """
    if benchmark_returns is None:
        benchmark_returns = sp500tsx60['Total % Change']
    if weights is None:
        weights = {'returns': returns_weight, 'error': error_weight}
    if fetch_returns is None:
        fetch_returns = _monthly_returns

    factors = ('returns', 'volatility', 'error')
    total_weight = sum(weights.get(name, 0) for name in factors)
    if total_weight <= 0:
        raise ValueError("rating weights must sum to a positive number")

    # Benchmark statistics come from the benchmark itself rather than from
    # zero-valued placeholders, which would cause division by zero.
    benchmark_mean = benchmark_returns.mean()
    benchmark_vol = benchmark_returns.std()

    ratings = []
    for symbol in dataframe.iloc[:, 0]:
        stock_returns = fetch_returns(symbol)
        if stock_returns is None or len(stock_returns) == 0:
            # No usable price history: rank the stock last.
            ratings.append(0.0)
            continue

        # Standard deviation of the period-by-period gap to the benchmark;
        # the subtraction aligns the two series on their date labels.
        tracking_error = (stock_returns - benchmark_returns).std()
        if pd.isna(tracking_error) or pd.isna(benchmark_vol) or benchmark_vol == 0:
            error_score = 0.0
        else:
            error_score = float(max(0.0, 1 - tracking_error / benchmark_vol))

        scores = {
            'returns': _closeness(stock_returns.mean(), benchmark_mean),
            'volatility': _closeness(stock_returns.std(), benchmark_vol),
            'error': error_score,
        }
        weighted_total = sum(weights.get(name, 0) * scores[name] for name in factors)
        ratings.append(weighted_total / total_weight)

    rated = dataframe.copy()
    rated['rating'] = ratings

    # Sort the rating in descending order (stable, so ties keep input order)
    return rated.sort_values(by='rating', ascending=False, kind='mergesort')



In [84]:
def calculate_weight(dataframe):
    """Assign portfolio weights (in percent) in proportion to each stock's rating.

    Every stock starts at the minimum weight ``1 / (2 * n)``. The remaining
    weight is then handed out in proportion to ``rating``; any stock that
    would pass the 15% cap is fixed at the cap and its surplus is shared
    among the stocks still below it, repeating until nothing is left.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``rating`` column. Missing or negative ratings are
        treated as 0.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object, with a ``weight`` column added or
        overwritten. Weights are percentages that sum to 100.

    Notes
    -----
    With fewer than 7 stocks the cap cannot be met (7 * 15% is the first total
    above 100%). The loop now stops once every stock is capped, where it
    previously never terminated (or divided by zero), and the final rescale to
    100% then necessarily lifts weights above 15%.
    """
    n = len(dataframe)
    max_weight = 0.15

    if n == 0:
        # Nothing to weight; still provide the column callers expect.
        dataframe['weight'] = np.array([], dtype=float)
        return dataframe

    # Step 1: Clean the ratings that drive the proportional split
    scores = dataframe['rating'].to_numpy(dtype=float)
    scores = np.clip(np.nan_to_num(scores, nan=0.0), 0.0, None)

    # Step 2: Set each weight to the minimum weight
    min_weight = 1 / (2 * n)
    weights = np.full(n, min_weight)
    remaining_weight = 1.0 - weights.sum()

    # Step 3: Assign the remaining weight by position, so duplicate stock
    # names cannot be confused with one another.
    uncapped = np.ones(n, dtype=bool)
    tolerance = 1e-12  # float sums rarely land on exactly 0

    while remaining_weight > tolerance and uncapped.any():
        pool = scores[uncapped]
        pool_total = pool.sum()
        if pool_total > 0:
            offered = remaining_weight * pool / pool_total
        else:
            # Every remaining rating is 0: split evenly instead of dividing by 0.
            offered = np.full(pool.shape, remaining_weight / pool.size)

        headroom = np.maximum(max_weight - weights[uncapped], 0.0)
        over_cap = offered > headroom
        granted = np.where(over_cap, headroom, offered)

        weights[uncapped] += granted
        remaining_weight -= granted.sum()

        if not over_cap.any():
            # Everything on offer was accepted; nothing is left to hand out.
            break

        # Capped stocks leave the pool; their surplus is shared next round.
        still_open = np.flatnonzero(uncapped)[~over_cap]
        uncapped[:] = False
        uncapped[still_open] = True

    # Express as percentages and remove any residual rounding drift.
    dataframe['weight'] = weights * 100 / weights.sum()
    return dataframe

# testing: 12 stocks, nine rated 0.01 and three rated 0.99.
# In the recorded output the three high-rated stocks sit at the 15% cap and
# the other nine share the rest equally (about 6.11% each).
data_1 = {
    'stock': ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12'],
    'rating': [0.01] * 9 + [0.99] * 3
}
dataframe_1 = pd.DataFrame(data_1)
# NOTE(review): the value of this expression is discarded (it is not the last line of the cell).
dataframe_1['rating'].sum()
print(calculate_weight(dataframe_1))

# testing: 24 stocks, nineteen rated 0.01 and five rated 0.99.
# In the recorded output no stock reaches the cap: about 2.18% for the
# low-rated stocks and about 11.71% for the high-rated ones.
data_2 = {
    'stock': ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21', 's22', 's23', 's24'],
    'rating': [0.01] * 19 + [0.99] * 5
}
dataframe_2 = pd.DataFrame(data_2)
# NOTE(review): the value of this expression is discarded, as above.
dataframe_2['rating'].sum()
print(calculate_weight(dataframe_2))

# Floating-point reminder (kept disabled):
# 0.1 + 0.2 evaluates to 0.30000000000000004 rather than 0.3, so weight totals should be compared with a tolerance

   stock  rating     weight
0     s1    0.01   6.111111
1     s2    0.01   6.111111
2     s3    0.01   6.111111
3     s4    0.01   6.111111
4     s5    0.01   6.111111
5     s6    0.01   6.111111
6     s7    0.01   6.111111
7     s8    0.01   6.111111
8     s9    0.01   6.111111
9    s10    0.99  15.000000
10   s11    0.99  15.000000
11   s12    0.99  15.000000
   stock  rating     weight
0     s1    0.01   2.180610
1     s2    0.01   2.180610
2     s3    0.01   2.180610
3     s4    0.01   2.180610
4     s5    0.01   2.180610
5     s6    0.01   2.180610
6     s7    0.01   2.180610
7     s8    0.01   2.180610
8     s9    0.01   2.180610
9    s10    0.01   2.180610
10   s11    0.01   2.180610
11   s12    0.01   2.180610
12   s13    0.01   2.180610
13   s14    0.01   2.180610
14   s15    0.01   2.180610
15   s16    0.01   2.180610
16   s17    0.01   2.180610
17   s18    0.01   2.180610
18   s19    0.01   2.180610
19   s20    0.99  11.713684
20   s21    0.99  11.713684
21   s22    0.99  11

0.30000000000000004

In [None]:
def build_portfolio():
    """Return the final portfolio.

    NOTE(review): placeholder -- `portfolio` is not defined in any visible
    cell, so calling this as written raises NameError unless the name is
    defined elsewhere.
    """
    return portfolio

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.