In [2]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

In [3]:

# valid_stocks(tickers_file) reads in a given tickers file and produces a list of tickers
#                            that are valid according to restrictions such as currency and 
#                            average monthly volume.
# tickers_file: csv file with tickers 
def valid_stocks(tickers_file):
    # Read CSV and get tickers
    tickers_df = pd.read_csv(tickers_file)
    tickers_df.columns = (['Tickers'])
    tickers_list = tickers_df['Tickers'].tolist()

    # Start and end dates
    start = '2023-10-01'
    end = '2024-09-30'

    valid_tickers = []

    for ticker in tickers_list:
        # Loads in ticker info from yfinance
        stock = yf.Ticker(ticker)
        info = stock.fast_info 

        # filter ticker by currency
        try:
            currency = info['currency']
        except:
            continue

        if currency != 'USD' and currency != 'CAD':
            continue

        #filter ticker by average monthly volume
        try:
            hist = stock.history(start=start, end=end, interval='1d')
        except:
            continue
        monthly_volume = pd.DataFrame()
        monthly_volume['volume'] = hist['Volume'].resample('ME').sum()
        monthly_volume['count'] = hist['Volume'].resample('ME').count()
        monthly_volume['avg monthly volume'] = monthly_volume['volume'] / monthly_volume['count']
        invalid_trading_days = monthly_volume[monthly_volume['count'] < 18]
        invalid_monthly_vol = monthly_volume[monthly_volume['avg monthly volume'] < 100000]

        if len(invalid_monthly_vol) > 0 or len(invalid_trading_days) > 0:
            continue


        valid_tickers.append(ticker)

    return valid_tickers

# Filter the example ticker list once; the result is reused by later cells.
valid_tickers = valid_stocks('Tickers_Example.csv')

$AGN: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$CELG: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$MON: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$RTN: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")


In [None]:
def get_close_prices(start, end, tickers, cutoff):
    """Download monthly close prices and keep the tickers that have data by ``cutoff``.

    Close prices for ``tickers`` are downloaded at monthly intervals between
    ``start`` and ``end``. A ticker is kept only if its first available close
    price is dated on or before ``cutoff``. The result starts at the date on
    which the youngest kept ticker first has a close price.

    Prices are returned exactly as downloaded; no currency conversion is done.

    Example: get_close_prices('2020-01-01', '2024-01-01', ['AAPL', 'NVDA'], '2022-01-01')

    Restrictions:
        * start < cutoff < end

    Parameters:
        start, end: date strings forwarded to ``yf.download``.
        tickers: list of ticker symbols.
        cutoff: date (string or timestamp); need not coincide with a row label.

    Returns:
        A DataFrame of close prices with one column per kept ticker and a
        'YYYY-MM-DD' string index. It is empty when no ticker qualifies.
    """
    if len(tickers) == 0:
        return pd.DataFrame()

    close = yf.download(" ".join(tickers), start=start, end=end, interval='1mo')["Close"]

    # A single-ticker download may come back as a Series; normalise to one column.
    if isinstance(close, pd.Series):
        close = close.to_frame(name=tickers[0])
    if close.empty:
        return close

    # 'YYYY-MM-DD' labels sort chronologically, so plain string comparison is enough
    # and the cutoff does not have to match a row label exactly.
    close = close.set_axis(close.index.strftime("%Y-%m-%d"), axis=0)
    cutoff_label = pd.Timestamp(cutoff).strftime("%Y-%m-%d")

    first_dates = {column: close[column].first_valid_index() for column in close.columns}
    kept = [
        column
        for column, first in first_dates.items()
        if first is not None and first <= cutoff_label
    ]
    if not kept:
        return close.iloc[:0, :0]

    # Start where the youngest kept ticker begins so every column has an opening value.
    common_start = max(first_dates[column] for column in kept)

    # TODO: convert USD-denominated columns to CAD. An earlier draft multiplied
    # them by the most recent 'CAD=x' close.
    return close.loc[close.index >= common_start, kept]

# Requested download window and the cutoff date handed to get_close_prices.
start, end, cutoff = '1980-11-09', '2024-11-23', '1989-01-01'

get_close_prices(start=start, end=end, tickers=valid_tickers, cutoff=cutoff)

[*********************100%***********************]  36 of 36 completed


Ticker             ABT        AIG         AXP          BA        BAC  \
Date                                                                   
1985-01-01    1.325660  62.472427    5.519815    9.240741   4.687500   
1985-02-01    1.343196  61.313580    5.503722    9.703704   4.468750   
1985-03-01    1.486984  61.840328    5.326702    9.259259   4.593750   
1985-04-01    1.430872  63.736626    5.552001    8.944444   4.968750   
1985-05-01    1.518548  69.952263    5.922134    9.851852   5.187500   
...                ...        ...         ...         ...        ...   
2024-07-01  105.940002  79.230003  253.039993  190.600006  40.310001   
2024-08-01  113.269997  77.050003  258.649994  173.740005  40.750000   
2024-09-01  114.010002  73.230003  271.200012  152.039993  39.680000   
2024-10-01  113.370003  75.879997  270.079987  149.309998  41.820000   
2024-11-01  116.800003  75.379997  292.970001  148.960007  46.080002   

Ticker             BK        BMY          C         CAT        