In [9]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
import random
from datetime import datetime

In [10]:

# valid_stocks(tickers_file) reads in a given tickers file and produces a list of tickers
#                            that are valid according to restrictions such as currency and 
#                            average monthly volume.
# tickers_file: csv file with tickers 
def valid_stocks(tickers_file):
    # Read CSV and get tickers
    tickers_df = pd.read_csv(tickers_file)
    tickers_df.columns = (['Tickers'])
    tickers_list = tickers_df['Tickers'].tolist()

    # Start and end dates
    start = '2023-10-01'
    end = '2024-09-30'

    valid_tickers = []

    for ticker in tickers_list:
        # Loads in ticker info from yfinance
        stock = yf.Ticker(ticker)
        info = stock.fast_info 

        # filter ticker by currency
        try:
            currency = info['currency']
        except:
            continue

        if currency != 'USD' and currency != 'CAD':
            continue

        #filter ticker by average monthly volume
        try:
            hist = stock.history(start=start, end=end, interval='1d')
        except:
            continue
        monthly_volume = pd.DataFrame()
        monthly_volume['volume'] = hist['Volume'].resample('ME').sum()
        monthly_volume['count'] = hist['Volume'].resample('ME').count()
        monthly_volume['avg monthly volume'] = monthly_volume['volume'] / monthly_volume['count']
        invalid_trading_days = monthly_volume[monthly_volume['count'] < 18]
        invalid_monthly_vol = monthly_volume[monthly_volume['avg monthly volume'] < 100000]

        if len(invalid_monthly_vol) > 0 or len(invalid_trading_days) > 0:
            continue


        valid_tickers.append(ticker)

    return valid_tickers

# Screen the example ticker list; valid_tickers feeds the price download further down.
valid_tickers = valid_stocks(tickers_file='Tickers_Example.csv')

$AGN: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$CELG: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$MON: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")
$RTN: possibly delisted; no price data found  (period=5d) (Yahoo error = "No data found, symbol may be delisted")


In [14]:
# get_close_prices(start, end, tickers, cutoff) retrieves daily close prices for the stocks in
#                                              tickers from a start date to an end date. A stock
#                                              is kept only if its first downloaded close is dated
#                                              before cutoff. The result has one column per kept
#                                              stock and only the dates on which every kept stock
#                                              has a close price.
# start, end, cutoff: date strings such as '2020-01-01'
# tickers: list of ticker strings
# Returns: DataFrame of close prices; an empty DataFrame if no stock qualifies
# Example: get_close_prices('2020-01-01', '2024-01-01', ['AAPL', 'NVDA'], '2022-01-01')
# Restrictions:
#       * start < cutoff < end
def get_close_prices(start, end, tickers, cutoff):
    cutoff_date = pd.Timestamp(cutoff)
    close_series = []

    for ticker in tickers:
        # progress=False keeps one progress bar per ticker out of the cell output
        data = yf.download(ticker, start=start, end=end, interval='1d', progress=False)

        # nothing came back for this ticker, so there is no close column to read
        if data is None or data.empty or 'Close' not in data.columns:
            continue

        close = data['Close']
        # with multi-level columns data['Close'] is a one-column frame, not a series
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]
        close = close.rename(ticker)

        # keep the stock only if its history starts before the cutoff
        if close.index.min() < cutoff_date:
            close_series.append(close)

    # pd.concat raises on an empty list, so hand back an empty frame instead
    if not close_series:
        return pd.DataFrame()

    # line the series up by date and keep only dates where every stock has a price
    return pd.concat(close_series, axis=1).dropna()

# Download window for close prices; a ticker whose first close is dated on or
# after the cutoff is left out of the result.
start, end, cutoff = '2014-11-09', '2024-11-09', '2015-01-01'

get_close_prices(start=start, end=end, tickers=valid_tickers, cutoff=cutoff)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Unnamed: 0_level_0,ABBV,ABT,ACN,AIG,AMZN,AXP,BA,BAC,BB.TO,BIIB,...,PM,QCOM,RY.TO,T.TO,TD.TO,TXN,UNH,UNP,UPS,USB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-11-10,63.790001,44.090000,84.160004,54.360001,15.255500,92.400002,124.639999,17.370001,12.63,327.869995,...,88.419998,69.360001,81.580002,20.674999,56.340000,51.560001,94.750000,120.900002,109.050003,43.930000
2014-11-11,63.849998,44.250000,83.940002,54.360001,15.600500,91.739998,125.360001,17.320000,12.48,324.589996,...,88.089996,69.639999,81.790001,20.750000,56.450001,51.320000,95.690002,121.239998,108.760002,44.029999
2014-11-12,63.759998,44.230000,84.120003,54.349998,15.575500,91.550003,125.870003,17.290001,12.76,321.019989,...,87.900002,70.290001,82.250000,20.920000,57.000000,51.279999,95.680000,120.709999,108.620003,43.939999
2014-11-13,63.630001,44.590000,84.440002,54.500000,15.824000,91.070000,128.529999,17.219999,13.74,317.980011,...,87.860001,70.650002,82.150002,21.160000,57.049999,51.180000,95.900002,120.250000,107.790001,43.930000
2014-11-14,64.000000,43.810001,84.639999,53.900002,16.391001,90.669998,128.860001,17.139999,12.64,305.429993,...,86.279999,70.849998,82.209999,21.375000,57.060001,51.630001,95.110001,120.709999,106.830002,43.730000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-04,200.470001,117.650002,343.899994,76.330002,195.779999,270.190002,155.070007,41.330002,3.16,173.520004,...,130.330002,165.179993,170.029999,21.820000,76.540001,203.660004,557.770020,229.910004,133.000000,47.189999
2024-11-05,201.679993,117.730003,345.489990,75.389999,199.500000,275.940002,151.000000,41.880001,3.16,176.929993,...,132.190002,165.910004,170.419998,21.610001,77.089996,202.460007,567.030029,233.869995,133.839996,47.610001
2024-11-06,201.199997,116.800003,356.130005,77.019997,207.089996,295.160004,147.160004,45.410000,3.20,174.399994,...,125.309998,172.990005,171.100006,21.049999,78.769997,214.970001,596.690002,248.490005,133.919998,51.259998
2024-11-07,200.509995,115.300003,360.709991,76.589996,210.050003,286.820007,150.979996,44.770000,3.21,173.919998,...,123.779999,172.910004,172.660004,21.020000,78.739998,217.539993,605.450012,242.559998,134.149994,49.560001
