In [2]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil.relativedelta import relativedelta

## Group Assignment
### Team Number: 05
### Team Member Names: Piero Camposeo, Sathun Suthakaran, Ishaan Bansal
### Team Strategy Chosen: SAFE

*Abstract and explanation go here*

In [3]:
# Load the ticker symbols from the CSV file
csv_file = "Tickers.csv"
tickers = pd.read_csv(csv_file)

# read_csv treats the first row as the header, so the first column's label is
# put back at the front of the list (presumably the file has no header row and
# that label is itself a ticker -- confirm against Tickers.csv).
ticker_list = [tickers.columns[0], *tickers.iloc[:, 0].values.tolist()]

In [4]:
#Consumes desired amount of trading days within a month, a dataframe of days, a dataframe of months, and a list of months that corresponds to the months
#    on the dataframe, outputs the monthly dataframe with only months that have 'daycount' amount of trading days for that stock
#Note: Function will cover up to the last date. For example, the function will not produce October for a dataframe which ends on October 1st.
#   In our code, we simply passed a days dataframe that ends one month after the months dataframe does (*)
def df_with_valid_months(daycount, df_days, df_months, months):
    """Remove from df_months every month that has fewer than `daycount` rows in df_days.

    Parameters:
        daycount:  minimum number of df_days rows a month needs in order to be kept
        df_days:   dataframe with one row per day; a row counts towards a month when
                   the month string appears in str() of its index label
        df_months: dataframe with one row per month; it is modified in place
        months:    list of month strings (the caller passes 'YYYY-MM' values)

    Returns:
        df_months (the same object), without the rows belonging to short months.
    """
    day_labels = [str(label) for label in df_days.index]

    #Decide which months are too short BEFORE touching df_months. Dropping rows one
    #   at a time by position shifts the remaining rows, so later positions would
    #   point at the wrong month (or past the end of the dataframe).
    short_months = [
        month for month in months
        if sum(month in label for label in day_labels) < daycount
    ]

    #Match rows by label rather than by position, so the result is still correct
    #   when df_months has no row at all for some entry of 'months'
    rows_to_drop = [
        label for label in df_months.index
        if any(month in str(label) for month in short_months)
    ]

    if rows_to_drop:
        df_months.drop(index = rows_to_drop, inplace = True) #Drop any months with not enough trading days

    return(df_months)

#Consumes historical data for a stock, outputs average volume
def average_volume(df_hist):
    """Return the mean of the non-missing values in the 'Volume' column of df_hist."""
    non_missing_volumes = df_hist['Volume'].dropna()

    return np.mean(non_missing_volumes.values.tolist())


In [5]:
#Consumes a list of tickers, outputs tickers that:
#   a) Are US listed (checked through the 'financialCurrency' field being 'USD')
#   b) Have an average monthly volume of at least 200 000 shares
def filter_stocks(ticker_list):
    """Return the tickers from ticker_list that pass the currency and volume checks.

    For every ticker, monthly and daily price history is requested from yfinance,
    months with fewer than 20 daily rows are removed from the monthly data, and the
    ticker is kept when the average of the remaining monthly volumes is at least
    200 000 and its 'financialCurrency' is 'USD'.

    Parameters:
        ticker_list: list of ticker symbols

    Returns:
        A new list with the tickers that passed, in their original order. A ticker
        whose data cannot be retrieved or processed is reported with a printed
        message and left out.
    """
    #dates for volume calculation; the daily dataframe ends one month later than the
    #   monthly one so that the last month can be counted by df_with_valid_months
    vol_date_s = '2022-01-01'
    vol_date_e = '2022-10-02'
    endDForValidMonthsFunct = (pd.to_datetime(vol_date_e) + relativedelta(months = 1)).strftime("%Y-%m-%d")

    #As outlined in assignment information
    minimumMonthlyTradingDays = 20
    minimum_volume = 200000

    #Here we will put valid stocks that fit our criteria
    newlist = []

    #for df_with_valid_months function, created a series of months from vol_date_s to vol_date_e
    months = pd.date_range(vol_date_s, vol_date_e, freq='MS').strftime("%Y-%m").tolist()

    #Checks for criteria needed to be a valid ticker
    for ticker in ticker_list:
        #Every request for this ticker is inside the try, so that one failing
        #   ticker is reported and skipped instead of stopping the whole loop
        try:
            tick = yf.Ticker(ticker)
            tick_hist = tick.history(start = vol_date_s, end  = vol_date_e, interval = '1mo').dropna()

            #Custom dataframe for valid months check (daily, ending one month after monthly dataframe)
            tickHistForValidMonthsCheck = tick.history(start = vol_date_s, end  = endDForValidMonthsFunct, interval = '1d').dropna()
            #Dataframe with valid months only
            tickHistWithValidMonths = df_with_valid_months(minimumMonthlyTradingDays, tickHistForValidMonthsCheck, tick_hist, months)

            monthly_average_volume = average_volume(tickHistWithValidMonths)

            currency = tick.info['financialCurrency']

            #Check for criteria (an average of NaN, i.e. no volume data, fails the comparison)
            if (currency == 'USD' and monthly_average_volume >= minimum_volume):
                newlist.append(ticker)
        #An invalid stock is expected to raise here (e.g. when its .info is read).
        #   'Exception' rather than a bare except, so Ctrl-C / kernel interrupt still works.
        except Exception:
            print(f"Error retrieving {ticker}.")

    return(newlist)
    
#Final list of valid tickers
#NOTE: this rebinds ticker_list, so running the cell again filters the already-filtered list
ticker_list = filter_stocks(ticker_list)

- AGN: No data found, symbol may be delisted
- AGN: No data found, symbol may be delisted
Error retrieving AGN.
- CELG: No data found, symbol may be delisted
- CELG: No data found, symbol may be delisted
Error retrieving CELG.
- PCLN: No data found for this date range, symbol may be delisted
- PCLN: No data found for this date range, symbol may be delisted
Error retrieving PCLN.
- RTN: No data found, symbol may be delisted
- RTN: No data found, symbol may be delisted
Error retrieving RTN.
- TWX: No data found for this date range, symbol may be delisted
- TWX: No data found for this date range, symbol may be delisted
Error retrieving TWX.


In [None]:
#Sathun

In [None]:
#Ishaan
# Consumes a DataFrame of a list of stocks and their daily percent returns and produces a list of the n (default 25) least volatile stocks
def lowest_volatile_stocks(pr_df, n=25):
    """Return the column labels of the n least volatile stocks, least volatile first.

    Volatility of a stock is the standard deviation of its column multiplied by
    the square root of the number of rows in pr_df.

    Parameters:
        pr_df: DataFrame with one column per stock (percent returns)
        n:     how many stocks to return (default 25); fewer are returned when
               pr_df has fewer columns

    Returns:
        A list of column labels sorted by non-decreasing volatility.
    """
    time_period = len(pr_df)   # number of data points available/time period

    # Square root of the time period. Note that 'time_period**1/2' would NOT be a
    # square root: '**' binds tighter than '/', so it evaluates to time_period / 2.
    scale = time_period ** 0.5

    # Volatility of every column, keyed by the column label
    volatility = pd.Series(
        [pr_df.iloc[:, i].std() * scale for i in range(pr_df.shape[1])],
        index = pr_df.columns,
        dtype = float,
    )

    # Sort in non-decreasing order of volatility (NaN last) and keep the first n labels
    return volatility.sort_values().head(n).index.tolist()

In [None]:
# Consumes a list of tickers and a DataFrame. The requirements of the DataFrame are:
#   a) df is either a DataFrame of closing prices of stocks with the stock name as the column header or a DataFrame of percent returns of stocks with the stock name as the column header
#   b) df must include all of the tickers in list_of_ticks
# The function returns a dataframe of the stocks from list_of_ticks and either their closing prices or daily percent returns, depending on what was inputted, with the stock name as the column header
def lowest_volatile_stocks_df_filter(list_of_ticks, df):
    """Return a new DataFrame holding only the columns of df named in list_of_ticks.

    Parameters:
        list_of_ticks: list of column labels to keep; any length is accepted
                       (including empty), and a repeated label is used once
        df:            DataFrame that has a column for every label in list_of_ticks

    Returns:
        A copy of the selected columns, in the order given by list_of_ticks and
        with the same index as df.

    Raises:
        KeyError: if a label in list_of_ticks is not a column of df.
    """
    # Remove repeated tickers while keeping their first-seen order
    wanted = list(dict.fromkeys(list_of_ticks))

    # Select every requested column at once instead of assuming there are exactly 25;
    # .copy() keeps the result independent of df
    return df.loc[:, wanted].copy()

In [6]:
# Show the current ticker list (a bare last expression is rendered as the cell output)
ticker_list

['AAPL',
 'ABBV',
 'ABT',
 'ACN',
 'AIG',
 'AMZN',
 'AXP',
 'BA',
 'BAC',
 'BIIB',
 'BK',
 'BLK',
 'BMY',
 'C',
 'CAT',
 'CL',
 'CMCSA',
 'COF',
 'COP',
 'COST',
 'CSCO',
 'CVS',
 'GM',
 'GOOG',
 'JPM',
 'KMI',
 'KO',
 'LLY',
 'LMT',
 'MO',
 'MON',
 'MRK',
 'MS',
 'MSFT',
 'NEE',
 'NKE',
 'ORCL',
 'OXY',
 'PEP',
 'PFE',
 'PG',
 'PM',
 'PYPL',
 'QCOM',
 'SBUX',
 'SLB',
 'SO',
 'SPG',
 'T',
 'TGT',
 'TXN',
 'UNH',
 'UNP',
 'UPS',
 'USB']

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Piero Camposeo, Sathun Suthakaran, Ishaan Bansal