In [2]:
from IPython.display import display, Math, Latex

import pandas as pd
import numpy as np
import numpy_financial as npf
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime

## Group Assignment
### Team Number: 4
### Team Member Names: Jacky Xu, Esha Kumar, Jingyi Fang
### Team Strategy Chosen: SAFE

In [3]:
# Read the ticker file; header=None makes pandas treat every row as data
# (no row is consumed as a header)
tickers = pd.read_csv("Tickers.csv",header=None)
# Name the column "ticker" (this assignment requires the frame to have exactly one column)
tickers.columns=['ticker']
# Plain Python list of the ticker values, in file order
ticker_lst = tickers['ticker'].tolist()

# Display how many tickers were read
len(ticker_lst)

62

In [4]:
# call_tickers: wrap every ticker symbol of a list in a yfinance Ticker object
def call_tickers(lst):
    """Return a list holding ``yf.Ticker(symbol)`` for each symbol in ``lst``.

    The result has the same length and order as ``lst``.
    """
    return [yf.Ticker(symbol) for symbol in lst]

# define function filter_USD that takes a list of called tickers and returns
# a list of only the tickers whose reported currency is USD
def filter_USD(lst):
    """Keep the called tickers whose ``info['currency']`` equals ``'USD'``.

    Parameters
    ----------
    lst : iterable
        Called ticker objects exposing an ``info`` mapping.

    Returns
    -------
    list
        The qualifying tickers, in their original order.

    Notes
    -----
    A ticker whose ``info`` lookup fails (for example a delisted symbol, or
    one without a ``'currency'`` entry) is skipped on purpose.  Only
    ``Exception`` subclasses are swallowed, so ``KeyboardInterrupt`` and
    ``SystemExit`` still stop a long-running loop.
    """
    usd_lst = []
    for called in lst:
        try:
            currency = called.info['currency']
        except Exception:
            # best effort: a ticker we cannot inspect is treated as "not USD"
            continue
        if currency == 'USD':
            usd_lst.append(called)
    return usd_lst

# define function filter_daily_volume that takes a called ticker list, start date, end date,
# and returns a list of tickers with an average daily volume of at least min_volume shares
# in that time period
def filter_daily_volume(lst, start, end, min_volume=10000):
    """Keep the tickers whose mean daily volume is at least ``min_volume``.

    Parameters
    ----------
    lst : iterable
        Called ticker objects exposing ``history(start=, end=, interval=)``
        that returns a frame with a ``'Volume'`` column.
    start, end :
        Passed through unchanged to ``history``.
    min_volume : float, default 10000
        Minimum average daily volume a ticker needs in order to be kept.

    Returns
    -------
    list
        A new list with the qualifying tickers in their original order.
        ``lst`` itself is not modified.

    Notes
    -----
    Every ticker is checked; the result is returned only after the loop has
    finished.  A ticker whose mean volume is NaN (no rows in the period) is
    dropped, because NaN is not "at least ``min_volume``".
    """
    kept = []
    for called in lst:
        daily = called.history(start=start, end=end, interval='1d')
        # NaN >= min_volume is False, so tickers without data are excluded
        if daily['Volume'].mean() >= min_volume:
            kept.append(called)
    return kept


    
def get_prices(lst, start, end):
    """Collect daily closing prices of every called ticker into one DataFrame.

    Parameters
    ----------
    lst : iterable
        Called ticker objects exposing ``history(start=, end=, interval=)``
        that returns a frame with a ``'Close'`` column.
    start, end :
        Passed through unchanged to ``history``.

    Returns
    -------
    pandas.DataFrame
        One column per ticker, named after the ticker symbol.  Empty when
        ``lst`` is empty.

    Notes
    -----
    The column name is read from the object's ``ticker`` attribute instead of
    slicing its ``repr``; the old slice is kept only as a fallback for objects
    without that attribute.  Columns are added one by one, so the first
    ticker's dates define the index and later tickers are aligned to it.
    """
    prices = pd.DataFrame()
    for called in lst:
        symbol = getattr(called, 'ticker', None)
        if symbol is None:
            # legacy behaviour: strip "yfinance.Ticker object <" ... ">" from the repr
            symbol = str(called)[24:-1]
        prices[symbol] = called.history(start=start, end=end, interval='1d')['Close']
    return prices

def get_monthly_returns(prices):
    """Month-over-month percentage change of the first price seen in each month.

    ``prices`` is resampled to month-start ('MS') bins, the first value of
    each bin is taken, and ``pct_change`` is applied.  The first row, which
    has no previous month to compare against, is dropped.
    """
    month_open = prices.resample('MS').first()
    return month_open.pct_change().iloc[1:]
    

def get_std(prices):
    """Tabulate the standard deviation of every column of ``prices``.

    Returns a DataFrame with columns ``'ticker'`` (the column name) and
    ``'std'`` (that column's ``.std()``), one row per column of ``prices``,
    indexed 0, 1, 2, ... in column order.
    """
    table = pd.DataFrame(columns = ('ticker','std'))
    for position, name in enumerate(prices.columns):
        table.loc[position] = [name, prices[name].std()]
    return table

def rank(df,column_name,method):
    """Add a ``<column_name>_rank`` column to ``df`` and return ``df``.

    The new column holds ``df[column_name].rank(method=method)``.  ``df`` is
    modified in place; the returned object is the same frame.
    """
    ranks = df[column_name].rank(method=method)
    rank_label = column_name + '_rank'
    df[rank_label] = ranks
    return df

In [5]:
# Collect the symbol (ticker string) of every called ticker
def get_ticker_Symbol(lst):
    """Return ``info['symbol']`` of each called ticker in ``lst``, in order."""
    return [called.info['symbol'] for called in lst]

In [6]:
# Calculating Beta
def get_beta(good_tickers, prices, start_date, end_date, ticker_symbols):
    """Estimate each stock's beta against the S&P 500 from monthly returns.

    Parameters
    ----------
    good_tickers : list
        Not used by the computation; kept so existing callers keep working.
    prices : pandas.DataFrame
        Closing prices with one column per symbol in ``ticker_symbols``.
    start_date, end_date :
        Passed to ``history`` when downloading the market index.
    ticker_symbols : list of str
        Column names of ``prices`` to compute a beta for.

    Returns
    -------
    pandas.DataFrame
        Columns ``'ticker'`` and ``'beta'``, one row per entry of
        ``ticker_symbols`` in the same order.

    Notes
    -----
    Each row is labelled with the very symbol used to look up its prices, so
    label and value cannot drift apart (previously the label was sliced out
    of the ``repr`` of a second, positionally matched list).  Beta is
    cov(stock, market) / var(market) over month-start returns.
    """
    market_symbol = '^GSPC'  # The symbol yfinance uses for the S&P 500
    market_hist = yf.Ticker(market_symbol).history(start=start_date, end=end_date)
    market_close = market_hist['Close'].rename(market_symbol)

    records = []
    for symbol in ticker_symbols:
        # keep only the dates present for both the stock and the market
        paired = pd.concat([prices[symbol], market_close], join='inner', axis=1)

        # first price of each month -> month-over-month return; the first row is NaN
        monthly_returns = paired.resample('MS').first().pct_change().iloc[1:]

        market_var = monthly_returns[market_symbol].var()
        covariance = monthly_returns[symbol].cov(monthly_returns[market_symbol])
        records.append((symbol, covariance / market_var))

    # build the frame once instead of growing it row by row
    return pd.DataFrame(records, columns=['ticker', 'beta'])

In [9]:
# Date window used for every history() download below
start_date = "2021-07-02"
end_date = "2021-10-22"


# call the tickers: wrap each symbol read from the ticker file in a yfinance Ticker
called_tickers = call_tickers(ticker_lst)

# keep only the tickers whose reported currency is USD
usd_tickers = filter_USD(called_tickers)

# filter the tickers by the required average daily volume over the window
good_tickers = filter_daily_volume(usd_tickers, start_date, end_date)

# Get a list of Ticker Symbols (list of Strings)
ticker_symbols = get_ticker_Symbol (good_tickers)

# extract the closing prices for the good tickers
prices = get_prices(good_tickers, start_date, end_date)

# std is computed in a later cell; this call is intentionally left commented out here
#std = get_std(prices)

# the std ranking is likewise done in a later cell
#std = rank(std,'std','min')

# calculate the beta according to the closing prices
# (a later cell calls get_beta again and ranks the result)
beta = get_beta(good_tickers, prices, start_date, end_date, ticker_symbols)

# the beta ranking is done in a later cell
#beta = rank(beta,'beta','min')

In [10]:
# Standard deviation of each ticker's closing prices over the window
std = get_std(prices)
# Adds the 'std_rank' column; with method 'min', tied values share the lowest rank of their group
std = rank(std,'std','min')
# Preview the first rows
std.head()

Unnamed: 0,ticker,std,std_rank
0,AAPL,3.650374,26.0
1,ABBV,4.474768,31.0
2,ABT,3.891702,27.0
3,ACN,11.649347,48.0
4,AIG,4.139379,30.0


In [11]:
# Compute each ticker's beta against the market index (this repeats the call made in the pipeline cell)
beta = get_beta(good_tickers, prices, start_date, end_date, ticker_symbols)
# Adds the 'beta_rank' column; with method 'min', tied values share the lowest rank of their group
beta = rank(beta,'beta','min')
# Preview the first rows
beta.head()

Unnamed: 0,ticker,beta,beta_rank
0,AAPL,1.764481,44.0
1,ABBV,0.136363,17.0
2,ABT,2.021355,50.0
3,ACN,1.60196,40.0
4,AIG,1.535149,38.0


In [15]:
# Put std, beta and their ranks side by side. concat with axis=1 matches rows by
# index label, not by the 'ticker' column.
# NOTE(review): this assumes std and beta list the tickers in the same row order — confirm.
std_beta_rank = pd.concat([std,beta['beta'],beta['beta_rank']],join='inner',axis=1)
# Combined score: the sum of the two ranks
std_beta_rank['final_rank'] = std_beta_rank['std_rank']+std_beta_rank['beta_rank']
# Order the rows by ascending combined score
std_beta_rank = std_beta_rank.sort_values(by='final_rank')
# Keep the first 40 rows of the sorted table
std_beta_rank = std_beta_rank[:40]

In [22]:
def pair_by_corr(rank, prices, n_pairs=10):
    """Pair top-ranked tickers with their least-correlated remaining partner.

    Working down the ranking, the first remaining ticker is matched with the
    remaining ticker whose monthly returns have the lowest correlation with
    it (on a tie, the later-ranked one wins).  Both are then removed and the
    process repeats.

    Parameters
    ----------
    rank : pandas.DataFrame
        Ranking table whose first column holds the ticker symbols, best first.
    prices : pandas.DataFrame
        Closing prices with one column per ticker, indexed by date.
    n_pairs : int, default 10
        Maximum number of pairs to build.

    Returns
    -------
    pandas.DataFrame
        For every ticker of every pair, a ``<ticker>_price`` column (first
        price of each month) and a ``<ticker>_return`` column (monthly
        return), in pairing order.  Empty when no pair can be formed.

    Notes
    -----
    Every remaining ticker, including the last one, is a candidate.  The
    partner is reset for each round; a ticker with no candidate of defined
    correlation is skipped rather than paired with a stale partner.  Pairing
    stops early when fewer than two tickers remain.
    """
    tickers = rank.iloc[:, 0].tolist()
    if n_pairs <= 0 or len(tickers) < 2:
        return pd.DataFrame()

    # computed once for all tickers instead of once per candidate per round
    month_open = prices.resample('MS').first()
    monthly_returns = get_monthly_returns(prices)

    columns = {}
    paired = 0
    while paired < n_pairs and len(tickers) >= 2:
        anchor, candidates = tickers[0], tickers[1:]
        partner, lowest = None, None
        for candidate in candidates:
            corr = monthly_returns[anchor].corr(monthly_returns[candidate])
            if pd.isna(corr):
                # undefined correlation (e.g. constant returns) cannot be compared
                continue
            if lowest is None or corr <= lowest:
                lowest, partner = corr, candidate

        # the anchor is consumed either way; the partner too when one was found
        tickers = [t for t in candidates if t != partner]
        if partner is None:
            continue

        for symbol in (anchor, partner):
            columns[symbol + '_price'] = month_open[symbol]
            columns[symbol + '_return'] = monthly_returns[symbol]
        paired += 1

    return pd.DataFrame(columns)
            
        
        

In [23]:
# Build the table of paired tickers (monthly prices and returns) from the combined ranking
pairs = pair_by_corr(std_beta_rank,prices)

Unnamed: 0_level_0,KMI_price,KMI_return,AAPL_price,AAPL_return,T_price,T_return,MO_price,MO_return,MON_price,MON_return,...,PM_price,PM_return,BAC_price,BAC_return,ABBV_price,ABBV_return,CVS_price,CVS_return,AXP_price,AXP_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-07-01,18.005268,,139.546967,,28.158052,,46.675827,,9.655,,...,99.07695,,40.940205,,112.535881,,81.460457,,168.082153,
2021-08-01,17.050467,-0.053029,145.090546,0.039726,27.574768,-0.020715,46.872234,0.004208,9.705,0.005179,...,98.810242,-0.002692,37.766464,-0.077521,114.067108,0.013607,81.933922,0.005812,168.85025,0.00457
2021-09-01,15.908519,-0.066975,152.287735,0.049605,26.672285,-0.032729,49.180004,0.049235,9.68,-0.002576,...,102.297195,0.035289,40.98,0.08509,110.925194,-0.027544,85.214851,0.040044,164.032227,-0.028534
2021-10-01,16.646847,0.046411,142.442108,-0.064651,26.642857,-0.001103,45.860001,-0.067507,9.76,0.008264,...,96.080002,-0.060776,43.080002,0.051245,107.783287,-0.028325,83.584328,-0.019134,173.508667,0.057772


## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.