In [53]:
import math
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import numpy_financial as npf
import pandas as pd
import yfinance as yf
from IPython.display import display, Math, Latex

## Group Assignment
### Team Number: 4
### Team Member Names: Jacky Xu, Esha Kumar, Jingyi Fang
### Team Strategy Chosen: SAFE

In [3]:
# Load the candidate tickers; the CSV has no header row, so its single column is named "ticker"
tickers = pd.read_csv("Tickers.csv", header=None).set_axis(['ticker'], axis=1)
# Plain Python list of the ticker strings, in file order
ticker_lst = tickers['ticker'].tolist()

len(ticker_lst)

62

In [68]:
# define function call_tickers that takes a list of tickers and returns a list of called tickers
def call_tickers(lst):
    """Wrap every ticker symbol in ``lst`` in a ``yf.Ticker`` object.

    Returns a new list with one called ticker per input symbol, in input order.
    """
    return [yf.Ticker(symbol) for symbol in lst]

# define function filter_USD that takes a list of called tickers and returns a list of only the USD tickers
def filter_USD(lst):
    """Keep the called tickers whose ``info['currency']`` equals ``'USD'``.

    Parameters
    ----------
    lst : iterable of called tickers (objects exposing an ``info`` mapping).

    Returns
    -------
    list
        A new list with the USD tickers, in input order.

    Notes
    -----
    Tickers whose ``info`` lookup fails (e.g. delisted tickers) or that have
    no 'currency' entry are skipped on purpose.  Only ``Exception`` is caught,
    so Ctrl-C / kernel interrupt still stops the loop.
    """
    usd_lst = []
    for called_ticker in lst:
        try:
            currency = called_ticker.info['currency']
        except Exception:
            # best-effort screen: a ticker we cannot look up is simply not kept
            continue
        if currency == 'USD':
            usd_lst.append(called_ticker)
    return usd_lst

# define function filter_daily_volume that takes a called ticker list, start date, end date,
# and returns a list of tickers with an average daily volume of at least min_volume shares in that time period
def filter_daily_volume(lst, start, end, min_volume=10000):
    """Keep the called tickers whose mean daily volume is at least ``min_volume``.

    Parameters
    ----------
    lst : iterable of called tickers (objects with a ``history`` method).
    start, end : passed straight to ``history(start=..., end=...)``.
    min_volume : minimum average daily volume required to keep a ticker
        (defaults to the original hard-coded 10000).

    Returns
    -------
    list
        A new list with the qualifying tickers, in input order.  The input
        list is not modified.

    Notes
    -----
    A ticker whose mean volume is NaN (no volume data in the window) does not
    satisfy ``>= min_volume`` and is therefore dropped.
    """
    kept = []
    for called_ticker in lst:
        volume = called_ticker.history(start=start, end=end, interval='1d')['Volume']
        if volume.mean() >= min_volume:
            kept.append(called_ticker)
    # return only after every ticker has been checked
    return kept


    
def get_prices(lst, start, end):
    """Collect the daily closing prices of every called ticker into one frame.

    Parameters
    ----------
    lst : iterable of called tickers (objects with a ``history`` method).
    start, end : passed straight to ``history(start=..., end=...)``.

    Returns
    -------
    pandas.DataFrame
        One column of 'Close' prices per ticker, named after the ticker symbol.
        Columns are added one at a time, so the first ticker's dates become
        the index and later tickers are aligned to them.
    """
    prices = pd.DataFrame()
    for called_ticker in lst:
        # Prefer the object's own symbol attribute; fall back to slicing it out of
        # the "yfinance.Ticker object <SYMBOL>" repr only if the attribute is absent.
        symbol = getattr(called_ticker, 'ticker', None) or str(called_ticker)[24:-1]
        prices[symbol] = called_ticker.history(start=start, end=end, interval='1d')['Close']
    return prices

def get_monthly_returns(prices):
    """Month-over-month returns based on the first available price of each month.

    ``prices`` must be indexed by dates.  The first month has no previous
    month to compare against, so its (NaN) row is left out of the result.
    """
    month_start_prices = prices.resample('MS').first()
    changes = month_start_prices.pct_change()
    return changes.iloc[1:]
    

def get_std(prices):
    """Standard deviation of every column of ``prices``.

    Parameters
    ----------
    prices : pandas.DataFrame with one column per ticker.

    Returns
    -------
    pandas.DataFrame
        Columns 'ticker' (the column name) and 'std' (that column's ``.std()``),
        one row per ticker, in column order, indexed 0..n-1.

    Notes
    -----
    The frame is built in one step instead of being grown row by row, which
    also gives 'std' a numeric dtype.
    NOTE(review): this is the standard deviation of the price level, not of
    returns, so it depends on how expensive the stock is - confirm that is
    the intended risk measure.
    """
    names = list(prices.columns)
    deviations = [prices[name].std() for name in names]
    return pd.DataFrame({'ticker': names, 'std': deviations}, columns=['ticker', 'std'])

def rank(df, column_name, method):
    """Add a '<column_name>_rank' column holding the rank of ``df[column_name]``.

    ``method`` is the tie-breaking method handed to ``Series.rank``.  The
    column is added to ``df`` itself, and that same frame is returned.
    """
    rank_column = column_name + '_rank'
    ranks = df[column_name].rank(method=method)
    df[rank_column] = ranks
    return df

def get_std_beta_rank(std, beta, top_n=40):
    """Combine the std and beta rankings and keep the best ``top_n`` tickers.

    Parameters
    ----------
    std : pandas.DataFrame containing a 'std_rank' column (all of its columns
        are carried into the result).
    beta : pandas.DataFrame containing 'beta' and 'beta_rank' columns.
    top_n : number of rows to keep after sorting (defaults to the original
        hard-coded 40).

    Returns
    -------
    pandas.DataFrame
        ``std`` joined with the beta columns on their shared index labels,
        plus 'final_rank' = 'std_rank' + 'beta_rank', sorted by 'final_rank'
        ascending and truncated to the first ``top_n`` rows.
    """
    # inner join: only index labels present in both frames survive
    combined = pd.concat([std, beta['beta'], beta['beta_rank']], join='inner', axis=1)
    combined['final_rank'] = combined['std_rank'] + combined['beta_rank']
    combined = combined.sort_values(by='final_rank')
    # return the frame built here (the original returned an undefined name)
    return combined[:top_n]

In [5]:
# Getting the Symbol (Ticker) of each Stock
def get_ticker_Symbol (lst):
    """Return the ``info['symbol']`` string of every called ticker in ``lst``, in order."""
    return [called_ticker.info['symbol'] for called_ticker in lst]

In [6]:
# Calculating Beta
def get_beta(good_tickers, prices, start_date, end_date, ticker_symbols):
    """Beta of every stock against the S&P 500, from monthly returns.

    Parameters
    ----------
    good_tickers : called tickers; used only to label the rows of the result.
    prices : pandas.DataFrame of closing prices with one column per entry of
        ``ticker_symbols``.
    start_date, end_date : window of index history to download.
    ticker_symbols : column names of ``prices``, in the same order as
        ``good_tickers``.

    Returns
    -------
    pandas.DataFrame
        Columns 'ticker' and 'beta', one row per stock, indexed 0..n-1, where
        beta = cov(stock, index) / var(index) of the monthly returns.
    """
    market_symbol = '^GSPC'  # The symbol yfinance uses for the S&P 500
    market_hist = yf.Ticker(market_symbol).history(start=start_date, end=end_date)
    market_close = market_hist['Close'].rename(market_symbol)

    records = []
    for called_ticker, symbol in zip(good_tickers, ticker_symbols):
        # keep only the dates present for both the stock and the index
        paired = pd.concat([prices[symbol], market_close], join='inner', axis=1)

        # first price of each month -> month-over-month change; the first row
        # has no previous month and is dropped
        monthly_returns = paired.resample('MS').first().pct_change().iloc[1:]

        market_var = monthly_returns[market_symbol].var()
        # row 0 is the stock, column 1 is the index
        covariance = monthly_returns.cov().iat[0, 1]

        # Prefer the object's own symbol attribute; fall back to slicing it out of
        # the "yfinance.Ticker object <SYMBOL>" repr only if the attribute is absent.
        label = getattr(called_ticker, 'ticker', None) or str(called_ticker)[24:-1]
        records.append((label, covariance / market_var))

    # build the frame once instead of growing it row by row
    return pd.DataFrame(records, columns=['ticker', 'beta'])

In [9]:
# Price-history window used by every screen and statistic below
start_date = "2021-07-02"
end_date = "2021-10-22"

# Screening: call every ticker, keep the USD ones, then keep those that meet
# the average-daily-volume requirement over the window
called_tickers = call_tickers(ticker_lst)
usd_tickers = filter_USD(called_tickers)
good_tickers = filter_daily_volume(usd_tickers, start_date, end_date)

# Symbols (list of strings) and closing prices of the tickers that passed
ticker_symbols = get_ticker_Symbol(good_tickers)
prices = get_prices(good_tickers, start_date, end_date)

# Standard deviation of the closing prices, with its rank
std = rank(get_std(prices), 'std', 'min')

# Beta computed from the closing prices, with its rank
beta = rank(get_beta(good_tickers, prices, start_date, end_date, ticker_symbols), 'beta', 'min')

# Combine the std rank and the beta rank, then order by the combined rank
std_beta_rank = get_std_beta_rank(std, beta)

In [72]:
def pair_by_corr(rank, prices, n_pairs=10):
    """Pair each top-ranked stock with its least-correlated remaining stock.

    Starting from the first row of ``rank``, the stock is matched with the
    remaining stock whose monthly returns have the lowest correlation with
    its own; both are then removed and the process repeats.

    Parameters
    ----------
    rank : pandas.DataFrame with a 'ticker' column, best-ranked first.
    prices : pandas.DataFrame of date-indexed closing prices, one column per ticker.
    n_pairs : maximum number of pairs to build (defaults to the original
        hard-coded 10).

    Returns
    -------
    pandas.DataFrame
        For each paired stock, '<ticker>_price' (first price of each month)
        and '<ticker>_return' (month-over-month change of that price; NaN for
        the first month), in pairing order.

    Notes
    -----
    * Monthly returns use the same definition as ``get_monthly_returns``
      (first price of each month, percentage change, first row dropped) but
      are computed once for all tickers instead of once per candidate.
    * Every remaining stock is a candidate, including the last row.
    * Pairing stops early when fewer than two stocks remain.  A stock with no
      valid (non-NaN) correlation to any candidate is dropped unpaired.
    """
    monthly_prices = prices.resample('MS').first()
    monthly_returns = monthly_prices.pct_change().iloc[1:]

    columns = {}
    pairs_found = 0
    while pairs_found < n_pairs and len(rank) >= 2:
        remaining = rank['ticker'].tolist()
        anchor = remaining[0]
        anchor_returns = monthly_returns[anchor]

        champion = None
        min_corr = 1
        for candidate in remaining[1:]:
            corr = anchor_returns.corr(monthly_returns[candidate])
            # "<=" keeps the later candidate on ties; NaN never compares true
            if corr <= min_corr:
                min_corr = corr
                champion = candidate

        # the anchor leaves the pool whether or not a partner was found
        rank = rank[1:]
        if champion is None:
            continue

        for name in (anchor, champion):
            columns[name + '_price'] = monthly_prices[name]
            columns[name + '_return'] = monthly_returns[name]
        rank = rank[rank['ticker'] != str(champion)]
        pairs_found += 1

    return pd.DataFrame(columns)
            
        
        

In [73]:
# Build the pairs table from the combined std/beta ranking and the closing prices
pairs = pair_by_corr(std_beta_rank,prices)

In [74]:
def get_w1(stock1, stock2, pairs):
    """Find the weight of ``stock1`` (0-100, in percent) for a two-stock pair.

    Every whole-percent split between the two stocks is tried; for each one
    the ratio mean(return) / std(return) of the blended position is computed,
    and the split with the lowest ratio wins (the later split on ties).

    Parameters
    ----------
    stock1, stock2 : ticker names; ``pairs`` must contain '<name>_price'
        columns for both.
    pairs : pandas.DataFrame of prices per period.

    Returns
    -------
    int
        Percentage weight of ``stock1``; ``stock2`` gets ``100 - weight``.
        0 is returned if no split yields a comparable (non-NaN) ratio.

    Notes
    -----
    NOTE(review): the search keeps the *lowest* mean/std ratio - confirm that
    minimising (rather than maximising) is what the strategy intends.
    """
    price1 = pairs[stock1 + '_price']
    price2 = pairs[stock2 + '_price']

    # Value over time of 500000 put into each stock at the first price.
    # .iloc[0] is positional; plain [0] on a date-indexed Series is a label
    # lookup that pandas 2.x only tolerates with a deprecation warning.
    stock1_value = price1 * (500000 / price1.iloc[0])
    stock2_value = price2 * (500000 / price2.iloc[0])

    # start from +inf so the first valid ratio is always accepted
    min_ratio = float('inf')
    stock1_weight = 0
    for i in range(0, 101):
        value = stock1_value * (i / 100) + stock2_value * (1 - i / 100)
        returns = value.pct_change() * 100
        sharpe_ratio = returns.mean() / returns.std()
        if sharpe_ratio <= min_ratio:
            min_ratio = sharpe_ratio
            stock1_weight = i
    return stock1_weight

def get_range(stock1_weight, min_weight=2.5, max_weight=35):
    """Allowed share of the whole portfolio (in percent) for a two-stock pair.

    Inside the pair, stock 1 holds ``stock1_weight`` percent and stock 2 the
    rest.  The pair's share of the portfolio must be large enough that the
    smaller holding still reaches ``min_weight`` percent of the portfolio, and
    small enough that the larger holding stays within ``max_weight`` percent.

    Parameters
    ----------
    stock1_weight : weight of stock 1 inside the pair, in percent (0-100).
    min_weight, max_weight : per-stock portfolio limits in percent (defaults
        are the original hard-coded 2.5 and 35).

    Returns
    -------
    tuple of int
        ``(range_min, range_max)``: the lower bound rounded up and the upper
        bound rounded down, both in whole percent.

    Raises
    ------
    ZeroDivisionError
        If ``stock1_weight`` is 0 or 100: the smaller holding is then empty
        and no pair size can satisfy the minimum.
    """
    # the rule is symmetric: only which holding is bigger matters
    larger = max(stock1_weight, 100 - stock1_weight)
    smaller = 100 - larger
    # multiply before dividing so evenly dividing weights give exact integers
    range_min = 100 * min_weight / smaller
    range_max = 100 * max_weight / larger
    return math.ceil(range_min), math.floor(range_max)
    

In [75]:
# Weight of KMI (in percent) within the KMI/AAPL pair; both tickers must have
# '<ticker>_price' columns in `pairs`
w1 = get_w1('KMI','AAPL',pairs)
# Bounds derived from that weight by get_range
range_min,range_max = get_range(w1)
range_min,range_max

(6, 64)

## Contribution Declaration

The following team members made a meaningful contribution to this assignment:

Insert Names Here.