In [16]:
import pandas as pd
import numpy as np
import yfinance as yf
from yfinance import EquityQuery

import time
from datetime import datetime
from dateutil.relativedelta import relativedelta
import random

pd.set_option('display.max_rows', 50)

In [None]:
# dat = yf.Ticker("2YY=F")
# dat.info




#US stocks
# exchanges = ['NMS', 'NYQ', 'NGM']
# countries = ['us']

# selected_stocks_dict, ticker_list = get_tickers_stocks(countries, 5000000, exchanges, 120)

# df_us = get_close_prices(ticker_list, period = 2)
# df_us

In [29]:
#Here we first create a filter with EquityQuery and then use it in the yf.screen() function
def get_tickers_stocks(min_dayvolume, exchanges, n):
    """Screen Yahoo Finance for liquid stocks on the given exchanges.

    An ``EquityQuery`` filter (exchange membership AND ``dayvolume`` greater
    than ``min_dayvolume``) is passed to ``yf.screen()``. Results are sorted
    descending on ``lastclosemarketcap.lasttwelvemonths`` and capped at ``n``.

    Parameters
    ----------
    min_dayvolume : number
        Lower bound (exclusive, ``gt`` operator) on the ``dayvolume`` field.
    exchanges : sequence of str
        Exchange codes to screen, e.g. ``['NMS', 'NYQ']``. Not modified.
    n : int
        Value forwarded as ``size`` to ``yf.screen()``.

    Returns
    -------
    selected_stocks : dict
        ``{ticker: {'name': ..., 'type': ..., 'exchange': ...}}``. A field
        that is absent from a quote is simply left out of that sub-dict.
    ticker_list : list of str
        Symbols in the order returned by the screener.
    """
    # 'is-in' operands are the field name followed by the accepted values.
    exchange_operands = ['exchange'] + list(exchanges)

    q = EquityQuery('and', [
        EquityQuery('is-in', exchange_operands),
        EquityQuery('gt', ['dayvolume', min_dayvolume]),
    ])

    # Keep the top-n matches by market cap.
    response = yf.screen(q, sortField='lastclosemarketcap.lasttwelvemonths', sortAsc=False, size=n)

    # Output key -> key in the screener quote.
    field_map = {'name': 'shortName', 'type': 'quoteType', 'exchange': 'fullExchangeName'}

    selected_stocks = {}
    ticker_list = []
    # An empty/None response yields empty results; callers already check the list length.
    for stock in (response or {}).get('quotes', []):
        ticker = stock['symbol']
        ticker_list.append(ticker)
        # Copy every field that is present instead of abandoning the rest
        # of the record as soon as one key is missing.
        selected_stocks[ticker] = {
            out_key: stock[src_key]
            for out_key, src_key in field_map.items()
            if src_key in stock
        }

    return selected_stocks, ticker_list





def get_close_prices(ticker_list, period = 2, start = '2022-01-01'):
    
    date_obj = datetime.strptime(start, '%Y-%m-%d')
    end = date_obj + relativedelta(years=period)
    enddate = end.strftime('%Y-%m-%d')

    df = yf.download(ticker_list, start=start, end=enddate)
    df_close = df['Close']

    df_return = df_close.dropna(how='all').copy()

    #Drop tickers that have more that 10% missing data
    df_return = df_return.drop(columns=df_return.columns[df_return.isna().mean() >= 0.1])
        
    #Impute missing data with the previous price (for eu and asia mostly)
    df_cleaned = df_return.ffill().copy()

    
    return df_cleaned





def double_listed_stocks(full_stocks_dict):
    """Return tickers whose company name was already seen under an earlier ticker.

    Parameters
    ----------
    full_stocks_dict : dict
        ``{ticker: sub_dict}`` where ``sub_dict['name']`` holds the company
        name. Iteration order decides which listing counts as the first one.

    Returns
    -------
    list
        Tickers (in iteration order) whose name duplicates an earlier entry.
        Entries without a ``'name'`` are reported via ``print`` and skipped.
    """
    seen_names = set()
    duplicated_tickers = []
    for ticker, sub_dict in full_stocks_dict.items():
        try:
            name = sub_dict['name']
        except (KeyError, TypeError):
            print(f'There is no name found in dict for {ticker}')
            # Skip: otherwise the previous iteration's name would be reused
            # (or a NameError raised on the very first entry).
            continue

        if name in seen_names:
            duplicated_tickers.append(ticker)
        else:
            seen_names.add(name)

    return duplicated_tickers




#Sharpe Ratio calculation

def sharpe_ratio_calculation(df, rf_rate_annual = 0.02, periods_per_year = 252):
    """Compute the annualised Sharpe ratio of each column of a price frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Prices, one column per asset; returns are taken row to row with
        ``pct_change()``.
        NOTE(review): presumably one row per trading day - confirm with caller.
    rf_rate_annual : float
        Annual risk-free rate subtracted from the annualised mean return.
    periods_per_year : int or float
        Number of rows that make up one year. The default of 252 matches the
        previous hard-coded value; pass e.g. 365 for assets priced every
        calendar day.

    Returns
    -------
    pandas.Series
        ``(mean * periods_per_year - rf_rate_annual) / (std * sqrt(periods_per_year))``
        per column.
    """
    period_returns = df.pct_change()

    # Scale the per-period mean linearly and the volatility by the square root of time.
    return_annual = period_returns.mean() * periods_per_year
    sigma_annual = period_returns.std() * np.sqrt(periods_per_year)

    return (return_annual - rf_rate_annual) / sigma_annual




def generate_rand_portfolios(n_reps:int, n_stocks:int, tickers:list):
    """Draw ``n_reps`` random portfolios of ``n_stocks`` tickers each.

    Each portfolio is sampled without replacement from ``tickers`` using the
    global ``random`` module state, so seeding ``random`` beforehand makes
    the result reproducible.

    Returns a dict mapping ``'portfolio_<i>'`` (``i`` from 0 to
    ``n_reps - 1``) to the list of sampled tickers.
    """
    return {
        f'portfolio_{rep}': random.sample(tickers, n_stocks)
        for rep in range(n_reps)
    }

In [None]:
#GET THE STOCKS

us_exchanges = ['NMS', 'NYQ', 'NGM']
eu_exchanges = ['PAR', 'FRA', 'LSE', 'AMS']
asia_exchanges = ['SHH', 'JPX', 'HKG']

selected_exchanges = us_exchanges + eu_exchanges + asia_exchanges

full_selected_stocks = {}
# Collect one close-price frame per exchange and concatenate once after the
# loop, instead of re-copying a growing DataFrame on every iteration.
price_frames = []
for exchange in selected_exchanges:
    print(f'Extracting from {exchange}')
    exchanges = [exchange]
    # Screen each exchange separately: volume above 50000, at most 50 tickers.
    selected_stocks_dict, ticker_list = get_tickers_stocks(50000, exchanges, 50)

    full_selected_stocks.update(selected_stocks_dict)

    # Skip the download when the screener returned nothing for this exchange.
    if len(ticker_list) > 0:
        df = get_close_prices(ticker_list, period = 2, start = '2022-01-01')
        price_frames.append(df)

# Column-wise concat aligns the exchanges on the union of their dates.
df_all = pd.concat(price_frames, axis=1) if price_frames else pd.DataFrame()

doubly_listed_tickers = double_listed_stocks(full_selected_stocks)

# Some duplicated tickers were already removed by the missing-data filter in
# get_close_prices, so absent columns are ignored rather than raising.
df_all = df_all.drop(columns=doubly_listed_tickers, errors='ignore')

df_all = df_all.ffill() #ffill again after concatenating the tickers

In [None]:
#GETTING CRYPTOS FROM COINBASE 50 INDEX
#https://www.marketvector.com/factsheets/download/COIN50.d.pdf

coinbase_50_cryptos = ['BTC', 'ETH', 'XRP', 'SOL', 'DOGE', 'ADA', 'LINK', 'XLM', 'AVAX', 'SHIB', 'DOT', 'LTC', 'BCH', 
                       'UNI', 'NEAR', 'PEPE', 'APT', 'ICP', 'ETC', 'AAVE', 'RNDR', 'ATOM', 'MATIC', 'ALGO', 'EOS', 'MKR', 
                       'ASI', 'QNT', 'BONK', 'STX', 'INJ', 'GRT', 'LDO', 'XTZ', 'CRV', 'SAND', 'ZEC', 'HNT', 'JASMY', 'MANA', 
                       'AXS', 'WIF', 'CHZ', 'COMP', 'APE', 'AERO', '1INCH', 'SNX', 'ROSE', 'LPT']

# Build the tickers passed to the download by appending "-USD" to each symbol.
crypto_tickers_fixed = [tick + "-USD" for tick in coinbase_50_cryptos]

cryptos_df = get_close_prices(crypto_tickers_fixed, period = 2, start = '2022-01-01')

# Report on cryptos_df itself (previously this inspected `df`, the last stock
# frame left over from the stock-download cell). `.any().sum()` counts the
# columns that still contain at least one NaN, e.g. leading gaps that a
# forward fill cannot reach.
print('Number of columns with NA values in cryptos_df is ', cryptos_df.isna().any().sum())

cryptos_df

In [13]:
# Show the combined close-price frame built in the stock-download cell
# (the rendering is truncated by pandas' display options).
df_all

Ticker,AAPL,ADBE,ADI,ADP,AMAT,AMD,AMGN,AMZN,APP,ASML,...,3328.HK,3968.HK,3988.HK,6030.HK,6066.HK,6099.HK,6690.HK,6818.HK,6886.HK,9633.HK
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-03,178.879929,564.369995,167.036636,227.886505,155.430908,150.240005,204.506332,170.404495,94.379997,773.428345,...,3.642475,51.972218,2.146322,17.463108,7.311821,9.706450,30.106386,2.078732,10.809107,50.104034
2022-01-04,176.609680,554.000000,165.527969,228.951202,153.905090,144.419998,205.543777,167.522003,88.610001,755.680542,...,3.688582,52.101391,2.184243,17.719921,7.337659,9.575280,29.553127,2.086182,10.859382,45.203068
2022-01-05,171.911835,514.429993,162.972672,226.999298,149.415054,136.149994,203.108017,164.356995,82.029999,723.171814,...,3.727005,53.177868,2.214580,17.086454,7.242923,9.493300,29.968071,2.056380,10.758832,44.346588
2022-01-06,169.042068,514.119995,163.491302,225.439651,151.941925,136.229996,203.135086,163.253998,82.150002,740.095459,...,3.703951,51.972218,2.199411,17.017973,7.174026,9.378528,30.705746,2.086182,10.658282,44.489330
2022-01-07,169.209152,510.700012,159.201065,223.058167,146.567505,132.000000,205.047607,162.554001,80.449997,733.287170,...,3.757743,54.297401,2.252501,17.163496,7.329047,9.476905,31.074583,2.153238,10.842624,44.203838
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-25,192.444595,598.750000,192.972641,224.810135,160.424896,139.600006,273.615173,153.419998,41.209999,746.537109,...,4.262811,24.267561,2.611336,14.436060,6.240967,5.724939,20.562372,1.983270,8.641578,43.897945
2023-12-26,191.897858,598.260010,194.705582,226.338257,162.632523,143.410004,273.364807,153.410004,40.400002,756.606262,...,4.262811,24.267561,2.611336,14.436060,6.240967,5.724939,20.562372,1.983270,8.641578,43.897945
2023-12-27,191.997269,596.080017,195.175537,225.433075,162.563232,146.070007,275.897217,153.339996,40.650002,757.945557,...,4.344094,24.455683,2.647355,14.511443,6.316501,5.809267,20.851984,1.992164,8.761857,44.143459
2023-12-28,192.424713,595.520020,196.046906,226.289612,161.484146,148.759995,277.755615,153.380005,40.759998,751.814758,...,4.380219,25.537376,2.674369,14.850673,6.486451,5.959184,21.286398,2.018845,8.983910,44.388973


In [None]:
#Random Portfolios generation

# Candidate universe: every column label of df_all.
tickers = df_all.columns.tolist()

# Seed the stdlib RNG so the same portfolios are drawn on every run.
random.seed(42)
random_portfolios = generate_rand_portfolios(1000, 15, tickers)
