In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from yfinance import EquityQuery

import time
from datetime import datetime
from dateutil.relativedelta import relativedelta


pd.set_option('display.max_rows', 50)

In [2]:
# dat = yf.Ticker("2YY=F")
# dat.info




#US stocks
# exchanges = ['NMS', 'NYQ', 'NGM']
# countries = ['us']

# selected_stocks_dict, ticker_list = get_tickers_stocks(countries, 5000000, exchanges, 120)

# df_us = get_close_prices(ticker_list, period = 2)
# df_us

In [27]:
#Here we first create a filter with EquityQuery and then use it in the yf.screen() function
def get_tickers_stocks(min_dayvolume, exchanges, n):

    #markets = ['region'] + markets
    exchanges = ['exchange'] + exchanges
    
    q = EquityQuery('and', [            #EquityQuery('is-in', markets), #remove the notion of a region
                  EquityQuery('is-in', exchanges),
                  EquityQuery('gt', ['dayvolume', min_dayvolume])
    ])


    response = yf.screen(q, sortField = 'lastclosemarketcap.lasttwelvemonths', sortAsc = False, size=n) #select top 100 companies by market cap


    selected_stocks = {}
    ticker_list = []
    for stock in response['quotes']:
        ticker = stock['symbol']
        ticker_list.append(stock['symbol'])
        selected_stocks[ticker] = {} #initialize the new sub dictionary
        try:
            selected_stocks[ticker]['name'] = stock['shortName']
            selected_stocks[ticker]['type'] = stock['quoteType']
            selected_stocks[ticker]['exchange'] = stock['fullExchangeName']
        except:
            continue

    return selected_stocks, ticker_list





def get_close_prices(ticker_list, period = 2, start = '2022-01-01'):
    
    date_obj = datetime.strptime(start, '%Y-%m-%d')
    end = date_obj + relativedelta(years=period)
    enddate = end.strftime('%Y-%m-%d')

    df = yf.download(ticker_list, start=start, end=enddate)
    df_close = df['Close']

    df_return = df_close.dropna(how='all').copy()

    #Drop tickers that have more that 10% missing data
    df_return = df_return.drop(columns=df_return.columns[df_return.isna().mean() >= 0.1])
        
    #Impute missing data with the previous price (for eu and asia mostly)
    df_cleaned = df_return.ffill().copy()

    
    return df_cleaned





def double_listed_stocks(full_stocks_dict):
    
    company_names = []
    duplicated_tickers = []
    for ticker, sub_dict in full_stocks_dict.items():

        try:
            name = sub_dict['name']
        except:
            print(f'There is no name found in dict for {ticker}')

        if name not in company_names:
            company_names.append(name)
        else:
            duplicated_tickers.append(ticker)
    
    return duplicated_tickers




#Sharpe Ratio calculation

def sharpe_ratio_calculation(df, rf_rate_annual = 0.02, ):
    df_pct_change = df.pct_change()

    avg_return = df_pct_change.mean()
    sigma = df_pct_change.std()

    return_annual = avg_return * 252
    sigma_annual = sigma * np.sqrt(252)

    sharpe_ratio = (return_annual - rf_rate_annual) / sigma_annual

    return sharpe_ratio

In [4]:
#GET THE STOCKS

us_exchanges = ['NMS', 'NYQ', 'NGM']
eu_exchanges = ['PAR', 'FRA', 'LSE', 'AMS']
asia_exchanges = ['SHH', 'JPX', 'HKG']

selected_exchanges = us_exchanges + eu_exchanges + asia_exchanges

full_selected_stocks = {}
df_all = pd.DataFrame()
for exchange in selected_exchanges:
    print(f'Extracting from {exchange}')
    exchanges = [exchange]
    selected_stocks_dict, ticker_list = get_tickers_stocks(50000, exchanges, 50)

    full_selected_stocks.update(selected_stocks_dict)

    if len(ticker_list) > 0: 
        print('YES')
        df = get_close_prices(ticker_list, period = 2, start = '2022-01-01')
        df_all = pd.concat([df_all, df], axis=1)

doubly_listed_tickers = double_listed_stocks(full_selected_stocks)

for ticker_to_drop in doubly_listed_tickers:
    try:
        df_all = df_all.drop(columns=[ticker_to_drop])
    except:
        pass


df_all = df_all.ffill() #ffill again after concatenating the tickers

Extracting from NMS


[******                12%                       ]  6 of 50 completed

YES
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  50 of 50 completed

1 Failed download:
['KSPI']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1641013200, endDate = 1704085200")')


Extracting from NYQ
YES


[*********************100%***********************]  50 of 50 completed


Extracting from NGM
YES


[*********************100%***********************]  50 of 50 completed

2 Failed downloads:
['SOUNW']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01)')
['DJTWW']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1641013200, endDate = 1704085200")')
[**                     4%                       ]  2 of 50 completed

Extracting from PAR
YES


[*********************100%***********************]  50 of 50 completed

8 Failed downloads:
['ACALM.PA', 'ACAKP.PA', 'ACAKV.PA', 'ACALG.PA', 'ACAKR.PA', 'ACALB.PA', 'CNPAP.PA', 'ACAKZ.PA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01)')


Extracting from FRA
YES


[*********************100%***********************]  50 of 50 completed

3 Failed downloads:
['7D0.F', 'D7G.F', 'I41.F']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1640991600, endDate = 1704063600")')
[**                     4%                       ]  2 of 50 completed

Extracting from LSE
YES


[*********************100%***********************]  50 of 50 completed

1 Failed download:
['0QYR.L']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01)')


Extracting from AMS
YES


[*********************100%***********************]  50 of 50 completed

6 Failed downloads:
['NL0000120004.AS', 'NL0011220108.AS', 'NL0015000QL2.AS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01)')
['THEON.AS', 'HAVAS.AS', 'CVC.AS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1640991600, endDate = 1704063600")')
[                       0%                       ]

Extracting from SHH
YES


[*********************100%***********************]  50 of 50 completed


Extracting from JPX
YES


[*********************100%***********************]  50 of 50 completed
[                       0%                       ]

Extracting from HKG
YES


[*********************100%***********************]  50 of 50 completed

2 Failed downloads:
['0300.HK', '6936.HK']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-01-01 -> 2024-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1640966400, endDate = 1704038400")')


There is no name found in dict for SOUNW


In [None]:
#GETTING CRYPTOS FROM COINBASE 50 INDEX
#https://www.marketvector.com/factsheets/download/COIN50.d.pdf

coinbase_50_cryptos = ['BTC', 'ETH', 'XRP', 'SOL', 'DOGE', 'ADA', 'LINK', 'XLM', 'AVAX', 'SHIB', 'DOT', 'LTC', 'BCH', 
                       'UNI', 'NEAR', 'PEPE', 'APT', 'ICP', 'ETC', 'AAVE', 'RNDR', 'ATOM', 'MATIC', 'ALGO', 'EOS', 'MKR', 
                       'ASI', 'QNT', 'BONK', 'STX', 'INJ', 'GRT', 'LDO', 'XTZ', 'CRV', 'SAND', 'ZEC', 'HNT', 'JASMY', 'MANA', 
                       'AXS', 'WIF', 'CHZ', 'COMP', 'APE', 'AERO', '1INCH', 'SNX', 'ROSE', 'LPT']

crypto_tickers_fixed = [tick + "-USD" for tick in coinbase_50_cryptos]

cryptos_df = get_close_prices(crypto_tickers_fixed, period = 2, start = '2022-01-01')
cryptos_df