In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from yfinance import EquityQuery

import cvxpy as cp
import time
from datetime import datetime
from dateutil.relativedelta import relativedelta
import random

pd.set_option('display.max_rows', 50)

In [None]:
# dat = yf.Ticker("2YY=F")
# dat.info




#US stocks
# exchanges = ['NMS', 'NYQ', 'NGM']
# countries = ['us']

# selected_stocks_dict, ticker_list = get_tickers_stocks(countries, 5000000, exchanges, 120)

# df_us = get_close_prices(ticker_list, period = 2)
# df_us

In [None]:
#Here we first create a filter with EquityQuery and then use it in the yf.screen() function
def get_tickers_stocks(min_dayvolume, exchanges, n):
    """Screen Yahoo Finance for the largest companies listed on ``exchanges``.

    An ``EquityQuery`` filter (exchange membership AND ``dayvolume`` greater
    than ``min_dayvolume``) is passed to ``yf.screen``; results are requested
    sorted by descending ``lastclosemarketcap.lasttwelvemonths``.
    Region filtering was removed on purpose: the exchange list alone scopes
    the screen.

    Parameters
    ----------
    min_dayvolume : number
        Strict lower bound used in the ``gt`` filter on ``dayvolume``.
    exchanges : sequence of str
        Exchange codes to include, e.g. ``['NMS', 'NYQ']``.
    n : int
        Forwarded as ``size`` to ``yf.screen`` (number of quotes requested).

    Returns
    -------
    tuple (dict, list)
        ``selected_stocks`` maps each ticker symbol to a dict holding whichever
        of ``'name'``, ``'type'`` and ``'exchange'`` the quote provides;
        ``ticker_list`` holds the symbols in the order they were returned.
    """
    # EquityQuery's 'is-in' operand is [field_name, value_1, value_2, ...]
    exchange_operand = ['exchange', *exchanges]

    q = EquityQuery('and', [
        EquityQuery('is-in', exchange_operand),
        EquityQuery('gt', ['dayvolume', min_dayvolume]),
    ])

    # Top ``n`` companies by market cap among those passing the filter
    response = yf.screen(q, sortField='lastclosemarketcap.lasttwelvemonths', sortAsc=False, size=n)

    # (key in our dict, key in the screener quote)
    field_map = (
        ('name', 'shortName'),
        ('type', 'quoteType'),
        ('exchange', 'fullExchangeName'),
    )

    selected_stocks = {}
    ticker_list = []
    for stock in response.get('quotes', []):
        ticker = stock['symbol']
        ticker_list.append(ticker)
        # Copy every field that is present; a missing one no longer discards
        # the fields that follow it (and no bare ``except`` hides other errors).
        selected_stocks[ticker] = {
            target: stock[source] for target, source in field_map if source in stock
        }

    return selected_stocks, ticker_list





def get_close_prices(ticker_list, period = 2, start = '2022-01-01'):
    """Download close prices for ``ticker_list`` and tidy up missing values.

    Parameters
    ----------
    ticker_list : list of str
        Tickers handed to ``yf.download``.
    period : int
        Length of the download window in years, counted from ``start``.
    start : str
        Window start in ``'%Y-%m-%d'`` format.

    Returns
    -------
    The ``'Close'`` data with all-NaN rows removed, tickers with 10% or more
    missing values dropped, and the remaining gaps forward-filled.
    """
    window_start = datetime.strptime(start, '%Y-%m-%d')
    window_end = (window_start + relativedelta(years=period)).strftime('%Y-%m-%d')

    downloaded = yf.download(ticker_list, start=start, end=window_end)

    # Rows where no ticker has a price carry no information
    closes = downloaded['Close'].dropna(how='all')

    # Drop tickers whose share of missing observations is 10% or more
    missing_share = closes.isna().mean()
    sparse_tickers = closes.columns[missing_share >= 0.1]
    closes = closes.drop(columns=sparse_tickers)

    # Impute the remaining gaps with the previous price (for eu and asia mostly)
    return closes.ffill()





def double_listed_stocks(full_stocks_dict):
    """Return the tickers whose company name already appeared under an earlier ticker.

    Parameters
    ----------
    full_stocks_dict : dict
        Maps ticker -> sub-dict; the company name is read from the
        ``'name'`` key of each sub-dict.

    Returns
    -------
    list
        Tickers (in iteration order) whose ``'name'`` equals the name of a
        ticker seen earlier. The first ticker carrying a given name is kept
        out of the list. Entries without a ``'name'`` are reported via
        ``print`` and skipped.
    """
    seen_names = set()
    duplicated_tickers = []

    for ticker, sub_dict in full_stocks_dict.items():
        if 'name' not in sub_dict:
            print(f'There is no name found in dict for {ticker}')
            # Skip: without this, the previous ticker's name (or an unbound
            # variable) would be compared and the ticker wrongly flagged.
            continue

        name = sub_dict['name']
        if name in seen_names:
            duplicated_tickers.append(ticker)
        else:
            seen_names.add(name)

    return duplicated_tickers




#Sharpe Ratio calculation

def sharpe_ratio_calculation(df, rf_rate_annual=0.02):
    """Compute an annualised Sharpe ratio for every column of ``df``.

    Period-over-period returns come from ``df.pct_change()``. Their mean is
    scaled by 252 and their standard deviation by ``sqrt(252)``; the result is
    ``(annual return - rf_rate_annual) / annual volatility``.

    Parameters
    ----------
    df : pandas.DataFrame
        Price levels, one column per asset.
    rf_rate_annual : float
        Annual risk-free rate subtracted from the annualised return.

    Returns
    -------
    The per-column Sharpe ratios.
    """
    periods_per_year = 252

    returns = df.pct_change()
    annual_excess_return = returns.mean() * periods_per_year - rf_rate_annual
    annual_volatility = returns.std() * np.sqrt(periods_per_year)

    return annual_excess_return / annual_volatility




def generate_rand_portfolios(n_reps:int, n_stocks:int, tickers:list):
    """Draw ``n_reps`` random portfolios of ``n_stocks`` distinct tickers each.

    Each portfolio is produced by ``random.sample(tickers, n_stocks)``, so the
    output depends on the state of the global ``random`` generator.

    Returns
    -------
    dict
        ``'portfolio_<i>'`` -> list of sampled tickers, for ``i`` in
        ``0 .. n_reps - 1``.
    """
    return {
        f'portfolio_{rep}': random.sample(tickers, n_stocks)
        for rep in range(n_reps)
    }

In [None]:
#GET THE STOCKS

us_exchanges = ['NMS', 'NYQ', 'NGM']
eu_exchanges = ['PAR', 'FRA', 'LSE', 'AMS']
asia_exchanges = ['SHH', 'JPX', 'HKG']

selected_exchanges = us_exchanges + eu_exchanges + asia_exchanges

full_selected_stocks = {}
# Collect one price frame per exchange and concatenate once after the loop,
# instead of re-copying a growing DataFrame on every iteration.
price_frames = []
for exchange in selected_exchanges:
    print(f'Extracting from {exchange}')
    exchanges = [exchange]
    # Screen each exchange separately: 50 quotes with dayvolume > 50000
    selected_stocks_dict, ticker_list = get_tickers_stocks(50000, exchanges, 50)

    full_selected_stocks.update(selected_stocks_dict)

    if len(ticker_list) > 0:
        print('YES')
        df = get_close_prices(ticker_list, period = 2, start = '2022-01-01')
        price_frames.append(df)

# Side-by-side join on the date index; empty frame when nothing was downloaded
df_all_stocks = pd.concat(price_frames, axis=1) if price_frames else pd.DataFrame()

doubly_listed_tickers = double_listed_stocks(full_selected_stocks)

# Some flagged tickers were already removed by get_close_prices (too many
# missing values), so absent labels are ignored rather than swallowing every
# exception with a bare ``except``.
df_all_stocks = df_all_stocks.drop(columns=doubly_listed_tickers, errors='ignore')


df_all_stocks = df_all_stocks.ffill() #ffill again after concatenating the tickers

In [None]:
#GETTING CRYPTOS FROM COINBASE 50 INDEX
#https://www.marketvector.com/factsheets/download/COIN50.d.pdf

coinbase_50_cryptos = ['BTC', 'ETH', 'XRP', 'SOL', 'DOGE', 'ADA', 'LINK', 'XLM', 'AVAX', 'SHIB', 'DOT', 'LTC', 'BCH', 
                       'UNI', 'NEAR', 'PEPE', 'APT', 'ICP', 'ETC', 'AAVE', 'RNDR', 'ATOM', 'MATIC', 'ALGO', 'EOS', 'MKR', 
                       'ASI', 'QNT', 'BONK', 'STX', 'INJ', 'GRT', 'LDO', 'XTZ', 'CRV', 'SAND', 'ZEC', 'HNT', 'JASMY', 'MANA', 
                       'AXS', 'WIF', 'CHZ', 'COMP', 'APE', 'AERO', '1INCH', 'SNX', 'ROSE', 'LPT']

# Append the "-USD" suffix to build the tickers passed to get_close_prices
crypto_tickers_fixed = [tick + "-USD" for tick in coinbase_50_cryptos]

cryptos_df = get_close_prices(crypto_tickers_fixed, period = 2, start = '2022-01-01')

# Count missing cells in cryptos_df itself (not in the `df` left over from the
# stock-download loop); .sum().sum() totals NA cells rather than NA columns.
print('Number of NA values in cryptos_df is ', cryptos_df.isna().sum().sum())

cryptos_df

In [None]:
#Random Portfolios generation

# Candidate universe: every ticker column present in df_all_stocks
tickers = df_all_stocks.columns.tolist()

# Seed the global generator so the 1000 sampled 15-stock portfolios are reproducible
random.seed(42)
random_portfolios = generate_rand_portfolios(
    n_reps=1000,
    n_stocks=15,
    tickers=tickers,
)

In [None]:
#Get the df of % change
df_pct_change = df_all_stocks.pct_change()

# Covariance matrix of the returns (computed once)
cov_matrix = df_pct_change.cov().values
# Symmetrise to remove floating-point asymmetry
cov_matrix = (cov_matrix + cov_matrix.T) / 2
# Ensure it's PSD: Add small ridge term to diagonal
cov_matrix += 1e-6 * np.eye(cov_matrix.shape[0])


n_assets = len(tickers)
w = cp.Variable(n_assets)  # portfolio weights

portfolio_variance = cp.quad_form(w, cov_matrix)
objective = cp.Minimize((1/2)*portfolio_variance)

z = cp.Variable(n_assets, boolean=True)  # 1 if the asset may carry weight
constraints = [
    cp.sum(w) == 1,  # Fully invested
    w >= 0,  # No shorting
    cp.sum(z) == 15,  # 15 assets are switched on, so at most 15 non-zero weights
    # Link weights to the indicators. With w >= 0 and sum(w) == 1 every weight
    # is at most 1, so 1 is the tightest valid big-M; a huge constant such as
    # 1e5 lets a near-zero z (within solver tolerance) admit a real weight.
    w <= z
]

prob = cp.Problem(objective, constraints)
prob.solve()


In [None]:
# Show every asset whose optimal weight is non-zero after rounding to 2 decimals
optimal_weights = w.value
if optimal_weights is None:
    # w.value stays None when the solve did not produce a solution; fail with
    # the solver status instead of an opaque TypeError inside zip().
    raise RuntimeError(f"No optimal weights available (solver status: {prob.status})")

for ticker, weight in zip(tickers, optimal_weights):
    if weight.round(2) != 0:
        print(f"{ticker}: {weight:.4f}")

In [None]:
#Select the five stocks with the highest Sharpe ratio inside each random portfolio

# Per-ticker Sharpe ratios do not depend on the portfolio, so compute them once
# instead of once per portfolio.
sharpe_ratio = sharpe_ratio_calculation(df_all_stocks, rf_rate_annual = 0.02)
sharpe_ratio = sharpe_ratio.astype(float)

# portfolio name -> {ticker: sharpe ratio} for its five best stocks
top_five_by_portfolio = {}
for name, port in random_portfolios.items():
    portfolio = port

    dict_portfolio = {k: float(v) for k, v in sharpe_ratio[portfolio].items()}

    sorted_dict = dict(sorted(dict_portfolio.items(), key=lambda x:x[1], reverse=True))
    top_five = dict(list(sorted_dict.items())[:5])

    # Keep every portfolio's result; `top_five` alone only holds the last one
    top_five_by_portfolio[name] = top_five

In [None]:
import cvxpy as cp
import numpy as np

# % change of stock prices (rows containing any NaN are discarded)
df_pct_change = df_all_stocks.pct_change().dropna()

# Covariance matrix of returns
cov_matrix = df_pct_change.cov().values

# Make sure it's symmetric and PSD (small ridge on the diagonal)
cov_matrix = (cov_matrix + cov_matrix.T) / 2
cov_matrix += 1e-6 * np.eye(cov_matrix.shape[0])

# Number of assets
n_assets = len(tickers)

# Variables
w = cp.Variable(n_assets)  # Weights of each asset
z = cp.Variable(n_assets, boolean=True)  # Binary indicator for inclusion/exclusion

# Portfolio variance objective
portfolio_variance = cp.quad_form(w, cov_matrix)
objective = cp.Minimize((1/2) * portfolio_variance)

# Constraints
constraints = [
    cp.sum(w) == 1,  # Fully invested
    w >= 0,  # No shorting
    cp.sum(z) == 15,  # 15 assets are switched on, so at most 15 non-zero weights
    # With w >= 0 and sum(w) == 1 every weight is at most 1, so 1 is the
    # tightest valid big-M; 1e5 would let a near-zero z admit a real weight.
    w <= z
]

# Problem
prob = cp.Problem(objective, constraints)

# Solve. The objective is quadratic and z is boolean, so this is a
# mixed-integer QP. CBC, as exposed by CVXPY, handles linear / mixed-integer
# linear programs only and rejects this problem, so let CVXPY choose an
# installed solver that supports it (e.g. SCIP or a commercial MIQP solver).
prob.solve()

# Fail loudly when no usable solution came back instead of indexing None below
if prob.status not in ("optimal", "optimal_inaccurate") or w.value is None or z.value is None:
    raise RuntimeError(f"Portfolio optimisation did not produce a solution (status: {prob.status})")

# Get the results
optimal_weights = w.value
selected_assets = [tickers[i] for i in range(n_assets) if z.value[i] > 0.5]

# Print optimal weights and selected assets
print("Optimal Weights:", optimal_weights)
print("Selected Assets:", selected_assets)
