https://aiguru.medium.com/maximizing-returns-using-python-portfolio-optimization-to-create-a-strategy-for-all-s-p500-tickers-ea7f8899ebe8

In [1]:
# import libraries
import pandas as pd
import numpy as np
import yfinance as yf

In [2]:
# Fetch data method from a list of tickers
def fetch_data(tickers, start, end):
    """Download one price column per ticker and combine them column-wise.

    For every ticker ``yf.download`` is called, the "Open", "High", "Low",
    "Close" and "Volume" columns are dropped, and the single remaining column
    is renamed to the ticker symbol (presumably "Adj Close"; which column
    remains depends on the installed yfinance version - TODO confirm).

    Args:
        tickers: iterable of ticker symbols.
        start: start date passed through to ``yf.download``.
        end: end date passed through to ``yf.download``.

    Returns:
        DataFrame with one column per ticker that could be processed, aligned
        on the union of the individual indexes. An empty DataFrame when no
        ticker could be processed.
    """
    frames = []
    for ticker in tickers:
        try:
            df = yf.download(ticker, start=start, end=end)
            df = df.drop(["Open", "High", "Low", "Close", "Volume"], axis=1)
            df.columns = [ticker]
        except Exception as exc:
            # Best effort: one bad ticker must not abort the whole batch, but
            # say which one was skipped. Catching Exception (not a bare
            # except) lets Ctrl-C / SystemExit stop a long download.
            print("Skipping", ticker, "-", repr(exc))
            continue
        frames.append(df)

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the growing frame per ticker.
    return pd.concat(frames, axis=1)

In [3]:
# Forward fill NaN values
def fill_nan_values(data):
    """Return a copy of ``data`` with gaps forward-filled down each column.

    Each NaN is replaced by the last valid value above it in the same column.
    NaNs that precede the first valid value of a column stay NaN.
    """
    # ``fillna(method="ffill")`` is deprecated in recent pandas; ``ffill()``
    # is the equivalent, supported spelling.
    return data.ffill()

In [4]:
# Date range
start = "2010-01-01"
end   = "2023-02-25"

# Tickers list from Wikipedia (first table on the page, "Symbol" column)
tickers = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]["Symbol"].tolist()
# Wikipedia writes share classes with a dot (e.g. "BRK.B") while Yahoo Finance
# uses a dash ("BRK-B"); without this those tickers cannot be downloaded.
tickers = [str(symbol).replace(".", "-") for symbol in tickers]

# Fetch data and fill NaN
data = fetch_data(tickers, start, end)
data = fill_nan_values(data)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [6]:
# Calculate returns: row-over-row percentage change of every column in `data`.
# The first row is NaN because it has no previous row to compare against.
returns = data.pct_change()

In [8]:
# OPTIMIZATION

# calculate portfolio stats
def portolio_stats(weights, returns):
    """Return annualised return, volatility and their ratio for a portfolio.

    NOTE: the name is misspelled ("portolio") but is kept as-is because
    ``optimize_portfolio`` calls the function by this name.

    Args:
        weights: 1-D array-like with one weight per column of ``returns``.
        returns: DataFrame of per-period returns, one column per asset.

    Returns:
        ``np.ndarray`` of ``[portfolio_return, portfolio_volatility,
        sharpe_ratio]``. Mean and covariance are scaled by 252 (presumably
        trading days per year, i.e. daily returns are assumed) and the Sharpe
        ratio is simply return / volatility, with no risk-free rate.
    """
    # Accept lists and tuples as well as arrays (a plain list has no ``.T``).
    weights = np.asarray(weights, dtype=float)
    portfolio_return = np.sum(returns.mean() * weights) * 252
    portfolio_volatility = np.sqrt(np.dot(weights, np.dot(returns.cov() * 252, weights)))
    sharpe_ratio = portfolio_return / portfolio_volatility
    return np.array([portfolio_return, portfolio_volatility, sharpe_ratio])

In [9]:
from scipy.optimize import minimize

# Optimize method
def optimize_portfolio(returns):
    """Find long-only weights that maximise the return/volatility ratio.

    Starts from equal weights and runs SLSQP with each weight bounded to
    [0, 1] and the weights constrained to sum to 1.

    Args:
        returns: DataFrame of per-period returns, one column per asset.

    Returns:
        ``np.ndarray`` of weights, one per column of ``returns``. If the
        solver reports failure a ``RuntimeWarning`` is issued and the last
        iterate is still returned.
    """
    import warnings

    num_tickers = len(returns.columns)
    init_weights = [1 / num_tickers] * num_tickers
    bounds = ((0, 1),) * num_tickers
    constraints = {"type": "eq", "fun": lambda x: np.sum(x) - 1}

    # The mean vector and covariance matrix do not depend on the weights, so
    # compute them once instead of on every objective evaluation (SLSQP calls
    # the objective many times per iteration to estimate the gradient).
    annual_mean = returns.mean().values * 252
    annual_cov = returns.cov().values * 252

    def negative_sharpe(x):
        # minimize() minimises, so negate the ratio to maximise it.
        return -np.dot(x, annual_mean) / np.sqrt(np.dot(x, np.dot(annual_cov, x)))

    optimized = minimize(negative_sharpe, init_weights, method="SLSQP", bounds=bounds, constraints=constraints)
    if not optimized.success:
        warnings.warn("SLSQP did not converge: " + str(optimized.message), RuntimeWarning)
    return optimized.x

In [10]:
# Get optimized weights: one weight per column of `returns`, from the SLSQP
# optimizer defined above.
# NOTE(review): `weights` is not read again by the later cells visible here.
weights = optimize_portfolio(returns)

In [11]:
# SELECT PORTFOLIO

# Method to sort optimized weights and tickers
def optimize_portfolio(data, top_n=None, num_portfolios=50000):
    """Pick the best of many random long-only portfolios and its top holdings.

    Draws ``num_portfolios`` random weight vectors (normalised to sum to 1),
    scores each by mean return / standard deviation of the per-period returns
    of ``data`` and keeps the first one with the highest score.

    NOTE: this reuses the name of the SLSQP-based ``optimize_portfolio(returns)``
    defined earlier in the file and replaces it once this definition runs.

    Args:
        data: DataFrame of prices, one column per ticker.
        top_n: how many of the largest-weight tickers to report. When omitted,
            the module-level ``N`` is used (the original behaviour). Values
            larger than the number of columns are clipped.
        num_portfolios: number of random portfolios to try.

    Returns:
        ``(optimized_weights, top_n_tickers)``: the winning weight array (one
        entry per column of ``data``) and the list of column labels with the
        largest weights, in descending weight order.

    Raises:
        ValueError: if ``num_portfolios`` is less than 1.
    """
    if num_portfolios < 1:
        raise ValueError("num_portfolios must be at least 1")
    if top_n is None:
        # Backward compatibility: existing callers set a global ``N``.
        top_n = N

    returns = data.pct_change()
    # Plain arrays keep pandas overhead out of the hot loop.
    mean_returns = returns.mean().values
    cov_matrix = returns.cov().values
    num_assets = len(data.columns)

    # Only the best candidate is needed, so track it instead of storing a
    # (3 + num_assets) x num_portfolios results matrix.
    best_sharpe = None
    optimized_weights = None
    for _ in range(num_portfolios):
        weights = np.random.random(num_assets)
        weights /= np.sum(weights)
        portfolio_return = np.dot(mean_returns, weights)
        portfolio_std_dev = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights)))
        sharpe = portfolio_return / portfolio_std_dev
        # Strict ">" keeps the first maximum, like ``argmax`` did.
        if best_sharpe is None or sharpe > best_sharpe:
            best_sharpe = sharpe
            optimized_weights = weights

    # A stable argsort yields distinct positions even when weights tie; the
    # old value lookup returned the same ticker twice in that case.
    top_n = max(0, min(int(top_n), num_assets))
    order = np.argsort(-optimized_weights, kind="stable")[:top_n]
    top_n_tickers = [data.columns[i] for i in order]

    return optimized_weights, top_n_tickers

In [14]:
# Select 10 Tickers with most allocation
N = 10
optimized_weights, top_n_tickers = optimize_portfolio(data)
# Keep the allocation of the selected tickers and zero out every other one.
# The kept weights are the optimizer's values unchanged (not rescaled to sum to 1).
chosen = set(top_n_tickers)
selected_weights = [
    weight if symbol in chosen else 0
    for symbol, weight in zip(data.columns, optimized_weights)
]

In [15]:
# Show the per-ticker weights in data.columns order (0 for tickers outside the top N).
print(selected_weights)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.003990939715758749, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.003923729830493293, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.003992579968579592, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.003973544580292666, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import yfinance as yf

In [2]:
# Fetch data method from a list of tickers
def fetch_data(tickers, start, end):
    """Download one price column per ticker and combine them column-wise.

    For every ticker ``yf.download`` is called, the 'Open', 'High', 'Low',
    'Close' and 'Volume' columns are dropped, and the single remaining column
    is renamed to the ticker symbol (presumably 'Adj Close'; which column
    remains depends on the installed yfinance version - TODO confirm).

    Args:
        tickers: iterable of ticker symbols.
        start: start date passed through to ``yf.download``.
        end: end date passed through to ``yf.download``.

    Returns:
        DataFrame with one column per ticker that could be processed, aligned
        on the union of the individual indexes. An empty DataFrame when no
        ticker could be processed.
    """
    frames = []
    for ticker in tickers:
        try:
            df = yf.download(ticker, start=start, end=end)
            df = df.drop(['Open', 'High', 'Low', 'Close', 'Volume'], axis=1)
            df.columns = [ticker]
        except Exception as exc:
            # Best effort: one bad ticker must not abort the whole batch, but
            # say which one was skipped. Catching Exception (not a bare
            # except) lets Ctrl-C / SystemExit stop a long download.
            print('Skipping', ticker, '-', repr(exc))
            continue
        frames.append(df)

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of re-copying the growing frame per ticker.
    return pd.concat(frames, axis=1)

In [3]:
# Forward fill NaN values
def fill_nan_values(data):
    """Return a copy of ``data`` with gaps forward-filled down each column.

    Each NaN is replaced by the last valid value above it in the same column.
    NaNs that precede the first valid value of a column stay NaN.
    """
    # ``fillna(method='ffill')`` is deprecated in recent pandas; ``ffill()``
    # is the equivalent, supported spelling.
    return data.ffill()

In [4]:
# Date range
start = '2010-01-01'
end = '2023-02-21'

# Tickers list from Wikipedia (first table on the page, 'Symbol' column)
tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()
# Wikipedia writes share classes with a dot (e.g. 'BRK.B') while Yahoo Finance
# uses a dash ('BRK-B'); without this those tickers cannot be downloaded.
tickers = [str(symbol).replace('.', '-') for symbol in tickers]

# Fetch data and fill NaN
data = fetch_data(tickers, start, end)
data = fill_nan_values(data)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [5]:
# Calculate returns: row-over-row percentage change of every column in `data`.
# The first row is NaN because it has no previous row to compare against.
returns = data.pct_change()

In [6]:
# Calculate portfolio stats
def portfolio_stats(weights, returns):
    """Return annualised return, volatility and their ratio for a portfolio.

    Args:
        weights: 1-D array-like with one weight per column of ``returns``.
        returns: DataFrame of per-period returns, one column per asset.

    Returns:
        ``np.ndarray`` of ``[portfolio_return, portfolio_volatility,
        sharpe_ratio]``. Mean and covariance are scaled by 252 (presumably
        trading days per year, i.e. daily returns are assumed) and the Sharpe
        ratio is simply return / volatility, with no risk-free rate.
    """
    # Accept lists and tuples as well as arrays (a plain list has no ``.T``).
    weights = np.asarray(weights, dtype=float)
    portfolio_return = np.sum(returns.mean() * weights) * 252
    portfolio_volatility = np.sqrt(np.dot(weights, np.dot(returns.cov() * 252, weights)))
    sharpe_ratio = portfolio_return / portfolio_volatility
    return np.array([portfolio_return, portfolio_volatility, sharpe_ratio])

In [7]:
from scipy.optimize import minimize

# Optimize method
def optimize_portfolio(returns):
    """Find long-only weights that maximise the return/volatility ratio.

    Starts from equal weights and runs SLSQP with each weight bounded to
    [0, 1] and the weights constrained to sum to 1.

    Args:
        returns: DataFrame of per-period returns, one column per asset.

    Returns:
        ``np.ndarray`` of weights, one per column of ``returns``. If the
        solver reports failure a ``RuntimeWarning`` is issued and the last
        iterate is still returned.
    """
    import warnings

    num_tickers = len(returns.columns)
    init_weights = [1/num_tickers]*num_tickers
    bounds = ((0, 1),)*num_tickers
    constraints = {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}

    # The mean vector and covariance matrix do not depend on the weights, so
    # compute them once instead of on every objective evaluation (SLSQP calls
    # the objective many times per iteration to estimate the gradient).
    annual_mean = returns.mean().values * 252
    annual_cov = returns.cov().values * 252

    def negative_sharpe(x):
        # minimize() minimises, so negate the ratio to maximise it.
        return -np.dot(x, annual_mean) / np.sqrt(np.dot(x, np.dot(annual_cov, x)))

    optimized = minimize(negative_sharpe, init_weights,
                         method='SLSQP', bounds=bounds, constraints=constraints)
    if not optimized.success:
        warnings.warn('SLSQP did not converge: ' + str(optimized.message), RuntimeWarning)
    return optimized.x

In [8]:
# Get Optimized Weights: one weight per column of `returns`, from the SLSQP
# optimizer defined above.
# NOTE(review): `weights` is not read again by the later cells visible here.
weights = optimize_portfolio(returns)

In [9]:
# Method to sort optimized weights and tickers
def optimize_portfolio(data, top_n=None, num_portfolios=50000):
    """Pick the best of many random long-only portfolios and its top holdings.

    Draws ``num_portfolios`` random weight vectors (normalised to sum to 1),
    scores each by mean return / standard deviation of the per-period returns
    of ``data`` and keeps the first one with the highest score.

    NOTE: this reuses the name of the SLSQP-based ``optimize_portfolio(returns)``
    defined earlier in the file and replaces it once this definition runs.

    Args:
        data: DataFrame of prices, one column per ticker.
        top_n: how many of the largest-weight tickers to report. When omitted,
            the module-level ``N`` is used (the original behaviour). Values
            larger than the number of columns are clipped.
        num_portfolios: number of random portfolios to try.

    Returns:
        ``(optimized_weights, top_n_tickers)``: the winning weight array (one
        entry per column of ``data``) and the list of column labels with the
        largest weights, in descending weight order.

    Raises:
        ValueError: if ``num_portfolios`` is less than 1.
    """
    if num_portfolios < 1:
        raise ValueError('num_portfolios must be at least 1')
    if top_n is None:
        # Backward compatibility: existing callers set a global ``N``.
        top_n = N

    returns = data.pct_change()
    # Plain arrays keep pandas overhead out of the hot loop.
    mean_returns = returns.mean().values
    cov_matrix = returns.cov().values
    num_assets = len(data.columns)

    # Only the best candidate is needed, so track it instead of storing a
    # (3 + num_assets) x num_portfolios results matrix.
    best_sharpe = None
    optimized_weights = None
    for _ in range(num_portfolios):
        weights = np.random.random(num_assets)
        weights /= np.sum(weights)
        portfolio_return = np.dot(mean_returns, weights)
        portfolio_std_dev = np.sqrt(np.dot(weights, np.dot(cov_matrix, weights)))
        sharpe = portfolio_return / portfolio_std_dev
        # Strict ">" keeps the first maximum, like ``argmax`` did.
        if best_sharpe is None or sharpe > best_sharpe:
            best_sharpe = sharpe
            optimized_weights = weights

    # A stable argsort yields distinct positions even when weights tie; the
    # old value lookup returned the same ticker twice in that case.
    top_n = max(0, min(int(top_n), num_assets))
    order = np.argsort(-optimized_weights, kind='stable')[:top_n]
    top_n_tickers = [data.columns[i] for i in order]

    return optimized_weights, top_n_tickers

In [10]:
# Select 10 Tickers with most allocation
N = 10
optimized_weights, top_n_tickers = optimize_portfolio(data)
# Keep the allocation of the selected tickers and zero out every other one.
# The kept weights are the optimizer's values unchanged (not rescaled to sum to 1).
chosen = set(top_n_tickers)
selected_weights = [
    weight if symbol in chosen else 0
    for symbol, weight in zip(data.columns, optimized_weights)
]

In [11]:
# Show the per-ticker weights in data.columns order (0 for tickers outside the top N).
print(selected_weights)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.003952634159750593, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.003906568265381565, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.003942354419143179, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.00391094466552592, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 