In [2]:
import numpy as np
import pandas as pd
import yfinance as yf
from pyqubo import Array
import neal
import matplotlib.pyplot as plt
import requests

In [1]:
def download_data(features, start, end):
    data = yf.download(features, start=start, end=end)
    data = data.loc[:,('Adj Close', slice(None))]
    data.columns = features 
    return data

def calculate_spearman_correlations_with_last_column(data):
    returns = data.pct_change()

    # Fill NaN values with 0
    returns_filled = returns.fillna(0)

    # Extract the last column for comparison
    last_column = returns_filled.iloc[:, -1]

    # Initialize an empty list to store correlations
    correlations = []

    # Calculate Spearman correlations with the last column
    for column in returns_filled.columns[:-1]:  # Exclude the last column itself
        correlation = returns_filled[column].corr(last_column, method='spearman')
        # Check if the correlation is NaN and replace it with 0
        if pd.isna(correlation):
            correlation = 0
        correlations.append(correlation)

    # Return the correlations list
    return correlations


def get_latest_data(data):
    return data.iloc[-1].tolist()  # Get the last row of the dataframe, which has the latest prices


In [3]:
class PortfolioQUBO_v1(object):
    def __init__(self, features, C_M, A, theta1, theta2, theta3):
        self.theta1 = theta1  # Weight for the return objective
        self.theta2 = theta2  # Weight for the risk (correlation) objective
        self.theta3 = theta3  # Weight for the budget constraint
        self.features = len(features)
        self.C_M = C_M  # Correlation matrix
        self.A = A      # features value or expected returns
        self.array = Array.create('features', shape=(self.features), vartype='BINARY')

    def Last(self):
        # Adjust this method to use correlations and/or returns
        # Example: Maximize returns based on expected returns (self.A)
        H = sum(-self.A[i] * self.array[i] for i in range(self.features))
        return self.theta1 * H

    def Risk(self):
        # Objective: Minimize risk (as measured by portfolio variance)
        H = sum(self.C_M.iloc[i, j] * self.array[i] * self.array[j] for i in range(self.features) for j in range(self.features))
        return self.theta2 * H


In [5]:
# Importing necessary libraries
import pandas as pd

# Reading data from the new CSV file
file_path = '../data_p/quantum_data.address_class4.csv'
data = pd.read_csv(file_path)

# Assuming the CSV contains columns for features and their historical prices
# We will process this data similarly to how the original data was processed

# Calculate covariance matrix, and latest data of rows
C_M = calculate_spearman_correlations_with_last_column(data)
print("C_M:", C_M)
A = get_latest_data(data)
print("A:", A)

# Define other parameters
B = 1000
theta1, theta2, theta3 = 0.5, 0.3, 0.2

# Create QUBO object and compile
portfolio_qubo = PortfolioQUBO_v1(data.columns.tolist(), C_M, A, theta1, theta2, theta3)
objective = portfolio_qubo.Risk()
print("Objective:", objective)
model = objective.compile()
print("Model:", model)
qubo, offset = model.to_qubo()
print("QUBO:", qubo)

# Solve QUBO using Simulated Annealing Sampler
sampler = neal.SimulatedAnnealingSampler()
response = sampler.sample_qubo(qubo)

# Print results
for sample, energy in response.data(['sample', 'energy']):
    print(sample, energy)


C_M: [-0.21162427371385173, 0.0, 0.3188735372407557, 0.3158530842493006, 0.3197365238097428, 0.31671607081828773, 0.3520985201467614, -0.19650208717244674, 0.3533930000002421, -0.1591184708947136, -0.23336098087344442, 0, 0.08333333333333334, -0.21162427371385173, -0.21162427371385173, -0.21162427371385173, 0.08333333333333334, -0.21162427371385173, -0.22400773509428723, -0.08394154019223252, 0.08333333333333334, 0, 0, 0, 0, 0, 0.14433756729740646, 0, 0, 0, 0, 0, 0, 0, -0.14433756729740646, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0.08459280110666133, -0.0504401384429554, -0.0545544725589981, -0.0504401384429554, -0.26361345574227146, -0.26253967384515425, 0, -0.2603921100509199, 0, 0, 0, 0, -0.26361345574227146, -0.26253967384515425, 0, -0.2603921100509199, -0.26361345574227146, -0.26253967384515425, 0, -0.2603921100509199, -0.26361345574227146, -0.26253967384515425, 0, -0.2603921100509199]
A: [1.0, 1.0, 1.9376, 1.9376, 1362.5881076171877, 1362.5881076171877, 1.9376, 0.0, 1362.5881076171877, 0.0, 



AttributeError: 'list' object has no attribute 'iloc'

In [None]:
# PortfolioQUBO_v2_nAssets and Return constraints are added to the PortfolioQUBO_v2
class PortfolioQUBO_v2(object):
    def __init__(self, assets, Sigma, n, Mu, R_dot, lambda1, lambda2, lambda3):        
        self.lambda1 = lambda1
        self.lambda2 = lambda2
        self.lambda3 = lambda3
        self.assets = assets
        self.Sigma = Sigma  
        self.n = n
        self.Mu = Mu
        self.R_dot = R_dot
        self.array = Array.create('asset', shape=(assets), vartype='BINARY')

    def Risk(self):
        """Minimize the portfolio variance."""
        H = sum(self.Sigma[i][j] * self.array[i] * self.array[j] for i in range(self.assets) for j in range(self.assets))      
        return self.lambda1 * H
  
    def nAssets(self):
        """Ensure the portfolio has exactly n assets."""
        H = sum(self.array[i] for i in range(self.assets))
        return self.lambda2 * (H - self.n)**2
    
    def Return(self):
        """Ensure the portfolio has an expected return close to R_dot."""
        H = sum(self.Mu[i] * self.array[i] for i in range(self.assets))
        return self.lambda3 * (H - self.R_dot)**2


In [None]:
# multifactor QUBO
class fm3_QUBO(object):
    def __init__(self, assets, SR, Cov, A, nbits, TF, theta1, theta2, theta3):
        self.theta1 = theta1
        self.theta2 = theta2
        self.theta3 = theta3
        self.assets = len(assets)
        self.SR = SR  
        self.Cov = Cov
        self.A = A
        self.nbits = nbits
        self.TF = TF
        self.array = Array.create('asset', shape=(self.assets, nbits), vartype='BINARY')
        
    def Return(self):
        H = sum(-self.SR[i] * sum(self.array[i][s] * 2**s for s in range(self.nbits)) for i in range(self.assets))
        return self.theta1 * H
        
    def Risk(self):
        H = 0
        for i in range(self.assets):
            for j in range(i, self.assets):
                for si in range(self.nbits):
                    for sj in range(si + 1, self.nbits):
                        H += self.Cov[i][j] * self.array[i][si] * self.array[j][sj]
        return self.theta2 * H
  
    def Budget(self):
        H = sum(sum(self.array[i][s] * 2**s for s in range(self.nbits)) for i in range(self.assets))
        return self.theta3 * (H - self.TF)**2

    


In [None]:
# PortfolioQUBO_v2 with multifactor 
class PortfolioQUBO_v2_multifactor(object):
    def __init__(self, assets, Sigma, n, Mu, R_dot, lambda1, lambda2, lambda3):        
        self.lambda1 = lambda1
        self.lambda2 = lambda2
        self.lambda3 = lambda3
        self.assets = assets
        self.Sigma = Sigma  
        self.nbits = n
        self.Mu = Mu
        self.R_dot = R_dot
        self.array = Array.create('asset', shape=(assets,self.nbits), vartype='BINARY')

    def Risk(self):
        """Minimize the portfolio variance."""
        H = 0
        for i in range(self.assets):
            for j in range(i, self.assets):
                for si in range(self.nbits):
                    for sj in range(si + 1, self.nbits):
                        H += self.Sigma[i][j] * self.array[i][si] * self.array[j][sj]
        return self.lambda1 * H

    def Return(self):
        """Ensure the portfolio has an expected return close to R_dot."""  
        H = sum(self.Mu[i]*sum(self.array[i][s] * 2**(s) for s in range(self.nbits)) for i in range(self.assets))      
        return self.lambda3*(H - self.R_dot*(2**(self.nbits+1)-1))**2

In [None]:
# Fetch S&P 500 tickers from Wikipedia
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
response = requests.get(url)

# Use pandas read_html to parse the website
table = pd.read_html(response.text)
df = table[0]
tickers = df['Symbol'].tolist()

In [None]:
def download_and_organize_data(assets, start_date, end_date):
    data_dict = {}
    failed_tickers = []
    successful_tickers = []

    for ticker in assets:
        try:
            # Attempt to download the data
            data = yf.download(ticker, start=start_date, end=end_date)
            if not data.empty:  # Ensure downloaded data is not empty
                successful_tickers.append(ticker)
                data_dict[ticker] = data['Adj Close']
            else:
                failed_tickers.append(ticker)
        except Exception as e:
            # Log the failed tickers and their respective errors
            print(f"Failed to download data for {ticker}. Reason: {e}")
            failed_tickers.append(ticker)

    # Organize the data
    df = pd.concat(data_dict, axis=1)
    df.columns = successful_tickers
    return df, failed_tickers


In [None]:
start_date = "2020-01-01"
end_date = "2022-01-01"
data, failed_tickers = download_and_organize_data(tickers, start_date, end_date)

print("Downloaded Data:")
print(data.head())  # Display the head of the successfully downloaded data
print("\nFailed to download data for:", failed_tickers)


In [None]:
# Calculate expected returns, covariance matrix, and the latest prices
E_R = calculate_expected_returns(data)
Cov = calculate_covariance_matrix(data)
A = get_latest_prices(data)