In [2]:
from quantinuum_wrapper import QuantinuumWrapper
from pytket import Circuit

def run(input_data, solver_params, extra_arguments):
    
    ##### THIW IS HOW YOU READ INPUT DATA FROM JSON #####
    
    #for asset in input_data:
    # print (asset, input_data[asset]['name']
    #data
    #######################################################
    from utils import Json_parse
    price_df, esg_s, asset_arr = Json_parse.json_parse(input_data)

    from utils import Pre_processing
    mu, Sigma = Pre_processing.compute_mu_sigma(price_df)

    # 
    plt.imshow(Sigma, interpolation="nearest")
    plt.show()

    from utils import run_portfolio_optimization_suite
    config_example = {
        'compute_inputs': {'shrinkage': True},
        'scalarized_qp': {
            'lamb_grid': [0.5, 1.0, 2.0],
            'eta_grid': [0.0, 0.2, 0.5],
            'lb': 0.0, 'ub': 0.2, 'allow_short': False # Example: Max 20% per asset
        },
        'nsga2': {
            'pop_size': 150, 'generations': 80, 'plot': True
        },
        'cardinality': {
            'k': 15, 'lamb': 1.0, 'eta': 0.2
        },
        'cvar': {
            'alpha': 0.90, 'lamb': 0.5, 'eta': 0.3, 'use_mu_in_obj': True
        }
    }

    # Assuming prices_df and esg_df are loaded pandas DataFrames
    results_dict = run_portfolio_optimization_suite(price_df, mu, Sigma, asset_arr, esg_s, config_example)
    print("\n--- Results Summary ---")
    print(results_dict.keys())
    ##############################################
    ## THIS IS HOW YOU WILL ACCESS QUANTINUUM MACHINES ##
    backend = QuantinuumWrapper.get_target()
    
    circ = Circuit(2).H(0).CX(0, 1).CZ(0, 1)
    circ.measure_all()
    backend.default_compilation_pass().apply(circ)
    compiled_circ = backend.get_compiled_circuit(circ)
    handle = backend.process_circuit(compiled_circ, n_shots=3000)

    ## THE RETURN MUST BE JSON COMPATIBLE ##
    return {"result": str(backend.get_result(handle).get_counts())}


In [3]:
from qiskit import QuantumCircuit, transpile
from qiskit_aer import AerSimulator
from qiskit.visualization import plot_histogram
import matplotlib.pyplot as plt
import numpy as np
from typing import Union
import pandas as pd

print(Union[np.ndarray, pd.Series])

typing.Union[numpy.ndarray, pandas.core.series.Series]


In [4]:
def json_parse(input_data):
    """
    Reads Json data and outputs price and esg score of each assets as well as an array of all asset ID

    Parameters
    ----------
    input_data : dict
        Dictionary from input_data.json after json.load()

    Returns
    ----------
    price_df : pd.DataFrame
        DataFrame of price data with index as dates and columns as asset IDs
    esg_s : pd.Series(esg_dict)
        Series of esgScore data with index as assetIDs
    asset_arr : np.array
        Array of all asset IDs
    """
    asset_arr = np.empty(len(input_data),dtype=object)

    iterator = 0
    price_dict = {}
    esg_dict = {}
    for asset_code in input_data:
        asset_arr[iterator] = asset_code

        # price_dict
        history_dict = input_data[asset_code]["History"]
        filtered_history_dict = {date:history_dict[date]["Close"] for date in history_dict} # Use Close price as we assume "hold overnight" investors

        price_dict[asset_code] = filtered_history_dict

        # esg_dict
        esgScores_dict = input_data[asset_code]["Sustainability"]["esgScores"]
        environmentScore = esgScores_dict["environmentScore"]
        governanceScore = esgScores_dict["governanceScore"]
        socialScore = esgScores_dict["socialScore"]
        avg_esgScore = (environmentScore + governanceScore + socialScore)/3 # Take an average
        esg_dict[asset_code] = avg_esgScore
        
        iterator += 1

    
    price_df = pd.DataFrame.from_dict(price_dict)
    esg_s = pd.Series(esg_dict)

    return price_df, esg_s, asset_arr

## Input

In [5]:
import json

input_1 = 'input.json'
input_2 = 'small_dataset.json'
input_3 = 'big_dataset.json'

with open(input_2) as f:
    input_data = json.load(f)
    
price_df, esg_s, asset_arr = json_parse(input_data['data'])

## MonteCarlo func

In [6]:
def find_good_states_montecarlo(
    esg_scores: np.ndarray,
    threshold: float,
    num_samples: int = 1000000,  # number of Monte Carlo samples
    min_assets: int = 1
) -> list[int]:
    """
    Monte Carlo sampling version of the ESG filtering algorithm.

    Randomly generates binary vectors of length n, converts them to integers,
    and keeps those whose average ESG <= threshold.

    Parameters
    ----------
    esg_scores : np.ndarray
        Array of ESG scores for each asset (length n).
    threshold : float
        ESG threshold for filtering.
    num_samples : int, optional
        Number of random samples to draw.
    min_assets : int, optional
        Minimum number of assets to include (nonzero bits).

    Returns
    -------
    list[int]
        List of integer representations of "good" binary states.
    """
    n = len(esg_scores)
    good_states = []

    for _ in range(num_samples):
        # Generate a random binary vector
        bits = np.random.randint(0, 2, size=n)

        # Ensure at least `min_assets` selected
        while np.sum(bits) < min_assets:
            bits = np.random.randint(0, 2, size=n)

        # Compute the average ESG score
        avg_esg = np.dot(esg_scores, bits) / np.sum(bits)

        # Keep if condition satisfied
        if avg_esg <= threshold:
            # Convert binary vector to integer representation
            state_int = int("".join(map(str, bits)), 2)
            good_states.append(state_int)

    return good_states


print(find_good_states_montecarlo(num_samples=10000, esg_scores = esg_s, threshold = 6))


[234, 4530, 4504, 226, 4163, 4291, 4211, 4800, 4266, 5138, 4114, 4448, 106, 4376, 106, 4344, 4131, 4258, 4410, 4146, 4176, 4306, 4328, 4274, 4336, 4288, 4522, 4210, 2, 4320, 4672, 4096, 4546, 4834, 128, 4259, 4376, 4826, 4754, 5314, 4440, 4099, 4307, 114, 208, 2, 4259, 4176, 4323, 2, 4722, 4234, 4338, 5218, 4272, 4336, 4562, 4794, 4258, 4298, 4850, 4818, 4216, 4794, 4656, 4600, 4163, 34, 4602, 4714, 4482, 80, 250, 4752, 4306, 4848, 4464, 4402, 4610, 4322, 4520, 98, 240, 4290, 122, 4154, 4386, 194, 4272, 4338, 4418, 4162, 170, 4842, 170, 4552, 4674, 4544, 4464, 4834, 4298, 4232, 4163, 5186, 114, 4504, 5250, 4250, 4722, 4400, 5202, 160, 154, 4536, 5362, 4378, 4290, 4115, 4512, 4634, 4770, 146, 4682, 4394, 4291, 98, 4282, 4730, 4272, 5330, 4232, 226, 4682, 4258, 202, 4594, 138, 4720, 210, 4114, 4410, 4323, 4138, 4168, 5250, 4099, 32, 5202, 5234, 4522, 4154, 42, 4168, 4498, 64, 4402, 4410, 4266, 4480, 4147, 4179, 4552, 4202, 106, 4224, 5170, 4586, 4562, 4256, 18, 4120, 4818, 160, 4506, 471

## Ver 2

In [13]:
from math import pi, sqrt, ceil
# Parameters
esg_scores = esg_s
n = len(esg_s)
backend = AerSimulator()


good_states = find_good_states_montecarlo(num_samples=100000, esg_scores = esg_s, threshold = 6)


M = len(good_states)
print(f"Total portfolios: {2**n}, Good portfolios: {M}")

if M == 0:
    print("No solutions found - cannot run Grover")
    exit()

# Optimal number of Grover iterations
R = ceil((pi/4) * sqrt(2**n / M)) - 1  # Conservative estimate
print(f"Grover iterations: {R}")

# Build the quantum circuit
qc = QuantumCircuit(n, n)

# Step 1: Initial superposition
qc.h(range(n))

# Step 2: Grover iterations
for _ in range(R):
    # Oracle: mark good states
    for state_int in good_states:
        # Convert to binary string for controls
        bit_str = format(state_int, f'0{n}b')
        
        # Flip phase of this specific basis state
        # Apply X to qubits that should be 0 in the pattern
        for q, bit in enumerate(bit_str):
            if bit == '0':
                qc.x(q)
        
        # Multi-controlled Z (control on all qubits being 1 after X gates)
        if n == 1:
            qc.z(0)
        elif n == 2:
            qc.cz(0, 1)
        else:
            qc.h(n-1)
            qc.mcx(list(range(n-1)), n-1)
            qc.h(n-1)
        
        # Uncompute X gates
        for q, bit in enumerate(bit_str):
            if bit == '0':
                qc.x(q)
    
    # Diffusion operator
    qc.h(range(n))
    qc.x(range(n))
    if n == 1:
        qc.z(0)
    elif n == 2:
        qc.cz(0, 1)
    else:
        qc.h(n-1)
        qc.mcx(list(range(n-1)), n-1)
        qc.h(n-1)
    qc.x(range(n))
    qc.h(range(n))

# Measurement
qc.measure(range(n), range(n))

# Execute
compiled = transpile(qc, backend)
job = backend.run(compiled, shots=2048)
result = job.result()
counts = result.get_counts()

print("Results (good portfolios should appear most frequently):")
for bitstring, count in sorted(counts.items(), key=lambda x: x[1], reverse=True)[:10]:
    bits = np.array(list(bitstring[::-1]), dtype=int)  # qiskit uses little-endian
    if np.sum(bits) > 0:
        avg_esg = np.dot(esg_scores, bits) / np.sum(bits)
        print(f"Portfolio {bitstring}: count={count}, avg_ESG={avg_esg:.2f}")
    else:
        print(f"Portfolio {bitstring}: count={count}, empty")

plot_histogram(counts)
plt.show()

Total portfolios: 8192, Good portfolios: 3014
Grover iterations: 1
Results (good portfolios should appear most frequently):
Portfolio 0101101001001: count=6, avg_ESG=5.96
Portfolio 0000111000001: count=5, avg_ESG=5.38
Portfolio 0000100101001: count=5, avg_ESG=5.99
Portfolio 0100110110001: count=5, avg_ESG=5.68
Portfolio 0001100110001: count=5, avg_ESG=5.97
Portfolio 0100101100000: count=5, avg_ESG=5.71
Portfolio 0000100110001: count=5, avg_ESG=5.76
Portfolio 0000000100000: count=5, avg_ESG=5.99
Portfolio 0101111010001: count=5, avg_ESG=5.82
Portfolio 0000001000000: count=5, avg_ESG=5.85


## Counter

In [14]:
def count_bit_positions(count_dict):
    """
    Counts the occurrences of '1's at each position in the bitstring keys,
    weighted by the dictionary's values (counts).

    Args:
        count_dict (dict): A dictionary where keys are bitstrings of equal length
                           and values are their counts (integers).

    Returns:
        dict: A dictionary where keys are 1-based bit positions (int) and
              values are the total counts of '1's at that position.
    """
    if not count_dict:
        return {}

    # Get the length of the bitstrings from the first key
    # Assumes all keys have the same length
    try:
        first_key = next(iter(count_dict))
        bit_length = len(first_key)
    except StopIteration:
        return {} # Handle empty dictionary case

    # Initialize the position counts dictionary with 0s
    # Using 1-based indexing for positions (1 to bit_length)
    position_counts = {i: 0 for i in range(1, bit_length + 1)}

    # Iterate over each item in the input dictionary
    for bitstring, count in count_dict.items():
        # Ensure bitstring length matches, skip if not (or raise error)
        if len(bitstring) != bit_length:
            print(f"Warning: Skipping bitstring '{bitstring}' with incorrect length.")
            continue

        # Iterate over each character in the bitstring
        for i in range(bit_length):
            if bitstring[i] == '1':
                # Position is i + 1 (because of 1-based indexing)
                position = i + 1
                # Add the count from the input dict to our position counter
                position_counts[position] += count

    return position_counts


position_totals = count_bit_positions(dict(counts))
print("\nTotal counts of '1's at each position (1-based index):")
print(position_totals)



Total counts of '1's at each position (1-based index):
{1: 903, 2: 1051, 3: 896, 4: 974, 5: 1064, 6: 991, 7: 994, 8: 1000, 9: 965, 10: 968, 11: 886, 12: 868, 13: 1126}


In [63]:
def analyze_asset_participation(counts, esg_scores, threshold, n):
    """
    Analyze which assets are never/always selected in high-ESG portfolios
    """
    total_shots = sum(counts.values())
    asset_selection_count = np.zeros(n)
    high_esg_portfolios = 0
    
    for bitstring, count in counts.items():
        # Convert bitstring to portfolio (qiskit uses little-endian)
        bits = np.array(list(bitstring[::-1]), dtype=int)
        
        # Verify it's actually a high-ESG portfolio
        if np.sum(bits) > 0:
            avg_esg = np.dot(esg_scores, bits) / np.sum(bits)
            if avg_esg >= threshold:
                high_esg_portfolios += count
                asset_selection_count += bits * count
    
    print(f"Total high-ESG portfolios sampled: {high_esg_portfolios}")
    
    # Calculate selection frequency
    asset_selection_freq = asset_selection_count / high_esg_portfolios if high_esg_portfolios > 0 else np.zeros(n)
    
    # Identify assets to remove
    assets_to_remove = []
    assets_always_selected = []
    
    for i in range(n):
        freq = asset_selection_freq[i]
        if freq == 0:
            assets_to_remove.append(i)
            print(f"Asset {i}: NEVER selected (ESG={esg_scores[i]}) - REMOVE")
        elif freq == 1:
            assets_always_selected.append(i)
            print(f"Asset {i}: ALWAYS selected (ESG={esg_scores[i]}) - FIX TO 1")
        else:
            print(f"Asset {i}: selected in {freq*100:.1f}% of portfolios (ESG={esg_scores[i]})")
    
    return assets_to_remove, assets_always_selected, asset_selection_freq

# Run the analysis
assets_to_remove, assets_always_selected, selection_freq = analyze_asset_participation(
    counts, esg_scores, threshold, n
)

Total high-ESG portfolios sampled: 992
Asset 0: selected in 47.8% of portfolios (ESG=3.936666666666666)
Asset 1: selected in 50.1% of portfolios (ESG=11.226666666666667)
Asset 2: selected in 50.9% of portfolios (ESG=8.716666666666667)
Asset 3: selected in 51.5% of portfolios (ESG=8.18)
Asset 4: selected in 50.2% of portfolios (ESG=7.286666666666666)
Asset 5: selected in 48.4% of portfolios (ESG=5.989999999999999)
Asset 6: selected in 50.2% of portfolios (ESG=5.853333333333334)
Asset 7: selected in 51.6% of portfolios (ESG=5.876666666666666)
Asset 8: selected in 50.3% of portfolios (ESG=5.843333333333334)
Asset 9: selected in 49.5% of portfolios (ESG=6.796666666666667)
Asset 10: selected in 51.6% of portfolios (ESG=11.0)
Asset 11: selected in 47.9% of portfolios (ESG=5.156666666666666)
Asset 12: selected in 52.4% of portfolios (ESG=8.896666666666667)


  print(f"Asset {i}: selected in {freq*100:.1f}% of portfolios (ESG={esg_scores[i]})")


In [15]:
import numpy as np
import pandas as pd


# ---------------------------- 1. Baseline QP ----------------------------

import cvxpy as cp
from typing import Union # For type hinting

def solve_scalarized_qp(
    Sigma: Union[np.ndarray, pd.DataFrame],
    mu: Union[np.ndarray, pd.Series],
    esg: Union[np.ndarray, pd.Series],
    lamb: float = 1.0,
    eta: float = 0.1,
    lb: float = 0.0,
    ub: float = 1.0,
    allow_short: bool = False,
    allow_cash: bool = False, # New parameter
    solver = cp.OSQP
) -> np.ndarray:
    """Solves a convex quadratic portfolio optimization problem.

    This function finds the optimal portfolio weights 'w' that minimize a
    scalarized objective function combining portfolio variance (risk),
    expected return, and ESG score, subject to standard portfolio constraints.

    The objective function minimized is:
        w^T * Sigma * w - lamb * (mu^T * w) - eta * (esg^T * w)
    Effectively, it maximizes a utility function U = lamb*Return + eta*ESG - Risk.

    Args:
        Sigma (np.array | pd.DataFrame): Covariance matrix (n x n). Must be positive semi-definite.
        mu (np.array | pd.Series): Expected returns vector (n,).
        esg (np.array | pd.Series): ESG scores vector (n,).
        lamb (float, optional): Weight for the expected return term. Defaults to 1.0.
        eta (float, optional): Weight for the ESG score term. Defaults to 0.1.
        lb (float, optional): Lower bound for individual asset weights. Defaults to 0.0.
        ub (float, optional): Upper bound for individual asset weights. Defaults to 1.0.
        allow_short (bool, optional): If True, ignores the lb >= 0 constraint. Defaults to False.
        allow_cash (bool, optional): If True, constraints sum(weights) <= 1 instead of == 1,
                                     allowing for a portion of the portfolio to be uninvested (cash).
                                     Defaults to False (fully invested).
        solver (cp.Solver, optional): The CVXPY solver. Defaults to cp.OSQP.

    Returns:
        np.array: Optimal portfolio weights (shape n,).

    Raises:
        RuntimeError: If the solver fails.
        ValueError: If input dimensions are inconsistent.
    """
    # --- Input Validation and Conversion ---
    if isinstance(mu, (pd.Series, pd.DataFrame)):
        mu_np = mu.values.flatten()
    else:
        mu_np = np.array(mu).flatten()

    n = len(mu_np)

    if isinstance(esg, (pd.Series, pd.DataFrame)):
        esg_np = esg.values.flatten()
    else:
        esg_np = np.array(esg).flatten()

    if isinstance(Sigma, pd.DataFrame):
        Sigma_np = Sigma.values
    else:
        Sigma_np = np.array(Sigma)

    if len(esg_np) != n or Sigma_np.shape != (n, n):
         raise ValueError(
             f"Dimension mismatch: n={n}, mu={len(mu_np)}, esg={len(esg_np)}, Sigma={Sigma_np.shape}"
         )
    # ----------------------------------------

    w = cp.Variable(n, name="weights")

    # --- Objective Function ---
    objective = cp.quad_form(w, Sigma_np) - lamb * (mu_np @ w) - eta * (esg_np @ w)

    # --- Constraints ---
    constraints = []

    # Budget Constraint: sum(w) == 1 (fully invested) or sum(w) <= 1 (allow cash)
    if allow_cash:
        constraints.append(cp.sum(w) <= 1)
        # If allowing cash, weights must still be non-negative unless shorting is also allowed
        if not allow_short:
             constraints.append(w >= 0) # Ensure weights don't go negative to fund cash
    else:
        constraints.append(cp.sum(w) == 1)

    # Individual Weight Bounds
    if not allow_short:
        # If sum(w) <= 1 is used, lb might technically not be needed if >= 0 already added
        # but including it handles cases where lb > 0 is desired.
        constraints.append(w >= lb)
    # If allow_short is True, we don't add w >= lb constraint.
    # The upper bound always applies.
    constraints.append(w <= ub)

    # --- Solve Problem ---
    prob = cp.Problem(cp.Minimize(objective), constraints)
    try:
        prob.solve(solver=solver, verbose=False)
        if prob.status not in [cp.OPTIMAL, cp.OPTIMAL_INACCURATE]:
             raise RuntimeError(f'QP solver failed or did not find optimal solution. Status: {prob.status}')
        if w.value is None:
             raise RuntimeError('QP solver finished but weights are None.')

        result_w = np.array(w.value).flatten()
        # Clean near-zero values
        result_w[np.abs(result_w) < 1e-8] = 0.0
        return result_w
    except Exception as e:
        raise RuntimeError(f'QP solver failed during execution: {e}')



# ---------------------------- 2. CVaR optimization ----------------------------
import cvxpy as cp
from typing import Union # For type hinting

def solve_cvar_lp(
    returns_scenarios: Union[np.ndarray, pd.DataFrame],
    mu: Union[np.ndarray, pd.Series],
    esg: Union[np.ndarray, pd.Series],
    alpha: float = 0.95,
    lamb: float = 1.0,
    eta: float = 0.1,
    lb: float = 0.0,
    ub: float = 1.0,
    allow_short: bool = False,
    allow_cash: bool = False, # New parameter
    solver = cp.OSQP
) -> np.ndarray:
    """Solves portfolio optimization minimizing CVaR - lambda*Return - eta*ESG.

    This function uses a linear programming formulation (based on Rockafellar-Uryasev)
    to find the optimal portfolio weights 'w' that minimize a weighted combination
    of Conditional Value-at-Risk (CVaR) and negative expected return and ESG score.

    The objective function minimized is approximately:
        CVaR_{alpha}(Loss) - lamb * (mu^T * w) - eta * (esg^T * w)
    where Loss = -Return.

    Args:
        returns_scenarios (np.array | pd.DataFrame): Shape (T, n) of T scenarios
                                                    for n assets. Each row is one
                                                    scenario's returns.
        mu (np.array | pd.Series): Shape (n,) of expected returns.
        esg (np.array | pd.Series): Shape (n,) of ESG scores.
        alpha (float, optional): Confidence level for CVaR (e.g., 0.95 for 95% CVaR).
                                 Defaults to 0.95.
        lamb (float, optional): Weight for the expected return term in the objective.
                                Higher values prioritize return. Defaults to 1.0.
        eta (float, optional): Weight for the ESG term in the objective. Higher values
                               prioritize ESG. Defaults to 0.1.
        lb (float, optional): Lower bound for individual asset weights. Defaults to 0.0.
        ub (float, optional): Upper bound for individual asset weights. Defaults to 1.0.
        allow_short (bool, optional): If True, ignores the non-negativity implied by lb=0.0
                                      and allows weights to be negative. Defaults to False.
        allow_cash (bool, optional): If True, constraints sum(weights) <= 1 instead of == 1,
                                     allowing for a portion of the portfolio to be uninvested (cash).
                                     Defaults to False (fully invested).
        solver (cp.Solver, optional): CVXPY solver to use (e.g., cp.OSQP, cp.ECOS, cp.SCS).
                                      Defaults to cp.OSQP.

    Returns:
        np.array: Optimal portfolio weights, shape (n,).

    Raises:
        RuntimeError: If the solver fails or does not find an optimal solution.
        ValueError: If input dimensions are inconsistent.
    """
    # --- Input Validation and Conversion ---
    if isinstance(returns_scenarios, pd.DataFrame):
        returns_scenarios_np = returns_scenarios.values
    else:
        returns_scenarios_np = np.array(returns_scenarios)

    T, n = returns_scenarios_np.shape

    if isinstance(mu, (pd.Series, pd.DataFrame)):
        mu_np = mu.values.flatten()
    else:
        mu_np = np.array(mu).flatten()

    if isinstance(esg, (pd.Series, pd.DataFrame)):
        esg_np = esg.values.flatten()
    else:
        esg_np = np.array(esg).flatten()

    # Check dimensions
    if len(mu_np) != n or len(esg_np) != n:
        raise ValueError(
            f"Dimension mismatch: n={n} from scenarios, mu={len(mu_np)}, esg={len(esg_np)}"
        )
    # ----------------------------------------

    # --- CVXPY Variables ---
    w = cp.Variable(n, name="weights")
    v = cp.Variable(1, name="VaR_alpha")          # Auxiliary variable for VaR
    xi = cp.Variable(T, nonneg=True, name="losses_over_VaR") # Auxiliary variables for losses exceeding VaR

    # --- Calculate Portfolio Losses per Scenario ---
    portfolio_losses = -returns_scenarios_np @ w  # Shape (T,)

    # --- Core CVaR Constraint ---
    # xi_t >= Loss_t - v  (for all t=1..T)
    # Ensures xi captures the positive part of (Loss_t - v)
    cvar_core_constraint = xi >= portfolio_losses - v

    # --- Define Constraints List ---
    constraints = [
        w <= ub,                 # Upper bound on weights
        cvar_core_constraint     # The CVaR formulation constraint
        # xi >= 0 is handled by cp.Variable(T, nonneg=True)
    ]

    # --- Budget Constraint (Allow Cash or Fully Invested) ---
    if allow_cash:
        constraints.append(cp.sum(w) <= 1)
        # If allowing cash, weights usually must still be non-negative
        # unless shorting is *also* allowed. Add non-negativity if needed.
        if not allow_short:
             constraints.append(w >= 0) # Prevent negative weights funding cash
    else:
        # Fully invested
        constraints.append(cp.sum(w) == 1)

    # --- Lower Bound / Short Selling Constraint ---
    if not allow_short:
        # Add the lower bound constraint (potentially redundant if allow_cash added w >= 0)
        # but handles cases where lb > 0 is desired.
        constraints.append(w >= lb)
    # If allow_short is True, we simply don't add w >= lb.

    # --- Objective Function ---
    # CVaR = VaR + Average Tail Loss
    cvar_term = v + (1.0 / ((1 - alpha) * T)) * cp.sum(xi)
    # Minimize: CVaR - lambda * Expected Return - eta * ESG Score
    objective = cvar_term - lamb * (mu_np @ w) - eta * (esg_np @ w)

    # --- Problem Setup & Solving ---
    prob = cp.Problem(cp.Minimize(objective), constraints)
    prob.solve(solver=cp.OSQP, verbose=False)
    if w.value is None:
        raise RuntimeError('CVaR LP solver failed')
    return np.array(w.value).reshape(-1,)


# ---------------------------- Evaluation metrics & plotting ----------------------------
from typing import Optional, Dict, Union, Tuple, Any

def calculate_portfolio_metrics(
    weights: np.ndarray,
    expected_returns: Union[np.ndarray, pd.Series],
    covariance_matrix: Union[np.ndarray, pd.DataFrame],
    esg_scores: Union[np.ndarray, pd.Series],
    returns_oos: Optional[Union[pd.DataFrame, pd.Series]] = None,
    annualization_factor: int = 252
    ) -> Dict[str, float]:
    """
    Calculates various performance and characteristic metrics for a given portfolio.

    Computes the expected return, risk (variance and volatility), and ESG score
    based on input parameters. Optionally calculates realized out-of-sample (OOS)
    mean return and annualized volatility if OOS returns are provided.

    Args:
        weights (np.ndarray): A 1D array of portfolio weights for n assets (shape n,).
            Weights should ideally sum to 1.
        expected_returns (Union[np.ndarray, pd.Series]): A 1D array or Series of
            expected returns for each asset (shape n,).
        covariance_matrix (Union[np.ndarray, pd.DataFrame]): The covariance matrix
            of asset returns (shape n x n).
        esg_scores (Union[np.ndarray, pd.Series]): A 1D array or Series of ESG
            scores for each asset (shape n,).
        returns_oos (Optional[Union[pd.DataFrame, pd.Series]], optional):
            Out-of-sample historical or simulated returns used to calculate realized
            performance. Should be a DataFrame (T x n) or Series if only one asset,
            where T is the number of periods. Assumes daily returns if using the
            default annualization_factor. Defaults to None.
        annualization_factor (int, optional): The factor used to annualize the
            out-of-sample volatility (e.g., 252 for daily returns, 12 for monthly).
            Defaults to 252.

    Returns:
        Dict[str, float]: A dictionary containing the calculated portfolio metrics:
            - 'expected_return': The expected portfolio return (annualized if inputs are).
            - 'variance': The portfolio variance.
            - 'volatility': The portfolio standard deviation (square root of variance).
            - 'esg_score': The weighted average ESG score of the portfolio.
            - 'oos_mean_return' (optional): The mean of the out-of-sample portfolio returns.
            - 'oos_annualized_volatility' (optional): The annualized standard deviation
              (volatility) of the out-of-sample portfolio returns.

    Raises:
        ValueError: If input dimensions are inconsistent.
    """
    # --- Input Validation and Conversion ---
    n_assets = len(weights)

    if isinstance(expected_returns, pd.Series):
        mu = expected_returns.values
    else:
        mu = np.asarray(expected_returns)

    if isinstance(covariance_matrix, pd.DataFrame):
        Sigma = covariance_matrix.values
    else:
        Sigma = np.asarray(covariance_matrix)

    if isinstance(esg_scores, pd.Series):
        esg = esg_scores.values
    else:
        esg = np.asarray(esg_scores)

    if mu.shape != (n_assets,) or esg.shape != (n_assets,) or Sigma.shape != (n_assets, n_assets):
        raise ValueError(
            f"Input dimension mismatch: weights({weights.shape}), mu({mu.shape}), "
            f"esg({esg.shape}), Sigma({Sigma.shape})"
        )
    if not np.isclose(np.sum(weights), 1.0):
        print("Warning: Portfolio weights do not sum close to 1.")

    # --- Calculate Core Metrics ---
    port_expected_return = float(mu @ weights)
    port_variance = float(weights.T @ Sigma @ weights)
    # Ensure variance is non-negative due to potential floating point errors
    port_variance = max(0, port_variance)
    port_volatility = np.sqrt(port_variance)
    port_esg_score = float(esg @ weights)

    metrics = {
        'expected_return': port_expected_return,
        'variance': port_variance,
        'volatility': port_volatility,
        'esg_score': port_esg_score
    }

    # --- Calculate Out-of-Sample Metrics (Optional) ---
    if returns_oos is not None:
        if isinstance(returns_oos, (pd.DataFrame, pd.Series)):
            oos_returns_np = returns_oos.values
        else:
            oos_returns_np = np.asarray(returns_oos)

        # Handle potential 1D array for single asset OOS returns
        if oos_returns_np.ndim == 1:
             if n_assets != 1:
                 raise ValueError("returns_oos is 1D but multiple assets exist.")
             # Reshape for consistent calculation
             oos_returns_np = oos_returns_np.reshape(-1, 1)
        elif oos_returns_np.shape[1] != n_assets:
             raise ValueError(
                 f"Dimension mismatch for returns_oos: Expected {n_assets} columns, "
                 f"got {oos_returns_np.shape[1]}"
             )

        # Calculate portfolio returns for each OOS period
        portfolio_oos_returns = oos_returns_np @ weights

        # Calculate OOS metrics
        oos_mean = float(np.mean(portfolio_oos_returns))
        oos_vol = float(np.std(portfolio_oos_returns))
        oos_annualized_vol = oos_vol * np.sqrt(annualization_factor)

        metrics['oos_mean_return'] = oos_mean
        metrics['oos_annualized_volatility'] = oos_annualized_vol

    return metrics

In [43]:
# ---------------------------- Portfolio Optimization Suite ----------------------------

'''
- Remove prices
- Change esg_s to esg_s

- Add asset_arr
- Add mu
- Add Sigma
'''

def run_portfolio_optimization_suite(
    price_df,
    mu,
    Sigma,
    asset_arr,
    esg_s  : pd.Series,
    config: Dict[str, Any],
    oos_prices = None
) -> Dict[str, Any]:
    """
    Runs a suite of portfolio optimization methods based on the provided configuration.

    This function takes price and ESG data, computes necessary inputs (expected
    returns, covariance matrix), and then executes various optimization
    strategies as specified in the configuration dictionary. It evaluates
    the resulting portfolios and returns a structured dictionary of results.

    Args:
        price_df (pd.DataFrame): DataFrame of historical asset price_df (rows=dates, cols=asset_arr).
        esg_s (pd.Series): DataFrame of ESG scores (rows=asset_arr, cols=score).
                                  Must have an index compatible with price columns.
        config (Dict[str, Any]): A dictionary controlling which optimizations to run
                                 and their parameters. Expected keys:
            'compute_inputs': { 'shrinkage': bool }
            'scalarized_qp' (optional): {
                'lamb_grid': List[float],
                'eta_grid': List[float],
                'lb': float, 'ub': float, 'allow_short': bool, 'solver': cp.Solver
            }
            'nsga2' (optional): {
                'pop_size': int, 'generations': int, 'plot': bool
            }
            'cardinality' (optional): {
                'k': int, 'lamb': float, 'eta': float
            }
            'cvar' (optional): {
                'alpha': float, 'lamb': float, 'eta': float, 'use_mu_in_obj': bool
            }
        oos_prices (Optional[pd.DataFrame], optional): Out-of-sample price_df for
                                                      evaluating realized metrics.
                                                      Defaults to None.

    Returns:
        Dict[str, Any]: A dictionary containing the results of the executed
                        optimization strategies. Keys correspond to strategy names
                        (e.g., 'scalarized_qp_results', 'nsga2_result', etc.).
    """
    results = {}
    # Align ESG scores with the asset_arr used (e.g., after dropping NaNs)
    try:
        esg_arr = esg_s.loc[asset_arr].values.flatten()
    except KeyError:
        raise ValueError("ESG scores index does not match price columns.")
    if len(esg_arr) != len(asset_arr):
         raise ValueError("ESG scores could not be aligned with asset_arr.")

    results['inputs'] = {'mu': mu, 'Sigma': Sigma, 'esg_arr': esg_arr, 'asset_arr': asset_arr}
    print(f"Inputs computed for {len(asset_arr)} asset_arr.")

    # --- Out-of-Sample Returns (if applicable) ---
    returns_oos_df = None
    if oos_prices is not None:
        returns_oos_df = oos_prices.pct_change().dropna()
        # Ensure OOS returns align with the asset_arr used in optimization
        try:
            returns_oos_df = returns_oos_df[asset_arr]
        except KeyError:
             print("Warning: OOS price columns do not perfectly match optimized asset_arr. Using intersection.")
             common_assets = asset_arr.intersection(returns_oos_df.columns)
             returns_oos_df = returns_oos_df[common_assets]
             # Re-align other inputs - this is complex, better to ensure data matches upfront
             # For simplicity here, we'll proceed but metrics might be slightly off
             # A robust implementation might filter asset_arr earlier or raise an error

    # --- 1. Scalarized QP Grid Search ---
    if 'scalarized_qp' in config:
        print("\nRunning Scalarized QP grid search...")
        qp_config = config['scalarized_qp']
        qp_results = []
        for lamb in qp_config.get('lamb_grid', [1.0]):
            for eta in qp_config.get('eta_grid', [0.1]):
                try:
                    w = solve_scalarized_qp(
                        Sigma, mu, esg_arr,
                        lamb=lamb, eta=eta,
                        lb=qp_config.get('lb', 0.0),
                        ub=qp_config.get('ub', 1.0),
                        allow_short=qp_config.get('allow_short', False),
                        allow_cash=qp_config.get('allow_cash', False),
                        solver=qp_config.get('solver', cp.OSQP)
                    )
                    metrics = calculate_portfolio_metrics(w, mu, Sigma, esg_arr, returns_oos=returns_oos_df)
                    qp_results.append({'lambda': lamb, 'eta': eta, 'weights': w, **metrics})
                    print(f"  lambda={lamb:.2f}, eta={eta:.2f} -> "
                          f"ret={metrics['expected_return']:.4f}, "
                          f"vol={metrics['volatility']:.4f}, "
                          f"esg={metrics['esg_score']:.4f}")
                except Exception as e:
                    print(f"  Failed for lambda={lamb}, eta={eta}: {e}")
        results['scalarized_qp_results'] = qp_results

    # --- 2. CVaR Optimization ---
    if 'cvar' in config:
        print("\nRunning CVaR Optimization...")
        cvar_config = config['cvar']
        daily_returns = price_df.dropna().pct_change().dropna()
         # Align scenarios with asset_arr used in mu/Sigma calculation
        try:
            scenarios = daily_returns[asset_arr].values
            if scenarios.shape[1] != len(asset_arr):
                 raise ValueError("Scenario dimensions don't match asset_arr")

            # Decide whether to include mu term based on config
            cvar_lamb = cvar_config.get('lamb', 0.0) if cvar_config.get('use_mu_in_obj', False) else 0.0

            w_cvar = solve_cvar_lp(
                scenarios, mu, esg_arr, # Pass mu here
                alpha=cvar_config.get('alpha', 0.95),
                lamb=cvar_lamb, # Pass lambda for return term
                eta=cvar_config.get('eta', 0.1),
                lb=cvar_config.get('lb', 0.0),
                ub=cvar_config.get('ub', 1.0),
                allow_short=cvar_config.get('allow_short', False),
                allow_cash=qp_config.get('allow_cash', False),
                solver=cvar_config.get('solver', cp.OSQP)
            )
            metrics_cvar = calculate_portfolio_metrics(w_cvar, mu, Sigma, esg_arr, returns_oos=returns_oos_df)
            print(f"  CVaR alpha={cvar_config.get('alpha', 0.95)}, "
                  f"return lambda={cvar_lamb:.2f}, "
                  f"ESG eta={cvar_config.get('eta', 0.1):.2f}")
            print(f"  Non-zero weights: {np.sum(np.abs(w_cvar) > 1e-6)}")
            print(f"  CVaR Metrics: ret={metrics_cvar['expected_return']:.4f}, "
                  f"vol={metrics_cvar['volatility']:.4f}, "
                  f"esg={metrics_cvar['esg_score']:.4f}")
            results['cvar_result'] = {'weights': w_cvar, 'metrics': metrics_cvar}

        except Exception as e:
            print(f"  CVaR optimization failed: {e}")
            
# --- Quantum-native constraint-filtered QAOA refinement ---
    print("\nRunning Quantum Constraint-Filtered QAOA refinement...")
    n_small = min(8, len(asset_arr))
    idx_small = np.arange(n_small)
    Sigma_small = Sigma[np.ix_(idx_small, idx_small)]
    mu_small = mu[idx_small]
    esg_small = esg_arr[idx_small]
    quantum_candidates = quantum_refinement_pipeline(Sigma_small, mu_small, esg_small, B=3)
    for q in quantum_candidates:
        print(f"prob={q['prob']:.4f}, ret={q['return']:.4f}, risk={q['risk']:.6f}, esg={q['esg']:.4f}")
    results['quantum_constraint_filtered'] = quantum_candidates

    print("\nOptimization suite finished.")
    return results

In [44]:
def run(input_data, solver_params, extra_arguments):
    
    ##### THIW IS HOW YOU READ INPUT DATA FROM JSON #####
    
    #for asset in input_data:
    # print (asset, input_data[asset]['name']
    #data
    #######################################################
    from utils import Json_parse
    price_df, esg_s, asset_arr = Json_parse.json_parse(input_data)

    from utils import Pre_processing
    mu, Sigma = Pre_processing.compute_mu_sigma(price_df)

    config_example = {
        'compute_inputs': {'shrinkage': True},
        'scalarized_qp': {
            'lamb_grid': [0.5, 1.0, 2.0],
            'eta_grid': [0.0, 0.2, 0.5],
            'lb': 0.0, 'ub': 0.2, 'allow_short': False # Example: Max 20% per asset
        },
        'cvar': {
            'alpha': 0.90, 'lamb': 0.5, 'eta': 0.3, 'use_mu_in_obj': True
        },
        'quantum_refinement_pipeline': {
            'enabled': True,          # <â€” turns quantum on/off
            'n_assets': 8,            # subset size
            'bits_per_asset': 3,      # discretization
            'target_sum': 12          # total active qubits (budget)
        }
    }

    # Assuming prices_df and esg_df are loaded pandas DataFrames
    results_dict = run_portfolio_optimization_suite(price_df, mu, Sigma, asset_arr, esg_s, config_example)
    print("\n--- Results Summary ---")
    print(results_dict.keys())
    ##############################################
    ## THIS IS HOW YOU WILL ACCESS QUANTINUUM MACHINES ##
    backend = QuantinuumWrapper.get_target()
    
    circ = Circuit(2).H(0).CX(0, 1).CZ(0, 1)
    circ.measure_all()
    backend.default_compilation_pass().apply(circ)
    compiled_circ = backend.get_compiled_circuit(circ)
    handle = backend.process_circuit(compiled_circ, n_shots=3000)

    ## THE RETURN MUST BE JSON COMPATIBLE ##
    return {"result": str(backend.get_result(handle).get_counts())}


In [45]:
# input_json='input.json'
input_json = 'small_dataset.json'

In [46]:
#############################################################################################################################
############################################## PLEASE, DON'T MODIFY THIS CELL. ##############################################
######################## IF THIS CELL DOES NOT WORK, YOUR CODE WON'T WORK ON THE EVALUATION PLATFORM ########################
#############################################################################################################################

%load_ext autoreload
%autoreload 2

import json
import main
with open(input_json) as f:
  input_data = json.load(f)
data=input_data['data']
result=main.run(data,None,None)
json.dumps(result)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Inputs computed for 13 asset_arr.

Running Scalarized QP grid search...
  lambda=0.50, eta=0.00 -> ret=0.3695, vol=0.1291, esg=6.5180
  lambda=0.50, eta=0.20 -> ret=0.1788, vol=0.1084, esg=9.4253
  lambda=0.50, eta=0.50 -> ret=0.0680, vol=0.1179, esg=9.6040
  lambda=1.00, eta=0.00 -> ret=0.3695, vol=0.1291, esg=6.5180
  lambda=1.00, eta=0.20 -> ret=0.1788, vol=0.1084, esg=9.4254
  lambda=1.00, eta=0.50 -> ret=0.1788, vol=0.1084, esg=9.4253
  lambda=2.00, eta=0.00 -> ret=0.3695, vol=0.1291, esg=6.5180
  lambda=2.00, eta=0.20 -> ret=0.2725, vol=0.1159, esg=8.8167
  lambda=2.00, eta=0.50 -> ret=0.1788, vol=0.1084, esg=9.4253

Running CVaR Optimization...
  CVaR alpha=0.9, return lambda=0.50, ESG eta=0.30
  Non-zero weights: 5
  CVaR Metrics: ret=0.1882, vol=0.2036, esg=11.0000

Optimization suite finished.

--- Results Summary ---
dict_keys(['inputs', 'scalarized_qp_results', 'cvar_result'])


  rets = prices_df.pct_change().dropna() # percentage change between the current element and prior element


'{"result": "Counter({(1, 1): 1503, (0, 0): 1497})"}'