# Best Implementation

In [None]:
import numpy as np
import torch
import torch.autograd as autograd

import gpytorch
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, MaternKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood

import cyipopt
from cyipopt import Problem
from scipy.spatial import ConvexHull

import logging

# Set up logging configuration
logging.basicConfig(filename='optimization_log.txt', 
                    level=logging.INFO, 
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Set random seed for reproducibility
np.random.seed(2001)
torch.manual_seed(2001)

# Parameters
T = 10  # Time horizon
D = 2  # Number of risky assets
r = 0.02  # Risk-free return in pct.
Rf = np.exp(r)  # Risk-free return
Rf = r  # Risk-free return
tau = 0.005  # Transaction cost rate
beta = 0.975  # Discount factor
gamma = 3.0  # Risk aversion coefficient

# Risky assets - deterministic
mu = np.array([0.07, 0.07])
Sigma = np.array([[0.2, 0], [0, 0.2]])

# Include consumption flag
include_consumption = False  # Set to True to include consumption

# Define the GPR model with ARD
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=train_x.shape[1])
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

def train_gp_model(train_x, train_y):
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_constraint=gpytorch.constraints.GreaterThan(1e-6)
    )
    model = GPRegressionModel(train_x, train_y, likelihood)
    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    training_iterations = 200
    for i in range(training_iterations):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    return model, likelihood

def utility(var, gamma):
    if gamma == 1:
        return torch.log(var)  # Log utility for gamma = 1
    else:
        return (var ** (1.0 - gamma)) / (1 - gamma)  # CRRA utility

def safe_utility(var, gamma):
    var = torch.clamp(var, min=1e-10)
    return utility(var, gamma)

def normalized_bond_holdings(xt, delta_plus, delta_minus, tau):
    delta = delta_plus - delta_minus
    transaction_costs = tau * torch.sum(delta_plus - delta_minus)
    # Compute bond holdings
    bt = 1.0 - torch.sum(xt + delta) - transaction_costs
    
    if bt < 0 and bt + 1e-5 >= 0.0:
        bt = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
    return bt

def normalized_state_dynamics(xt, delta_plus, delta_minus, Rt, bt, Rf):
    delta = delta_plus - delta_minus
    # Wealth at t+1
    pi_t1 = bt * Rf + torch.sum((xt + delta) * Rt)
    # Ensure pi_t1 is positive
    epsilon = 1e-8
    pi_t1 = torch.clamp(pi_t1, min=epsilon)
    # Portfolio weights at t+1
    xt1 = ((xt + delta) * Rt) / pi_t1
    # xt1.requires_grad_(True)  # Not needed here
    return pi_t1, xt1

def V_terminal(xT):
    return utility(1.0 - tau * torch.sum(torch.abs(xT)), gamma)

def bellman_equation(vt_next_in, vt_next_out, xt, delta_plus, delta_minus, beta, gamma, tau, Rf, convex_hull=None):
    # Compute bond holdings
    bt = normalized_bond_holdings(xt, delta_plus, delta_minus, tau)

    # Simulate returns (expected returns for simplicity)
    Rt = torch.tensor(mu, dtype=torch.float32)

    # Compute next period wealth dynamics
    pi_t1, xt1 = normalized_state_dynamics(xt, delta_plus, delta_minus, Rt, bt, Rf)

    # Do not set requires_grad on xt1
    if  isinstance(vt_next_in, gpytorch.models.ExactGP):    
        xt1.requires_grad = True
    # Determine whether the next state is inside or outside the NTR

    if is_in_ntr(xt1.detach().cpu().numpy(), convex_hull):
        # Inside the NTR, use vt_next_in
        if isinstance(vt_next_in, gpytorch.models.ExactGP):
            vt_next_in.eval()
            # with torch.no_grad():
            vt_next_val = vt_next_in(xt1).mean.squeeze(-1)
        elif callable(vt_next_in):
            vt_next_val = vt_next_in(xt1)
        elif vt_next_in is None:
            vt_next_val = V_terminal(xt1)
        else:
            raise TypeError("Expected vt_next_in to be a GP model or function.")
    else:
        # Outside the NTR, use vt_next_out
        if isinstance(vt_next_out, gpytorch.models.ExactGP):
            vt_next_out.eval()
            # with torch.no_grad():
            vt_next_val = vt_next_out(xt1).mean.squeeze(-1)
        elif callable(vt_next_out):
            vt_next_val = vt_next_out(xt1)
        elif vt_next_out is None:
            vt_next_val = V_terminal(xt1)
        else:
            raise TypeError("Expected vt_next_out to be a GP model or function.")
        
    # Decide which value function to use based on NTR
    # in_ntr = is_in_ntr(xt1.detach().cpu().numpy(), convex_hull)
    # vt_val = vt_next_in(xt1).mean.squeeze(0) if in_ntr else vt_next_out(xt1).mean.squeeze(0)    

    # Compute the value function
    vt = beta * (pi_t1 ** (1.0 - gamma)) * vt_next_val

    # vt = beta * (pi_t1 ** (1.0 - gamma)) * vt_val
    # Compute the value function
    if torch.isnan(pi_t1) or torch.isinf(pi_t1):
        raise ValueError("Invalid pi_t1 encountered in bellman_equation.")
    if torch.isnan(vt_next_val) or torch.isinf(vt_next_val):
        raise ValueError("Invalid vt_next_val encountered in bellman_equation.")
    if torch.isnan(vt) or torch.isinf(vt):
        raise ValueError("Invalid vt encountered in bellman_equation.")

    # vt = torch.clamp(vt, min=-1e21, max=1e21)

    return vt

def sample_state_points(D):
    from itertools import product
    # Generate all combinations of 0 and 1 for D dimensions
    points = list(product([0, 1], repeat=D))
    
    # Add the midpoint
    midpoint = [0.5] * D
    points.append(midpoint)
    
    # Filter out points where the sum exceeds 1
    valid_points = [point for point in points if sum(point) <= 1]
    
    return torch.tensor(valid_points, dtype=torch.float32)

def sample_state_points_simplex(D, N):
    # Generate random points in the simplex
    def random_points_in_simplex(n, k):
        points = np.random.dirichlet(np.ones(k), size=n)
        return points
    points = random_points_in_simplex(N, D)
    return torch.tensor(points, dtype=torch.float32)

def is_in_ntr(x, convex_hull):
    if convex_hull is None:
        return False
    new_point = np.array(x)
    hull = convex_hull
    A = hull.equations[:, :-1]
    b = -hull.equations[:, -1]
    inequalities = np.dot(A, new_point) + b
    return np.all(inequalities <= 1e-5)  # Allow for numerical tolerance

def MertonPoint(mu, Sigma, r, gamma):
    # Compute the Merton portfolio weights
    Lambda = np.diag(np.sqrt(np.diag(Sigma)))
    Lambda_Sigma_Lambda = np.dot(Lambda, np.dot(Sigma, Lambda))
    Lambda_Sigma_Lambda_inv = np.linalg.inv(Lambda_Sigma_Lambda)
    mu_r = mu - r
    pi = np.dot(Lambda_Sigma_Lambda_inv, mu_r / gamma)
    return pi

class PortfolioOptimization(cyipopt.Problem):
    def __init__(
        self,
        D,
        xt,
        vt_next_in,
        vt_next_out,
        t,
        T,
        beta,
        gamma,
        tau,
        Rf,
        mu,
        Sigma,
        convex_hull=None,
        include_consumption=False,
        ntr_mid_point=None
    ):
        self.D = D
        self.xt = xt.detach().clone()  # Ensure self.xt is a leaf variable
        self.vt_next_in = vt_next_in
        self.vt_next_out = vt_next_out
        self.t = t
        self.T = T
        self.beta = beta
        self.gamma = gamma
        self.tau = tau
        self.Rf = Rf
        self.mu = mu
        self.Sigma = Sigma
        self.convex_hull = convex_hull
        self.include_consumption = include_consumption
        self.ntr_mid_point = ntr_mid_point


        # Number of variables: delta_plus, delta_minus
        self.n = 2 * D

        # Number of constraints: D constraints from xt + delta >= 0, and 3 scalar constraints
        self.m = D + 3

        # Variable bounds
        lb = np.zeros(self.n)
        ub = np.ones(self.n)

        # Constraint bounds
        cl = np.zeros(self.m)
        cu = np.full(self.m, np.inf)  # All constraints are inequalities (>= 0)

        super().__init__(n=self.n, m=self.m, problem_obj=self, lb=lb, ub=ub, cl=cl, cu=cu)

    def objective(self, params):
        idx = 0

        # Convert params to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)
        delta_plus = params_tensor[idx : idx+self.D]
        delta_minus = params_tensor[idx + self.D : idx + 2 * self.D]
        
        # Compute the value function
        vt = bellman_equation(
            self.vt_next_in,
            self.vt_next_out,
            self.xt,
            delta_plus,
            delta_minus,
            self.beta,
            self.gamma,
            self.tau,
            self.Rf,
            self.convex_hull
        )
        
        if torch.isnan(vt).any() or torch.isinf(vt).any():
            raise ValueError("NaN or Inf detected in objective function!")
        
        # Logging for debugging
        # logging.info(f"delta_plus: {delta_plus.detach().cpu().numpy()}, delta_minus: {delta_minus.detach().cpu().numpy()}")
        # logging.info(f"Objective Value (vt): {vt.item()}")
        # # Logging for debugging
        # logging.info(f"point: {self.xt.detach().cpu().numpy()}")
        # logging.info(f"delta_plus: {delta_plus.detach().cpu().numpy()}, delta_minus: {delta_minus.detach().cpu().numpy()}")
        # logging.info(f"delta: {delta_plus.detach().cpu().numpy() - delta_minus.detach().cpu().numpy()}")
        # logging.info(f"Objective Value (vt): {vt.item()}")        
        return -vt.item()  # IPOPT minimizes, so negate to maximize

    def gradient(self, params):
        # Convert params to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)
        delta_plus = params_tensor[:self.D]
        delta_minus = params_tensor[self.D:2 * self.D]
        
        # Compute the value function
        vt = bellman_equation(
            self.vt_next_in,
            self.vt_next_out,
            self.xt,
            delta_plus,
            delta_minus,
            self.beta,
            self.gamma,
            self.tau,
            self.Rf,
            self.convex_hull
        )
        
        # Compute gradients
        vt.backward()
        
        # Extract gradients
        grads = params_tensor.grad.detach().cpu().numpy()
        
        # Logging for debugging
        # logging.info(f"Gradients: {grads}")
        
        return -grads  # IPOPT minimizes, so negate gradients

    
    def compute_constraints(self, params_tensor):
        delta_plus = params_tensor[:self.D]
        delta_minus = params_tensor[self.D:2 * self.D]
        delta = delta_plus - delta_minus

        # Constraint 1 to D: x + delta >= 0 (each component)
        constraints_x_plus_delta = self.xt + delta  # Shape: [D]

        # Constraint D+1: bt >= 0
        bt = normalized_bond_holdings(self.xt, delta_plus, delta_minus, self.tau)  # Scalar

        # Constraint D+2: 1 - sum(x + delta) >= 0
        constraint_sum_le_1 = 1.0 - torch.sum(self.xt + delta)  # Scalar

        # Constraint D+3: sum(x + delta) >= 0
        constraint_sum_ge_0 = torch.sum(self.xt + delta)  # Scalar

        # Concatenate all scalar constraints into a 1D tensor in the correct order
        constraints_combined = torch.cat([
            constraints_x_plus_delta.view(-1),    # g1, g2 for D=2
            bt.view(1),                           # g3 (1 constraint)
            constraint_sum_le_1.view(1),          # g4 (1 constraint)
            constraint_sum_ge_0.view(1)           # g5 (1 constraints)
        ])

        # Logging for debugging
        # logging.info(f"Constraints Combined Shape: {constraints_combined.shape}")
        # logging.info(f"Constraints Combined: {constraints_combined}")

        return constraints_combined    
    
    def constraints(self, params):
        # Convert params to a tensor with gradient tracking enabled
        params_tensor = torch.tensor(params, dtype=torch.float32)

        # Use the compute_constraints function to calculate constraints
        constraints_tensor = self.compute_constraints(params_tensor)

        # Convert constraints tensor to numpy array for IPOPT compatibility
        constraints_array = constraints_tensor.detach().cpu().numpy()

        # Logging for debugging
        # logging.info(f"Constraints Array Shape: {constraints_array.shape}")
        # logging.info(f"Constraints Array: {constraints_array}")

        return constraints_array    

    # WORKS
    def jacobian(self, params):
        # Convert parameters to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)

        # Compute all constraints as scalar values
        constraints_combined = self.compute_constraints(params_tensor)

        # Compute the Jacobian using torch.autograd.functional.jacobian
        jacobian_tensor = torch.autograd.functional.jacobian(
            lambda x: self.compute_constraints(x),
            params_tensor
        )

        # Logging for debugging
        # print(f"Jacobian Tensor Shape: {jacobian_tensor.shape}")
        # print(f"Jacobian Tensor: {jacobian_tensor}")

        # Ensure the Jacobian has shape (m, n)
        if jacobian_tensor.dim() == 2 and jacobian_tensor.shape == (self.m, self.n):
            return jacobian_tensor.detach().cpu().numpy().flatten()
        else:
            raise ValueError(f"Unexpected Jacobian shape: {jacobian_tensor.shape}")


def solve_bellman_with_ipopt(
    D, xt, vt_next_in, vt_next_out, t, T, beta, gamma, tau, Rf, mu, Sigma,
    convex_hull=None, ntr_mid_point=None, include_consumption=False, num_starts=10, drop_tolerance=0.2
):
    best_solution = None
    best_info = None
    best_obj_val = float('-inf')
    failed_attempts = 0
    max_failed_attempts = int(num_starts * (1.0 - drop_tolerance))

    print(f"xt: {xt}")

    def generate_feasible_initial_guess(xt, D, tau, include_consumption=False):
        while True:
            if torch.allclose(xt, torch.tensor([0.0, 0.0])):  # Close to (0.0, 0.0)
                delta_plus = torch.rand(D) * 0.1  # Small buys
                delta_minus = torch.zeros(D)  # No selling allowed

            elif torch.allclose(xt, torch.tensor([1.0, 0.0])):  # Close to (1.0, 0.0)
                delta_plus = torch.tensor([0.05, 0.0])  # Small buy in the first asset
                delta_minus = torch.tensor([0.05, 0.0])  # Small sell in the first asset

            elif torch.allclose(xt, torch.tensor([0.0, 1.0])):  # Close to (0.0, 1.0)
                delta_plus = torch.tensor([0.0, 0.05])  # Small buy in the second asset
                delta_minus = torch.tensor([0.0, 0.05])  # Small sell in the second asset

            else:
                # Normal case for other points
                delta_plus = torch.rand(D) * xt
                delta_minus = torch.rand(D) * (1 - xt)
            # Ensure that delta_plus and delta_minus are within the bounds
            delta_plus = torch.clamp(delta_plus, 0, 1)
            delta_minus = torch.clamp(delta_minus, 0, 1)

            # Compute delta
            delta = delta_plus - delta_minus

            # Compute transaction costs
            transaction_costs = tau * torch.sum(delta_plus - delta_minus)

            # Compute bond holdings (bt), ensuring non-negative bond holdings
            bt = 1.0 - torch.sum(xt + delta) - transaction_costs
            if bt < 0:
                continue  # Retry if bond holdings are negative

            # Optionally include consumption
            c_t = torch.tensor(0.0) if not include_consumption else torch.rand(1) * 0.05

            # Verify that x + delta >= 0
            x_plus_delta = xt + delta
            if torch.any(x_plus_delta < 0):
                continue  # Retry if any asset constraint is violated

            # Verify that 1 - sum(x + delta) >= 0
            if 1.0 - torch.sum(x_plus_delta) < 0:
                continue  # Retry if sum constraint is violated

            # Verify no shorting constraints: delta >= -xt
            if torch.any(delta < -xt):
                continue  # Retry if no shorting constraint is violated

            # Return the initial guess if all constraints are satisfied
            initial_guess = torch.cat([delta_plus, delta_minus])
            return initial_guess

    # Loop through multiple starting points
    for start_idx in range(num_starts):
        try:
            initial_guess = generate_feasible_initial_guess(xt, D, tau, include_consumption=False)        
            # logging.info(f"Initial Guess from point: {xt}")
            # logging.info(f"Initial Guess: {initial_guess}")
        
        except ValueError as e:
            logging.warning(f"Start {start_idx}: {e}")
            failed_attempts += 1
            if failed_attempts > max_failed_attempts:
                logging.error(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                return None, None, None, None, None
            continue

        # delta_plus = initial_guess[:D]

        try:
            # Create the optimization problem
            prob = PortfolioOptimization(
                D,
                xt,
                vt_next_in,
                vt_next_out,
                t,
                T,
                beta,
                gamma,
                tau,
                Rf,
                mu,
                Sigma=Sigma,
                convex_hull=convex_hull,
                ntr_mid_point=ntr_mid_point,  # Pass ntr_mid_point
                include_consumption=include_consumption,
            )
            # Define variable bounds
            lb = np.zeros(2 * D)
            ub = np.ones(2 * D)        
       
        # except Exception as e:
        #     logging.error(f"Error setting up optimization problem: {e}")


            prob.add_option("tol", 1e-6)
            prob.add_option("max_iter", 700)
            prob.add_option("print_level", 2)
            prob.add_option("acceptable_tol", 1e-5)
            prob.add_option("honor_original_bounds", "yes")
            prob.add_option("mu_strategy", "adaptive")  # Adaptive step size strategy
            prob.add_option("mu_oracle", "quality-function")  # Control step quality            
            # prob.add_option("derivative_test", "none")            
            
            # prob.add_option("derivative_test", "first-order")
            # prob.add_option("derivative_test_tol", 1e-4)            
            
            # Optionally disable derivative checker if it's causing issues
            # prob.add_option("derivative_test", "none")
            
            # prob.add_option("barrier_tol_factor", 0.1)

            solution, info = prob.solve(initial_guess)

            # Check if this solution is better than the current best
            if info['status'] == 0 and (best_solution is None or info['obj_val'] > best_obj_val):
                best_solution = solution
                best_info = info
                best_obj_val = info['obj_val']
                # print(f"delta_plus: {best_solution[:D]}, delta_minus: {best_solution[D:2 * D]}, omega: {xt.cpu().numpy() + best_solution[:D] - best_solution[D:2 * D]}")

        except Exception as e:
            print(f"Optimization failed for start {start_idx}: {e}")
            failed_attempts += 1
            # If too many failures occur, drop this point
            if failed_attempts > max_failed_attempts:
                print(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                return None, None, None, None, None
            continue

    if best_solution is None:
        print(f"No optimizer solution found for point {xt}!")
        return None, None, None, None, None

    # After finding the best solution, extract the variables
    idx = 0
    delta_plus_opt = best_solution[idx : idx + D]
    delta_minus_opt = best_solution[idx + D : idx + 2 * D]
    delta_opt = delta_plus_opt - delta_minus_opt

    # Compute omega_i_t and bond holdings (bt)
    omega_i_t = xt.cpu().numpy() + delta_opt
    bt = normalized_bond_holdings(
        xt, torch.tensor(delta_plus_opt), torch.tensor(delta_minus_opt), tau
    ).item()
    
    print(f"Delta+: {delta_plus_opt}, Delta-: {delta_minus_opt}, Delta: {delta_opt}, Omega: {omega_i_t}, bt: {bt}")
    return delta_plus_opt, delta_minus_opt, delta_opt, omega_i_t, bt

def approximate_ntr(vt_next_in, vt_next_out, D, t, T, beta, gamma, tau, Rf, mu, Sigma):
    # Step 1: Sample state points
    tilde_X_t = sample_state_points(D)
    N = len(tilde_X_t)
    tilde_omega_t = []

    for i in range(N):
        tilde_x_i_t = tilde_X_t[i]
        # Step 2: Solve optimization problem
        delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
            D, tilde_x_i_t, vt_next_in, vt_next_out, t, T, beta, gamma, tau, Rf, mu, Sigma
        )
        if delta_plus is not None:
            # Step 3: Compute NTR vertices
            tilde_omega_i_t = (tilde_x_i_t + delta).detach().cpu().numpy()
            tilde_omega_t.append(tilde_omega_i_t)

    # Step 4: Compute convex hull of the vertices to represent the NTR
    tilde_omega_t = np.array(tilde_omega_t)
    if len(tilde_omega_t) >= D + 1:
        convex_hull = ConvexHull(tilde_omega_t)
    else:
        convex_hull = None  # Cannot compute convex hull with fewer points

    return tilde_omega_t, convex_hull

def bayesian_active_learning(model, likelihood, state_space, num_new_points=10):
    model.eval()
    likelihood.eval()
    
    # Convert state_space to a tensor if needed
    # state_space = torch.tensor(state_space, dtype=torch.float32)
    # state_space

    # Compute predictive mean and variance across the state space
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        pred_dist = model(state_space)
        variance = pred_dist.variance

    # Select points with the highest variance
    _, top_var_indices = torch.topk(variance, num_new_points)
    new_samples = state_space[top_var_indices]

    return new_samples

def dynamic_programming(T, N, D, gamma, beta, tau, Rf, mu, Sigma):
    # Initialize value function V
    V = [[None, None] for _ in range(T + 1)]
    
    # Set terminal value function
    V[T][0] = V_terminal  # For inside NTR
    V[T][1] = V_terminal  # For outside NTR

    NTRs = [None for _ in range(T)]  # Store NTRs for each period

    for t in reversed(range(T)):

            # break

        print(f"Time step {t}")
        


        # Step 2a: Approximate NTR
        print("Step 2a: Approximate NTR")
        tilde_omega_t, convex_hull = approximate_ntr(V[t + 1][0], V[t + 1][1], D, t, T, beta, gamma, tau, Rf, mu, Sigma)
        NTRs[t] = convex_hull

        # Step 2b: Sample state points
        print("Step 2b: Sample state points")
        X_t = sample_state_points_simplex(D, N)
        data_in = []
        data_out = []

        for i in range(len(X_t)):
            x_i_t = X_t[i]

            if t == T-2:
                print(f"shape of V[t+1][0]: {V[t+1][0].shape}")
                print(f"shape of V[t+1][1]: {V[t+1][1].shape}")
                print(f"shape of x_i_t: {x_i_t.shape}")
                
                            
            # Step 2c: Solve optimization problem
            print(f"Step 2c: Solve optimization problem for point {x_i_t}")
            delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
                D, x_i_t, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma,
                convex_hull=NTRs[t]
            )
            if delta_plus is None:
                continue  # Skip if optimization failed
            print(f"Time: {t}, Point: {x_i_t}, Delta+: {delta_plus}, Delta-: {delta_minus}, Delta: {delta}, Omega: {omega_i_t}, bt: {b_t}")
            # Compute value using Bellman equation
            v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], x_i_t, 
                                     torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])

            # Determine if the point is inside the NTR and append to the respective data set
            x_i_t_np = x_i_t.detach().cpu().numpy()
            in_ntr = is_in_ntr(x_i_t_np, convex_hull)
            if in_ntr:
                data_in.append((x_i_t_np, v_i_t.item()))
            else:
                data_out.append((x_i_t_np, v_i_t.item()))

        # # Step 2e: Train GPR models for inside and outside NTR
        # if data_in:
        #     train_x_in = torch.tensor([d[0] for d in data_in], dtype=torch.float32)
        #     train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
        #     model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
        #     V[t][0] = model_in
        # else:
        #     V[t][0] = V[t + 1][0]

        # if data_out:
        #     train_x_out = torch.tensor([d[0] for d in data_out], dtype=torch.float32)
        #     train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
        #     model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
        #     V[t][1] = model_out
        # else:
        #     V[t][1] = V[t + 1][1]
        # Step 2e: Train GPR models for inside and outside NTR
        print("Step 2e: Train GPR models for inside and outside NTR")
        if data_in:
            train_x_in = torch.tensor([d[0] for d in data_in], dtype=torch.float32)
            print(f"train_x_in done ")
            train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
            print(f"train_y_in done ")
            model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
            V[t][0] = model_in
            print(f"train gp model_in done ")
            # Apply Bayesian Active Learning to add new high-uncertainty points
            new_samples_in = bayesian_active_learning(model_in, likelihood_in, X_t, num_new_points=25)
            for sample in new_samples_in:
                delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
                    D, sample, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma, convex_hull=NTRs[t]
                )
                if delta_plus is None:
                    continue
                v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], sample, 
                                         torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])
                data_in.append((sample.detach().cpu().numpy(), v_i_t.item()))
            print(f"new_samples_in done")
        if data_out:
            train_x_out = torch.tensor([d[0] for d in data_out], dtype=torch.float32)
            train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
            model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
            V[t][1] = model_out

            # Apply Bayesian Active Learning to add new high-uncertainty points
            new_samples_out = bayesian_active_learning(model_out, likelihood_out, X_t, num_new_points=25)
            for sample in new_samples_out:
                delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
                    D, sample, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma, convex_hull=NTRs[t]
                )
                if delta_plus is None:
                    continue
                v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], sample, 
                                         torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])
                data_out.append((sample.detach().cpu().numpy(), v_i_t.item()))

        # Retrain GPR models with the new data
        if data_in:
            train_x_in = torch.tensor([d[0] for d in data_in], dtype=torch.float32)
            train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
            model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
            V[t][0] = model_in

        if data_out:
            train_x_out = torch.tensor([d[0] for d in data_out], dtype=torch.float32)
            train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
            model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
            V[t][1] = model_out    



    return V, NTRs

# Parameters
T = 6  # Time horizon
N = 100  # Number of sample points
D = 2  # Number of risky assets

V, NTRs = dynamic_programming(T, N, D, gamma, beta, tau, Rf, mu, Sigma)

# Testing terminal

In [None]:
import numpy as np
import torch
import torch.autograd as autograd

import gpytorch
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, MaternKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood

import cyipopt
from cyipopt import Problem
from scipy.spatial import ConvexHull

import logging

# Set up logging configuration
logging.basicConfig(filename='optimization_log.txt', 
                    level=logging.INFO, 
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Set random seed for reproducibility
np.random.seed(2001)
torch.manual_seed(2001)

# Parameters
T = 10  # Time horizon
D = 2  # Number of risky assets
r = 0.02  # Risk-free return in pct.
Rf = np.exp(r)  # Risk-free return
Rf = r  # Risk-free return
tau = 0.01  # Transaction cost rate
beta = 0.975  # Discount factor
gamma = 3.0  # Risk aversion coefficient

# Risky assets - deterministic
mu = np.array([0.07, 0.07])
Sigma = np.array([[0.2, 0], [0, 0.2]])

# Include consumption flag
include_consumption = False  # Set to True to include consumption

# Parameters
T = 6  # Time horizon
N = 10  # Number of sample points
D = 2  # Number of risky assets

# Define the GPR model with ARD
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=train_x.shape[1])
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

def train_gp_model(train_x, train_y):
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_constraint=gpytorch.constraints.GreaterThan(1e-6)
    )
    model = GPRegressionModel(train_x, train_y, likelihood)
    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    training_iterations = 200
    for i in range(training_iterations):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    return model, likelihood

def utility(var, gamma):
    if gamma == 1:
        return torch.log(var)  # Log utility for gamma = 1
    else:
        return (var ** (1.0 - gamma)) / (1 - gamma)  # CRRA utility

def safe_utility(var, gamma):
    var = torch.clamp(var, min=1e-10)
    return utility(var, gamma)

def normalized_bond_holdings(xt, delta_plus, delta_minus, tau):
    delta = delta_plus - delta_minus
    transaction_costs = tau * torch.sum(delta_plus - delta_minus)
    # Compute bond holdings
    bt = 1.0 - torch.sum(xt + delta) - transaction_costs
    
    if bt < 0 and bt + 1e-5 >= 0.0:
        bt = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
    return bt

def normalized_state_dynamics(xt, delta_plus, delta_minus, Rt, bt, Rf):
    delta = delta_plus - delta_minus
    # Wealth at t+1 (batch processing)
    pi_t1 = bt * Rf + torch.sum((xt + delta) * Rt, dim=-1)
    
    # Ensure pi_t1 is positive
    epsilon = 1e-8
    pi_t1 = torch.clamp(pi_t1, min=epsilon)
    
    # Portfolio weights at t+1
    xt1 = ((xt + delta) * Rt) / pi_t1.unsqueeze(-1)  # Broadcast pi_t1 across D dimensions
    
    return pi_t1, xt1

def V_terminal(xT):
    # Apply utility function over the batch dimension
    utility_vals = utility(1.0 - tau * torch.sum(torch.abs(xT), dim=-1), gamma)
    return utility_vals.unsqueeze(-1)  # Ensure the shape is (batch_size, 1)

def bellman_equation(vt_next_in, vt_next_out, xt, delta_plus, delta_minus, beta, gamma, tau, Rf, convex_hull=None):
    bt = normalized_bond_holdings(xt, delta_plus, delta_minus, tau)
    
    # Broadcasting Rt over the batch
    Rt = torch.tensor(mu, dtype=torch.float32).unsqueeze(0).expand(xt.size(0), -1)
    
    pi_t1, xt1 = normalized_state_dynamics(xt, delta_plus, delta_minus, Rt, bt, Rf)
    
    if  isinstance(vt_next_in, gpytorch.models.ExactGP):    
        xt1_new = xt1.detach().clone().requires_grad_(True)  # Create new tensor with requires_grad=True
    else:
        xt1_new = xt1
    # Determine whether the next state is inside or outside the NTR

    # print(f"xt1 shape:  {np.shape(xt1)} xt1_new shape: {np.shape(xt1_new)}")
    # print(f"xt1 len:  {len(np.shape(xt1))} xt1_new len: {len(np.shape(xt1_new))}")
    
    if is_in_ntr(xt1.detach().cpu().numpy(), convex_hull,delta_plus, delta_minus):
        # Inside the NTR, use vt_next_in
        if isinstance(vt_next_in, gpytorch.models.ExactGP):
            vt_next_in.eval()
            vt_next_val = vt_next_in(xt1_new).mean
        elif callable(vt_next_in):
            vt_next_val = vt_next_in(xt1_new)
        elif vt_next_in is None:
            vt_next_val = V_terminal(xt1_new)
        else:
            raise TypeError("Expected vt_next_in to be a GP model or function.")
    else:
        # Outside the NTR, use vt_next_out
        if isinstance(vt_next_out, gpytorch.models.ExactGP):
            vt_next_out.eval()
            # with torch.no_grad():
            vt_next_val = vt_next_out(xt1_new).mean
        elif callable(vt_next_out):
            vt_next_val = vt_next_out(xt1_new)
        elif vt_next_out is None:
            vt_next_val = V_terminal(xt1_new)
        else:
            raise TypeError("Expected vt_next_out to be a GP model or function.")
        

    # Decide which value function to use based on NTR
    # in_ntr = is_in_ntr(xt1.detach().cpu().numpy(), convex_hull)
    # vt_val = vt_next_in(xt1).mean.squeeze(0) if in_ntr else vt_next_out(xt1).mean.squeeze(0)    

    # Compute the value function
    vt = beta * (pi_t1 ** (1.0 - gamma)) * vt_next_val.item()

    # vt = beta * (pi_t1 ** (1.0 - gamma)) * vt_val
    # Compute the value function
    if torch.isnan(pi_t1) or torch.isinf(pi_t1):
        raise ValueError("Invalid pi_t1 encountered in bellman_equation.")
    if torch.isnan(vt_next_val) or torch.isinf(vt_next_val):
        raise ValueError("Invalid vt_next_val encountered in bellman_equation.")
    if torch.isnan(vt) or torch.isinf(vt):
        raise ValueError("Invalid vt encountered in bellman_equation.")

    # vt = torch.clamp(vt, min=-1e21, max=1e21)

    return vt

def sample_state_points(D):
    from itertools import product
    # Generate all combinations of 0 and 1 for D dimensions
    points = list(product([0, 1], repeat=D))
    
    # Add the midpoint
    midpoint = [0.5] * D
    points.append(midpoint)
    
    # Filter out points where the sum exceeds 1
    valid_points = [point for point in points if sum(point) <= 1]
    
    return torch.tensor(valid_points, dtype=torch.float32)

def sample_state_points_simplex(D, N):
    # Generate random points in the simplex
    def random_points_in_simplex(n, k):
        points = np.random.dirichlet(np.ones(k), size=n)
        return points
    points = random_points_in_simplex(N, D)
    return torch.tensor(points, dtype=torch.float32)

def is_in_ntr(x, convex_hull):
    if convex_hull is None:
        return False
    new_point = np.array(x)
    hull = convex_hull
    A = hull.equations[:, :-1]
    b = -hull.equations[:, -1]
    inequalities = np.dot(A, new_point) + b
    return np.all(inequalities <= 2e-3)  # Allow for numerical tolerance

def is_in_ntr(x, convex_hull, delta_plus, delta_minus, epsilon_ntr=1e-3):
    if convex_hull is None:
        return False
    # Ensure delta_plus and delta_minus are tensors
    if not isinstance(delta_plus, torch.Tensor):
        delta_plus = torch.tensor(delta_plus, dtype=torch.float32)
    if not isinstance(delta_minus, torch.Tensor):
        delta_minus = torch.tensor(delta_minus, dtype=torch.float32)
            
    # Check if point is near NTR using convex hull
    new_point = np.array(x)
    hull = convex_hull
    A = hull.equations[:, :-1]
    b = -hull.equations[:, -1]
    inequalities = np.dot(A, new_point) + b
    in_convex_hull = np.all(inequalities <= epsilon_ntr)

    delta = delta_plus - delta_minus

    # Use delta policy: if both delta_plus and delta_minus are small, we are likely in the NTR
    delta_policy = torch.all(torch.abs(delta) < epsilon_ntr)

    return in_convex_hull or delta_policy

def MertonPoint(mu, Sigma, r, gamma):
    # Compute the Merton portfolio weights
    Lambda = np.diag(np.sqrt(np.diag(Sigma)))
    Lambda_Sigma_Lambda = np.dot(Lambda, np.dot(Sigma, Lambda))
    Lambda_Sigma_Lambda_inv = np.linalg.inv(Lambda_Sigma_Lambda)
    mu_r = mu - r
    pi = np.dot(Lambda_Sigma_Lambda_inv, mu_r / gamma)
    return pi

class PortfolioOptimization(cyipopt.Problem):
    def __init__(
        self,
        D,
        xt,
        vt_next_in,
        vt_next_out,
        t,
        T,
        beta,
        gamma,
        tau,
        Rf,
        mu,
        Sigma,
        convex_hull=None,
        include_consumption=False,
        ntr_mid_point=None
    ):
        self.D = D
        self.xt = xt.detach().clone()  # Ensure self.xt is a leaf variable
        self.vt_next_in = vt_next_in
        self.vt_next_out = vt_next_out
        self.t = t
        self.T = T
        self.beta = beta
        self.gamma = gamma
        self.tau = tau
        self.Rf = Rf
        self.mu = mu
        self.Sigma = Sigma
        self.convex_hull = convex_hull
        self.include_consumption = include_consumption
        self.ntr_mid_point = ntr_mid_point


        # Number of variables: delta_plus, delta_minus
        self.n = 2 * D

        # Number of constraints: D constraints from xt + delta >= 0, and 3 scalar constraints
        self.m = D + 3

        # Variable bounds
        lb = np.zeros(self.n)
        ub = np.ones(self.n)

        # Constraint bounds
        cl = np.zeros(self.m)
        cu = np.full(self.m, np.inf)  # All constraints are inequalities (>= 0)

        super().__init__(n=self.n, m=self.m, problem_obj=self, lb=lb, ub=ub, cl=cl, cu=cu)

    def objective(self, params):
        idx = 0

        # Convert params to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)
        delta_plus = params_tensor[idx : idx+self.D]
        delta_minus = params_tensor[idx + self.D : idx + 2 * self.D]
        
        # Compute the value function
        vt = bellman_equation(
            self.vt_next_in,
            self.vt_next_out,
            self.xt,
            delta_plus,
            delta_minus,
            self.beta,
            self.gamma,
            self.tau,
            self.Rf,
            self.convex_hull
        )
        
        if torch.isnan(vt).any() or torch.isinf(vt).any():
            raise ValueError("NaN or Inf detected in objective function!")      
        # Ensure vt is a scalar
        vt_scalar = vt.item()  # Convert tensor to scalar

        if torch.isnan(vt_scalar) or torch.isinf(vt_scalar):
            raise ValueError("NaN or Inf detected in objective function!")
        
        return -vt_scalar  # IPOPT minimizes, so negate to maximize
    def gradient(self, params):
        # Convert params to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)
        delta_plus = params_tensor[:self.D]
        delta_minus = params_tensor[self.D:2 * self.D]
        
        # Compute the value function
        vt = bellman_equation(
            self.vt_next_in,
            self.vt_next_out,
            self.xt,
            delta_plus,
            delta_minus,
            self.beta,
            self.gamma,
            self.tau,
            self.Rf,
            self.convex_hull
        )
        
        # Compute gradients
        vt.backward()
        
        # Extract gradients
        grads = params_tensor.grad.detach().cpu().numpy()
        
        # Logging for debugging
        # logging.info(f"Gradients: {grads}")
        
        return -grads  # IPOPT minimizes, so negate gradients

    
    def compute_constraints(self, params_tensor):
        delta_plus = params_tensor[:self.D]
        delta_minus = params_tensor[self.D:2 * self.D]
        delta = delta_plus - delta_minus

        # Constraint 1 to D: x + delta >= 0 (each component)
        constraints_x_plus_delta = self.xt + delta  # Shape: [D]

        # Constraint D+1: bt >= 0
        bt = normalized_bond_holdings(self.xt, delta_plus, delta_minus, self.tau)  # Scalar

        # Constraint D+2: 1 - sum(x + delta) >= 0
        constraint_sum_le_1 = 1.0 - torch.sum(self.xt + delta, dim=-1)
        
        # Constraint D+3: sum(x + delta) >= 0
        constraint_sum_ge_0 = torch.sum(self.xt + delta, dim=-1)

        # Concatenate all scalar constraints into a 1D tensor in the correct order
        constraints_combined = torch.cat([
            constraints_x_plus_delta.view(-1),  # Constraints for each dimension
            bt.unsqueeze(0),                    # Convert scalar to tensor and ensure it is 1D
            constraint_sum_le_1.unsqueeze(0),   # Convert scalar to tensor
            constraint_sum_ge_0.unsqueeze(0)    # Convert scalar to tensor
        ])

        return constraints_combined   
    
    def constraints(self, params):
        # Convert params to a tensor with gradient tracking enabled
        params_tensor = torch.tensor(params, dtype=torch.float32)

        # Use the compute_constraints function to calculate constraints
        constraints_tensor = self.compute_constraints(params_tensor)

        # Convert constraints tensor to numpy array for IPOPT compatibility
        constraints_array = constraints_tensor.detach().cpu().numpy()

        # Logging for debugging
        # logging.info(f"Constraints Array Shape: {constraints_array.shape}")
        # logging.info(f"Constraints Array: {constraints_array}")

        return constraints_array    

    # WORKS
    def jacobian(self, params):
        # Convert parameters to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)

        # Compute all constraints as scalar values
        constraints_combined = self.compute_constraints(params_tensor)

        # Compute the Jacobian using torch.autograd.functional.jacobian
        jacobian_tensor = torch.autograd.functional.jacobian(
            lambda x: self.compute_constraints(x),
            params_tensor
        )

        # Logging for debugging
        # print(f"Jacobian Tensor Shape: {jacobian_tensor.shape}")
        # print(f"Jacobian Tensor: {jacobian_tensor}")

        # Ensure the Jacobian has shape (m, n)
        if jacobian_tensor.dim() == 2 and jacobian_tensor.shape == (self.m, self.n):
            return jacobian_tensor.detach().cpu().numpy().flatten()
        else:
            raise ValueError(f"Unexpected Jacobian shape: {jacobian_tensor.shape}")

def solve_bellman_with_ipopt(
    D, xt, vt_next_in, vt_next_out, t, T, beta, gamma, tau, Rf, mu, Sigma,
    convex_hull=None, ntr_mid_point=None, include_consumption=False, num_starts=10, drop_tolerance=0.2
):
    best_solution = None
    best_info = None
    best_obj_val = float('-inf')
    failed_attempts = 0
    max_failed_attempts = int(num_starts * (1.0 - drop_tolerance))

    print(f"xt: {xt}")

    def generate_feasible_initial_guess(xt, D, tau, include_consumption=False):
        while True:
            if torch.allclose(xt, torch.tensor([0.0, 0.0])):  # Close to (0.0, 0.0)
                delta_plus = torch.rand(D) * 0.1  # Small buys
                delta_minus = torch.zeros(D)  # No selling allowed

            elif torch.allclose(xt, torch.tensor([1.0, 0.0])):  # Close to (1.0, 0.0)
                delta_plus = torch.tensor([0.05, 0.0])  # Small buy in the first asset
                delta_minus = torch.tensor([0.05, 0.0])  # Small sell in the first asset

            elif torch.allclose(xt, torch.tensor([0.0, 1.0])):  # Close to (0.0, 1.0)
                delta_plus = torch.tensor([0.0, 0.05])  # Small buy in the second asset
                delta_minus = torch.tensor([0.0, 0.05])  # Small sell in the second asset

            else:
                # Normal case for other points
                delta_plus = torch.rand(D) * xt
                delta_minus = torch.rand(D) * (1 - xt)
            # Ensure that delta_plus and delta_minus are within the bounds
            delta_plus = torch.clamp(delta_plus, 0, 1)
            delta_minus = torch.clamp(delta_minus, 0, 1)

            # Compute delta
            delta = delta_plus - delta_minus

            # Compute transaction costs
            transaction_costs = tau * torch.sum(delta_plus - delta_minus)

            # Compute bond holdings (bt), ensuring non-negative bond holdings
            bt = 1.0 - torch.sum(xt + delta) - transaction_costs
            if bt < 0:
                continue  # Retry if bond holdings are negative

            # Optionally include consumption
            c_t = torch.tensor(0.0) if not include_consumption else torch.rand(1) * 0.05

            # Verify that x + delta >= 0
            x_plus_delta = xt + delta
            if torch.any(x_plus_delta < 0):
                continue  # Retry if any asset constraint is violated

            # Verify that 1 - sum(x + delta) >= 0
            if 1.0 - torch.sum(x_plus_delta) < 0:
                continue  # Retry if sum constraint is violated

            # Verify no shorting constraints: delta >= -xt
            if torch.any(delta < -xt):
                continue  # Retry if no shorting constraint is violated

            # Return the initial guess if all constraints are satisfied
            initial_guess = torch.cat([delta_plus, delta_minus])
            # Inside generate_feasible_initial_guess
            print(f"xt size: {xt.size()}")
            return initial_guess

    # Loop through multiple starting points
    for start_idx in range(num_starts):
        try:
            initial_guess = generate_feasible_initial_guess(xt, D, tau, include_consumption=False)        
            # logging.info(f"Initial Guess from point: {xt}")
            # logging.info(f"Initial Guess: {initial_guess}")
        
        except ValueError as e:
            logging.warning(f"Start {start_idx}: {e}")
            failed_attempts += 1
            if failed_attempts > max_failed_attempts:
                logging.error(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                return None, None, None, None, None
            continue

        # delta_plus = initial_guess[:D]
        # Debug: Log initial guess size
        # print(f"Problem variables: xt size {xt.size()}, D: {D}")
        # print(f"Initial guess size: {initial_guess.size()}")
        try:
            # Create the optimization problem
            prob = PortfolioOptimization(
                D,
                xt,
                vt_next_in,
                vt_next_out,
                t,
                T,
                beta,
                gamma,
                tau,
                Rf,
                mu,
                Sigma=Sigma,
                convex_hull=convex_hull,
                ntr_mid_point=ntr_mid_point,  # Pass ntr_mid_point
                include_consumption=include_consumption,
            )
            # Define variable bounds
            lb = np.zeros(2 * D)
            ub = np.ones(2 * D)        
       
        # except Exception as e:
        #     logging.error(f"Error setting up optimization problem: {e}")


            prob.add_option("tol", 1e-6)
            prob.add_option("max_iter", 700)
            prob.add_option("print_level", 2)
            prob.add_option("acceptable_tol", 1e-5)
            prob.add_option("honor_original_bounds", "yes")
            prob.add_option("mu_strategy", "adaptive")  # Adaptive step size strategy
            prob.add_option("mu_oracle", "quality-function")  # Control step quality            
            # prob.add_option("derivative_test", "none")            
            
            # prob.add_option("derivative_test", "first-order")
            # prob.add_option("derivative_test_tol", 1e-4)            
            
            # Optionally disable derivative checker if it's causing issues
            # prob.add_option("derivative_test", "none")
            
            # prob.add_option("barrier_tol_factor", 0.1)

            solution, info = prob.solve(initial_guess)

            # Check if this solution is better than the current best
            if info['status'] == 0 and (best_solution is None or info['obj_val'] > best_obj_val):
                best_solution = solution
                best_info = info
                best_obj_val = info['obj_val']
                # print(f"delta_plus: {best_solution[:D]}, delta_minus: {best_solution[D:2 * D]}, omega: {xt.cpu().numpy() + best_solution[:D] - best_solution[D:2 * D]}")

        except Exception as e:
            print(f"Optimization failed for start {start_idx}: {e}")
            failed_attempts += 1
            # If too many failures occur, drop this point
            if failed_attempts > max_failed_attempts:
                print(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                return None, None, None, None, None
            continue

    if best_solution is None:
        print(f"No optimizer solution found for point {xt}!")
        return None, None, None, None, None

    # print(f"delta_plus size: {delta_plus.size()}, delta_minus size: {delta_minus.size()}")
    # After finding the best solution, extract the variables
    idx = 0
    delta_plus_opt = best_solution[idx : idx + D]
    delta_minus_opt = best_solution[idx + D : idx + 2 * D]
    delta_opt = delta_plus_opt - delta_minus_opt
    # print(f"delta_plus_opt size: {delta_plus_opt.size()}, delta_minus_opt size: {delta_minus_opt.size()}")
    # print(f"delta_plus_opt size: {np.shape(delta_plus_opt)}, delta_minus_opt size: {np.shape(delta_minus_opt)}")
    # Compute omega_i_t and bond holdings (bt)
    omega_i_t = xt.cpu().numpy() + delta_opt
    bt = normalized_bond_holdings(
        xt, torch.tensor(delta_plus_opt), torch.tensor(delta_minus_opt), tau
    ).item()
    
    print(f"Delta+: {delta_plus_opt}, Delta-: {delta_minus_opt}, Delta: {delta_opt}, Omega: {omega_i_t}, bt: {bt}")
    return delta_plus_opt, delta_minus_opt, delta_opt, omega_i_t, bt

def approximate_ntr(vt_next_in, vt_next_out, D, t, T, beta, gamma, tau, Rf, mu, Sigma):
    # Step 1: Sample state points
    tilde_X_t = sample_state_points(D)
    N = len(tilde_X_t)
    tilde_omega_t = []

    for i in range(N):
        tilde_x_i_t = tilde_X_t[i]
        # Step 2: Solve optimization problem
        delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
            D, tilde_x_i_t, vt_next_in, vt_next_out, t, T, beta, gamma, tau, Rf, mu, Sigma
        )
        if delta_plus is not None:
            # Step 3: Compute NTR vertices
            tilde_omega_i_t = (tilde_x_i_t + delta).detach().cpu().numpy()
            tilde_omega_t.append(tilde_omega_i_t)

    # Step 4: Compute convex hull of the vertices to represent the NTR
    tilde_omega_t = np.array(tilde_omega_t)
    if len(tilde_omega_t) >= D + 1:
        convex_hull = ConvexHull(tilde_omega_t)
    else:
        convex_hull = None  # Cannot compute convex hull with fewer points

    return tilde_omega_t, convex_hull

def bayesian_active_learning(model, likelihood, state_space, num_new_points=10):
    model.eval()
    likelihood.eval()
    
    # Convert state_space to a tensor if needed
    # state_space = torch.tensor(state_space, dtype=torch.float32)

    # Compute predictive mean and variance across the state space
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        pred_dist = model(state_space)
        variance = pred_dist.variance
        # predictive_mean = pred_dist.mean

    # Select points with the highest variance
    _, top_var_indices = torch.topk(variance, num_new_points)
    new_samples = state_space[top_var_indices]

    return new_samples

# Initialize value function V
V = [[None, None] for _ in range(T + 1)]

# Set terminal value function
V[T][0] = V_terminal  # For inside NTR
V[T][1] = V_terminal  # For outside NTR

NTRs = [None for _ in range(T)]  # Store NTRs for each period

for t in reversed(range(T)):

    if t == T-3:
        break

    print(f"Time step {t}")
        # print size and shape of vt_next_in and vt_next_out
    # if isinstance(V[t + 1][0], gpytorch.models.ExactGP):
        # print(f"vt_next_in size: {V[t + 1][0].train_targets.size()}, vt_next_out size: {V[t + 1][1].train_targets.size()}")
    # else :
    if isinstance(V[t + 1][1], ExactGP):
        V[t + 1][1].eval()  # Put the model in evaluation mode
        with torch.no_grad():
            output = V[t + 1][1](torch.tensor([[0.5, 0.5]]))
        print(f"vt_next_in: {V[t + 1][1]}, Function with input: {output}, input: {torch.tensor([0.5, 0.5])}")



    # print(f"vt_next_in: {V[t + 1][1]}, Function with input:  {V[t + 1][1](torch.tensor([0.5, 0.5]))},input: {torch.tensor([0.5, 0.5])}")
    # Step 2a: Approximate NTR
    print("Step 2a: Approximate NTR")
    tilde_omega_t, convex_hull = approximate_ntr(V[t + 1][0], V[t + 1][1], D, t, T, beta, gamma, tau, Rf, mu, Sigma)
    NTRs[t] = convex_hull

    # Step 2b: Sample state points
    print("Step 2b: Sample state points")
    X_t = sample_state_points_simplex(D, N)
    data_in = []
    data_out = []

    for i in range(len(X_t)):
        x_i_t = X_t[i]
        # Step 2c: Solve optimization problem
        print(f"Step 2c: Solve optimization problem for point {x_i_t}")
        delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
            D, x_i_t, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma,
            convex_hull=NTRs[t]
        )
        if delta_plus is None:
            continue  # Skip if optimization failed
        print(f"Time: {t}, Point: {x_i_t}, Delta+: {delta_plus}, Delta-: {delta_minus}, Delta: {delta}, Omega: {omega_i_t}, bt: {b_t}")

        # Compute value using Bellman equation
        v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], x_i_t, 
                                    torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])

        # Determine if the point is inside the NTR and append to the respective data set
        x_i_t_np = x_i_t.detach().cpu().numpy()
        in_ntr = is_in_ntr(x_i_t_np, convex_hull, delta_plus, delta_minus)
        if in_ntr:
            data_in.append((x_i_t_np, v_i_t.item()))
        else:
            data_out.append((x_i_t_np, v_i_t.item()))

    # # Step 2e: Train GPR models for inside and outside NTR
    # if data_in:
    #     train_x_in = torch.tensor([d[0] for d in data_in], dtype=torch.float32)
    #     train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
    #     model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
    #     V[t][0] = model_in
    # else:
    #     V[t][0] = V[t + 1][0]

    # if data_out:
    #     train_x_out = torch.tensor([d[0] for d in data_out], dtype=torch.float32)
    #     train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
    #     model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
    #     V[t][1] = model_out
    # else:
    #     V[t][1] = V[t + 1][1]
    
    # Step 2e: Train GPR models for inside and outside NTR
    print("Step 2e: Train GPR models for inside and outside NTR")
    if data_in:
        train_x_in = torch.tensor(np.array([d[0] for d in data_in]), dtype=torch.float32)
        print(f"train_x_in done ")
        train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
        print(f"train_y_in done ")
        model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
        V[t][0] = model_in
        print(f"train gp model_in done ")
        # Apply Bayesian Active Learning to add new high-uncertainty points
        new_samples_in = bayesian_active_learning(model_in, likelihood_in, X_t, num_new_points=15)
        for sample in new_samples_in:
            delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
                D, sample, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma, convex_hull=NTRs[t]
            )
            if delta_plus is None:
                continue
            v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], sample, 
                                        torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])
            data_in.append((sample.detach().cpu().numpy(), v_i_t.item()))
        print(f"new_samples_in done")
    
    # Problem: Right now everything is being appended to data_out, so data_in is empty
    if data_out:
        train_x_out = torch.tensor(np.array([d[0] for d in data_out]), dtype=torch.float32)
        train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
        model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
        V[t][1] = model_out

        # Apply Bayesian Active Learning to add new high-uncertainty points
        new_samples_out = bayesian_active_learning(model_out, likelihood_out, X_t, num_new_points=15)
        for sample in new_samples_out:
            delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
                D, sample, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma, convex_hull=NTRs[t]
            )
            if delta_plus is None:
                continue
            v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], sample, 
                                        torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])
            data_out.append((sample.detach().cpu().numpy(), v_i_t.item()))

    # Retrain GPR models with the new data
    if data_in:
        train_x_in = torch.tensor(np.array([d[0] for d in data_in]), dtype=torch.float32)
        train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
        model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
        V[t][0] = model_in

    if data_out:
        train_x_out = torch.tensor(np.array([d[0] for d in data_out]), dtype=torch.float32)
        train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
        model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
        V[t][1] = model_out    

In [91]:
import numpy as np
import torch
import torch.autograd as autograd

import gpytorch
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, MaternKernel
from gpytorch.likelihoods import GaussianLikelihood
from gpytorch.mlls import ExactMarginalLogLikelihood

import cyipopt
from cyipopt import Problem
from scipy.spatial import ConvexHull

import logging
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Set up logging configuration
logging.basicConfig(filename='optimization_log.txt', 
                    level=logging.INFO, 
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Set random seed for reproducibility
np.random.seed(2001)
torch.manual_seed(2001)

# Parameters
T = 10  # Time horizon
D = 2  # Number of risky assets
r = 0.02  # Risk-free return in pct.
# Rf = np.exp(r)  # Risk-free return
Rf = r  # Risk-free return
tau = 0.002  # Transaction cost rate
beta = 0.975  # Discount factor
gamma = 3.5  # Risk aversion coefficient

# Risky assets - deterministic
mu = np.array([0.07, 0.07])
Sigma = np.array([[0.2, 0], [0, 0.2]])

# Include consumption flag
include_consumption = False  # Set to True to include consumption

# Parameters
N = 50  # Number of sample points

# Define the GPR model with ARD
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.MaternKernel(nu=1.5, ard_num_dims=train_x.shape[1])
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

def train_gp_model(train_x, train_y):
    likelihood = gpytorch.likelihoods.GaussianLikelihood(
        noise_constraint=gpytorch.constraints.GreaterThan(1e-6)
    )
    model = GPRegressionModel(train_x, train_y, likelihood)
    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    training_iterations = 200
    for i in range(training_iterations):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()

    return model, likelihood

def utility(var, gamma):
    if gamma == 1:
        return torch.log(var)  # Log utility for gamma = 1
    else:
        return (var ** (1.0 - gamma)) / (1 - gamma)  # CRRA utility

def safe_utility(var, gamma):
    var = torch.clamp(var, min=1e-10)
    return utility(var, gamma)

def normalized_bond_holdings(xt, delta_plus, delta_minus, tau):
    bt = 1.0 - torch.sum(xt + delta_minus - delta_plus) - torch.sum(tau * delta_plus - tau * delta_minus)
    bt = torch.clamp(bt, min=0.0, max=1.0)
    return bt  # Shape: [batch_size]

def normalized_state_dynamics(xt, delta_plus, delta_minus, Rt, bt, Rf):
    transaction_costs = torch.sum(tau * delta_plus - tau * delta_minus)
    pi_t1 = bt * Rf + torch.sum((xt + delta_plus - delta_minus) * Rt) - transaction_costs
    pi_t1 = torch.clamp(pi_t1, min=1e-8)  # Increased from 1e-8
    xt1 = ((xt + delta_plus - delta_minus) * Rt) / pi_t1.unsqueeze(-1)
    return pi_t1, xt1

def V_terminal(xT):
    terminal_utility = utility(1.0 - tau * torch.sum(xT, dim=-1), gamma)
    return terminal_utility  # Shape: [batch_size]

def bellman_equation(vt_next_in, vt_next_out, xt, delta_plus, delta_minus, beta, gamma, tau, Rf, convex_hull=None):
    """
    Computes the value function vt using the Bellman equation.

    Args:
        vt_next_in: Value function inside NTR (function or GP model)
        vt_next_out: Value function outside NTR (function or GP model)
        xt: Current state, tensor of shape [batch_size, D]
        delta_plus: Purchase adjustments, tensor of shape [batch_size, D]
        delta_minus: Sale adjustments, tensor of shape [batch_size, D]
        beta: Discount factor
        gamma: Risk aversion coefficient
        tau: Transaction cost rate
        Rf: Risk-free rate
        convex_hull: Convex hull of the NTR (optional)

    Returns:
        vt: Value function at time t, tensor of shape [batch_size]
    """    
    # Compute bond holdings
    bt = normalized_bond_holdings(xt, delta_plus, delta_minus, tau)

    # Asset returns
    Rt = torch.tensor(mu, dtype=torch.float32, device=xt.device).unsqueeze(0).expand(xt.size(0), -1)

    # Next period's wealth and state
    pi_t1, xt1 = normalized_state_dynamics(xt, delta_plus, delta_minus, Rt, bt, Rf)

    # Check for non-finite pi_t1
    if not torch.isfinite(pi_t1).all():
        logging.warning(f"Non-finite pi_t1 detected with values: {pi_t1}")
        return torch.full((xt.size(0),), float('inf'), device=xt.device)

    # Determine if next state is inside NTR
    in_ntr = is_in_ntr(xt1, convex_hull, delta_plus, delta_minus)

    # Evaluate the next period's value function
    xt1_input = xt1.detach()
    vt_next_vals = torch.zeros(xt.size(0), dtype=torch.float32, device=xt.device)

    # Ensure models are in evaluation mode
    if isinstance(vt_next_in, gpytorch.models.ExactGP):
        vt_next_in.eval()
    if isinstance(vt_next_out, gpytorch.models.ExactGP):
        vt_next_out.eval()

    with torch.no_grad():
        if isinstance(vt_next_in, gpytorch.models.ExactGP):
            vt_next_val_in = vt_next_in(xt1_input).mean.squeeze(-1)
        else:
            vt_next_val_in = V_terminal(xt1_input).squeeze(-1)

        if isinstance(vt_next_out, gpytorch.models.ExactGP):
            vt_next_val_out = vt_next_out(xt1_input).mean.squeeze(-1)
        else:
            vt_next_val_out = V_terminal(xt1_input).squeeze(-1)

        # Assign values based on NTR
        vt_next_vals[in_ntr] = vt_next_val_in[in_ntr]
        vt_next_vals[~in_ntr] = vt_next_val_out[~in_ntr]

    # Check for non-finite vt_next_vals
    if not torch.isfinite(vt_next_vals).all():
        logging.warning(f"Non-finite vt_next_vals detected with values: {vt_next_vals}")
        return torch.full((xt.size(0),), float('inf'), device=xt.device)

    # Compute current value function
    vt = beta * (pi_t1 ** (1.0 - gamma)) * vt_next_vals

    # Average over the batch if necessary
    if vt.numel() > 1:
        vt = vt.mean()

    # Check for non-finite vt
    if not torch.isfinite(vt).all():
        logging.warning(f"Non-finite vt detected with value: {vt}")
        return torch.full((xt.size(0),), float('inf'), device=xt.device)

    return vt

def sample_state_points(D):
    """
    Samples points at the vertices and midpoints of the state space simplex.

    Args:
        D: Dimension of the state space (number of assets)

    Returns:
        valid_points: Tensor of shape [num_points, D]
    """
    from itertools import product
    # Generate all combinations of 0 and 1 for D dimensions
    points = list(product([0.0, 1.0], repeat=D))
    # Add the midpoint
    midpoint = [0.5] * D
    points.append(midpoint)
    # Filter out points where the sum exceeds 1
    valid_points = [point for point in points if sum(point) <= 1.0]
    return torch.tensor(valid_points, dtype=torch.float32)  # Shape: [num_points, D]

def sample_state_points_simplex(D, N):
    """
    Samples N random points from the D-dimensional simplex.

    Args:
        D: Dimension of the state space
        N: Number of points to sample

    Returns:
        points: Tensor of shape [N, D]
    """
    # Generate random points in the simplex
    dirichlet_dist = torch.distributions.Dirichlet(torch.ones(D))
    points = dirichlet_dist.sample((N,))  # Shape: [N, D]
    return points  # Shape: [N, D]

def is_in_ntr(x, convex_hull, delta_plus=None, delta_minus=None, epsilon_ntr=1e-4):
    """
    Determines whether each point in x is inside the NTR.

    Args:
        x: State points, tensor of shape [batch_size, D]
        convex_hull: ConvexHull object representing the NTR
        delta_plus: Purchase adjustments, tensor of shape [batch_size, D] (optional)
        delta_minus: Sale adjustments, tensor of shape [batch_size, D] (optional)
        epsilon_ntr: Tolerance for the NTR

    Returns:
        in_ntr: Boolean tensor of shape [batch_size], True if inside NTR
    """
    if convex_hull is None:
        return torch.zeros(x.size(0), dtype=torch.bool, device=x.device)

    # Ensure we are using tensors throughout the computation
    x = x.detach()  # Ensure no gradients are tracked
    device = x.device

    # Extract convex hull equations and perform tensor operations
    equations_A = torch.tensor(convex_hull.equations[:, :-1], dtype=torch.float32, device=device)
    equations_b = torch.tensor(convex_hull.equations[:, -1], dtype=torch.float32, device=device)

    inequalities = torch.matmul(x, equations_A.T) + equations_b.unsqueeze(0)  # Shape: [batch_size, num_constraints]
    in_convex_hull = torch.all(inequalities <= epsilon_ntr, dim=1)  # Shape: [batch_size]

    if delta_plus is not None and delta_minus is not None:
        delta = delta_plus - delta_minus
        delta_policy = torch.all(torch.abs(delta) < epsilon_ntr, dim=-1)  # Shape: [batch_size]
        return torch.logical_or(in_convex_hull, delta_policy)  # No need to convert to NumPy

    return in_convex_hull  # All operations are now in PyTorch

def MertonPoint(mu, Sigma, r, gamma):
    # Compute the Merton portfolio weights
    Lambda = np.diag(np.sqrt(np.diag(Sigma)))
    Lambda_Sigma_Lambda = np.dot(Lambda, np.dot(Sigma, Lambda))
    Lambda_Sigma_Lambda_inv = np.linalg.inv(Lambda_Sigma_Lambda)
    mu_r = mu - r
    pi = np.dot(Lambda_Sigma_Lambda_inv, mu_r / gamma)
    return pi

class PortfolioOptimization(cyipopt.Problem):
    def __init__(
        self,
        D,
        xt,
        vt_next_in,
        vt_next_out,
        t,
        T,
        beta,
        gamma,
        tau,
        Rf,
        mu,
        Sigma,
        convex_hull=None,
        include_consumption=False,
        ntr_mid_point=None
    ):
        self.D = D
        self.xt = xt.detach().clone()  # Shape: [D]
        self.vt_next_in = vt_next_in
        self.vt_next_out = vt_next_out
        self.t = t
        self.T = T
        self.beta = beta
        self.gamma = gamma
        self.tau = tau
        self.Rf = Rf
        self.mu = mu
        self.Sigma = Sigma
        self.convex_hull = convex_hull
        self.include_consumption = include_consumption
        self.ntr_mid_point = ntr_mid_point

        if not isinstance(xt, torch.Tensor):
            print(f"XT IS NOT A TENSOR, xt type: {type(xt)}")

        # Number of variables: delta_plus, delta_minus
        self.n = 2 * D

        # Number of constraints: D constraints from xt + delta >= 0, and 3 scalar constraints
        self.m = D + 3

        # Variable bounds
        lb = np.zeros(self.n)
        ub = np.ones(self.n)

        # Constraint bounds
        cl = np.zeros(self.m)
        cu = np.full(self.m, np.inf)  # All constraints are inequalities (>= 0)

        super().__init__(n=self.n, m=self.m, problem_obj=self, lb=lb, ub=ub, cl=cl, cu=cu)

    def objective(self, params):
        """
        Objective function for the optimization problem.

        Args:
            params: Numpy array of shape [2*D], decision variables [delta_plus, delta_minus]

        Returns:
            obj_value: Scalar value of the objective function (negative of vt)
        """

        idx = 0

        # Convert params to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)
        delta_plus = params_tensor[idx : idx+self.D] # Shape: [1, D]
        delta_minus = params_tensor[idx + self.D : idx + 2 * self.D] # Shape: [1, D]
        
        # Compute the value function
        vt = bellman_equation(
            self.vt_next_in,
            self.vt_next_out,
            self.xt,
            delta_plus,
            delta_minus,
            self.beta,
            self.gamma,
            self.tau,
            self.Rf,
            self.convex_hull
        ) # vt: [1]
        
        if torch.isnan(vt).any() or torch.isinf(vt).any():
            raise ValueError("NaN or Inf detected in objective function!")      
        # # Ensure vt is a scalar
        # vt_scalar = vt.item()  # Convert tensor to scalar

        # if torch.isnan(vt_scalar) or torch.isinf(vt_scalar):
        #     raise ValueError("NaN or Inf detected in objective function!")
        
        # return -vt_scalar  # IPOPT minimizes, so negate to maximize
    
        vt_scalar = vt.squeeze(0)  # Shape: [], scalar tensor
        obj_value = -vt_scalar.item()  # Convert to float
        return obj_value

    def gradient(self, params):
        """
        Gradient of the objective function.

        Args:
            params: Numpy array of shape [2*D], decision variables [delta_plus, delta_minus]

        Returns:
            grads: Numpy array of shape [2*D], gradient of the objective function
        """

        # Convert params to a tensor with gradient tracking
        params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)

        delta_plus = params_tensor[:self.D]
        delta_minus = params_tensor[self.D:2 * self.D]
        
        # Compute the value function
        vt = bellman_equation(
            self.vt_next_in,
            self.vt_next_out,
            self.xt,
            delta_plus,
            delta_minus,
            self.beta,
            self.gamma,
            self.tau,
            self.Rf,
            self.convex_hull
        )
        
        # Compute gradients
        vt.backward()        
        # Extract gradients
        grads = params_tensor.grad.detach().cpu().numpy()
        return -grads  # IPOPT minimizes, so negate gradients
        # vt_scalar = vt.squeeze(0)  # Shape: [], scalar tensor
        # vt_scalar.backward()
        # grads = params_tensor.grad.detach().numpy()  # Shape: [2*D]
        # return -grads  # Negative because IPOPT minimizes
    
    def compute_constraints(self, params_tensor):
        """
        Compute the constraints for the optimization problem.

        Args:
            params_tensor: Torch tensor of shape [2*D], decision variables [delta_plus, delta_minus]

        Returns:
            constraints_tensor: Torch tensor of shape [m], representing the constraints
        """
        assert isinstance(params_tensor, torch.Tensor), f"params_tensor must be a torch.Tensor, got {type(params_tensor)}"
        
        # Extract delta_plus and delta_minus from the parameter vector
        delta_plus = params_tensor[:self.D]
        delta_minus = params_tensor[self.D:2 * self.D]
        delta = delta_plus - delta_minus
        xt = self.xt

        # Constraint 1: xt + delta >= 0 (element-wise for each asset)
        constraints_xt_delta = xt + delta  # Shape: [D]

        # Constraint 2: bond holdings bt >= 0
        bt = normalized_bond_holdings(
            xt.unsqueeze(0),  # Add batch dimension
            delta_plus.unsqueeze(0),  # Add batch dimension
            delta_minus.unsqueeze(0),  # Add batch dimension
            self.tau
        ).squeeze(0)  # Remove batch dimension, scalar

        # Constraint 3: sum(xt + delta) <= 1
        sum_allocations = torch.sum(xt + delta)  # Scalar
        constraint_sum_leq_1 = 1.0 - sum_allocations  # Scalar

        # Constraint 4: sum(xt + delta) >= 0 (redundant but included)
        constraint_sum_geq_0 = sum_allocations  # Scalar

        # Concatenate all constraints into a single tensor
        constraints_tensor = torch.cat([
            constraints_xt_delta,         # Shape: [D] (asset allocation constraints)
            bt.unsqueeze(0),             # Shape: [1] (bond holdings constraint)
            constraint_sum_leq_1.unsqueeze(0),  # Shape: [1] (sum allocation <= 1)
            constraint_sum_geq_0.unsqueeze(0)   # Shape: [1] (sum allocation >= 0)
        ])

        # Ensure the tensor does not have NaNs or infinities
        constraints_tensor = torch.nan_to_num(constraints_tensor, nan=1e9, posinf=1e9, neginf=-1e9)
        return constraints_tensor
    
    def constraints(self, params):
        """
        Computes the constraints for the optimization problem.

        Args:
            params: Numpy array of shape [2*D], decision variables [delta_plus, delta_minus]

        Returns:
            constraints_array: Numpy array of shape [m], representing the constraints
        """
        try:
            # Convert NumPy array to PyTorch tensor
            params_tensor = torch.tensor(params, dtype=torch.float32, device='cpu')
            assert isinstance(params_tensor, torch.Tensor), f"params_tensor must be a torch.Tensor, got {type(params_tensor)}"
            
            # Compute constraints using PyTorch tensors
            constraints_tensor = self.compute_constraints(params_tensor)
            
            # Convert constraints back to NumPy array for IPOPT
            constraints_array = constraints_tensor.detach().cpu().numpy()
            return constraints_array
        except Exception as e:
            logging.error(f"Error in constraints method: {e}", exc_info=True)
            # Return large positive values to indicate infeasibility
            return np.full(self.m, 1e9)   

    # WORKS
    def jacobian(self, params):
        """
        Computes the Jacobian of the constraints.

        Args:
            params: Numpy array of shape [2*D], decision variables [delta_plus, delta_minus]

        Returns:
            jacobian_flat: Numpy array of shape [m*n], representing the flattened Jacobian matrix
        """
        try:
            # Convert parameters to a tensor with gradient tracking
            params_tensor = torch.tensor(params, dtype=torch.float32, requires_grad=True)
            assert isinstance(params_tensor, torch.Tensor), f"params_tensor must be a torch.Tensor, got {type(params_tensor)}"
            
            # Compute all constraints as tensor
            constraints_combined = self.compute_constraints(params_tensor)
            
            # Compute the Jacobian using torch.autograd.functional.jacobian
            jacobian_tensor = torch.autograd.functional.jacobian(
                lambda x: self.compute_constraints(x),
                params_tensor
            )
            
            # Ensure the Jacobian has shape (m, n)
            if jacobian_tensor.dim() == 2 and jacobian_tensor.shape == (self.m, self.n):
                jacobian_flat = jacobian_tensor.view(-1).detach().cpu().numpy()
                return jacobian_flat        
            else:
                raise ValueError(f"Unexpected Jacobian shape: {jacobian_tensor.shape}")
        except Exception as e:
            logging.error(f"Error in jacobian method: {e}", exc_info=True)
            # Return a flat array of zeros or another fallback as appropriate
            return np.zeros(self.m * self.n)
        
def solve_bellman_with_ipopt(
    D, xt, vt_next_in, vt_next_out, t, T, beta, gamma, tau, Rf, mu, Sigma,
    convex_hull=None, ntr_mid_point=None, include_consumption=False, num_starts=10, drop_tolerance=0.2
):
    best_solution = None
    best_info = None
    best_obj_val = float('-inf')
    failed_attempts = 0
    max_failed_attempts = int(num_starts * (1.0 - drop_tolerance))

    logging.info(f"Solving Bellman equation for xt: {xt}")

    def generate_feasible_initial_guess(xt, D, tau, include_consumption=False, max_attempts=1000):
        attempts = 0
        while attempts < max_attempts:        
            if torch.allclose(xt, torch.tensor([0.0, 0.0], dtype=torch.float32)):
                delta_plus = torch.rand(D) * 0.1  # Small buys
                delta_minus = torch.zeros(D)       # No selling allowed

            elif torch.allclose(xt, torch.tensor([1.0, 0.0], dtype=torch.float32)):
                delta_plus = torch.tensor([0.0, 0.09], dtype=torch.float32)  # Small buy in the first asset
                delta_minus = torch.tensor([0.2, 0.0], dtype=torch.float32) # Small sell in the first asset

            elif torch.allclose(xt, torch.tensor([0.0, 1.0], dtype=torch.float32)):
                delta_plus = torch.tensor([0.09, 0.0], dtype=torch.float32)  # Small buy in the second asset
                delta_minus = torch.tensor([0.0, 0.2], dtype=torch.float32) # Small sell in the second asset

            else:
                delta_plus = torch.rand(D) * (1 - xt)
                delta_minus = torch.rand(D) * xt

            delta_plus = torch.clamp(delta_plus, 0, 1)
            delta_minus = torch.clamp(delta_minus, 0, 1)
            delta = delta_plus - delta_minus

            transaction_costs = tau * torch.sum(delta_plus + delta_minus)
            bt = 1.0 - torch.sum(xt + delta) - transaction_costs
            if bt < 0:
                attempts += 1
                continue

            x_plus_delta = xt + delta
            if torch.any(x_plus_delta < 0) or 1.0 - torch.sum(x_plus_delta) < 0:
                attempts += 1
                continue

            # Ensure that the initial guess is returned as a tensor
            initial_guess = torch.cat([delta_plus, delta_minus])
            return initial_guess  # Return a tensor

        raise ValueError("Failed to generate a feasible initial guess after maximum attempts")

    # Loop through multiple starting points
    for start_idx in range(num_starts):
        try:
            initial_guess = generate_feasible_initial_guess(xt, D, tau, include_consumption=False)
            logging.debug(f"Start {start_idx}: Initial guess generated.")
        except ValueError as e:
            logging.warning(f"Start {start_idx}: {e}")
            failed_attempts += 1
            if failed_attempts > max_failed_attempts:
                logging.error(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                return None, None, None, None, None
            continue

        try:
            # Create the optimization problem
            prob = PortfolioOptimization(
                D,
                xt,
                vt_next_in,
                vt_next_out,
                t,
                T,
                beta,
                gamma,
                tau,
                Rf,
                mu,
                Sigma=Sigma,
                convex_hull=convex_hull,
                ntr_mid_point=ntr_mid_point,
                include_consumption=include_consumption,
            )

            # Set IPOPT options
            prob.add_option("tol", 1e-8)
            prob.add_option("max_iter", 1000)
            prob.add_option("print_level", 3)
            # prob.add_option("acceptable_tol", 1e-5)
            prob.add_option("honor_original_bounds", "yes")
            prob.add_option("mu_strategy", "adaptive")        # Adaptive step size strategy
            prob.add_option("mu_oracle", "quality-function")  # Control step quality            

            # Convert initial_guess to NumPy array
            initial_guess_np = initial_guess.cpu().numpy()
            logging.debug(f"Start {start_idx}: Initial guess converted to NumPy array.")

            solution, info = prob.solve(initial_guess_np)
            logging.info(f"Start {start_idx}: Optimization status: {info['status']}, Objective value: {info['obj_val']}")

            # Check if the solution is valid
            if solution is None:
                logging.warning(f"Start {start_idx}: Solver returned None solution.")
                failed_attempts += 1
                if failed_attempts > max_failed_attempts:
                    logging.error(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                    return None, None, None, None, None
                continue

            if len(solution) != 2 * D:
                logging.warning(f"Start {start_idx}: Unexpected solution length: {len(solution)}. Expected: {2 * D}")
                failed_attempts += 1
                if failed_attempts > max_failed_attempts:
                    logging.error(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                    return None, None, None, None, None
                continue

            # Check if this solution is better than the current best
            if info['status'] == 0 and (best_solution is None or info['obj_val'] > best_obj_val):
                best_solution = solution
                best_info = info
                best_obj_val = info['obj_val']
                logging.info(f"Start {start_idx}: New best solution found with obj_val: {best_obj_val}")

        except Exception as e:
            print(f"Optimization failed for start {start_idx}: {e}")
            logging.error(f"Optimization failed for start {start_idx}: {e}", exc_info=True)
            failed_attempts += 1
            if failed_attempts > max_failed_attempts:
                print(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                logging.error(f"Exceeded maximum allowed failed attempts: {max_failed_attempts}")
                return None, None, None, None, None
            continue

    if best_solution is None:
        print(f"No optimizer solution found for point {xt}!")
        logging.error(f"No optimizer solution found for point {xt}!")
        return None, None, None, None, None

    try:
        # After finding the best solution, extract the variables
        idx = 0
        delta_plus_opt = best_solution[idx : idx + D]
        delta_minus_opt = best_solution[idx + D : idx + 2 * D]
        delta_opt = delta_plus_opt - delta_minus_opt

        # Convert delta_plus_opt and delta_minus_opt to tensors
        delta_plus_tensor = torch.tensor(delta_plus_opt, dtype=torch.float32)
        delta_minus_tensor = torch.tensor(delta_minus_opt, dtype=torch.float32)

        # Compute omega_i_t and bond holdings (bt)
        omega_i_t = xt.cpu().numpy() + delta_opt
        bt = normalized_bond_holdings(
            xt, delta_plus_tensor, delta_minus_tensor, tau
        ).item()

        # logging.info(f"Best solution found: Delta+: {delta_plus_opt}, Delta-: {delta_minus_opt}, Delta: {delta_opt}, Omega: {omega_i_t}, bt: {bt}")
        print(f"Best solution found. Point {xt},  Delta+: {delta_plus_opt}, Delta-: {delta_minus_opt}, Delta: {delta_opt}, Omega: {omega_i_t}, bt: {bt}")
        return delta_plus_opt, delta_minus_opt, delta_opt, omega_i_t, bt
    except Exception as e:
        logging.error(f"Error processing best solution: {e}", exc_info=True)
        return None, None, None, None, None

def approximate_ntr(vt_next_in, vt_next_out, D, t, T, beta, gamma, tau, Rf, mu, Sigma):
    """
    Approximates the Non-Trading Region (NTR) at time t.

    Args:
        vt_next_in: Value function inside NTR at time t+1
        vt_next_out: Value function outside NTR at time t+1
        D: Number of assets
        t: Current time
        T: Terminal time
        beta, gamma, tau, Rf, mu, Sigma: Model parameters

    Returns:
        tilde_omega_t: NTR vertices, numpy array of shape [num_points, D]
        convex_hull: ConvexHull object representing the NTR
    """
    # Step 1: Sample state points at vertices and midpoints
    tilde_X_t = sample_state_points(D)  # Shape: [num_points, D]
    N = tilde_X_t.size(0)
    tilde_omega_t = []

    for i in range(N):
        tilde_x_i_t = tilde_X_t[i:i+1]  # Shape: [1, D]
        delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
            D, tilde_x_i_t.squeeze(0), vt_next_in, vt_next_out, t, T, beta, gamma, tau, Rf, mu, Sigma
        )
        if delta_plus is None:
            print(f"Delta is None for point {tilde_x_i_t}")
        if delta_plus is not None:
            tilde_omega_i_t = (tilde_x_i_t.squeeze(0) + delta)  # Shape: [D]
            tilde_omega_t.append(tilde_omega_i_t.detach().numpy())

    # Construct convex hull
    if len(tilde_omega_t) >= D + 1:
        tilde_omega_t = np.vstack(tilde_omega_t)  # Shape: [num_points, D]
        convex_hull = ConvexHull(tilde_omega_t)
    else:
        convex_hull = None

    return tilde_omega_t, convex_hull

def bayesian_active_learning(model, likelihood, state_space, num_new_points=10):
    """
    Applies Bayesian Active Learning to select new points with the highest uncertainty.

    Args:
        model: Trained GP model
        likelihood: Likelihood associated with the GP model
        state_space: Tensor of state points, shape [num_points, D]
        num_new_points: Number of new points to select

    Returns:
        new_samples: Tensor of selected state points, shape [num_new_points, D]
    """
    model.eval()
    likelihood.eval()

    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        pred_dist = model(state_space)
        variance = pred_dist.variance  # Shape: [num_points]

    _, top_var_indices = torch.topk(variance, num_new_points)
    new_samples = state_space[top_var_indices]  # Shape: [num_new_points, D]

    return new_samples
# Initialize value function V
V = [[None, None] for _ in range(T + 1)]

# Set terminal value function
V[T][0] = V_terminal  # For inside NTR
V[T][1] = V_terminal  # For outside NTR

NTRs = [None for _ in range(T)]  # Store NTRs for each period

for t in reversed(range(T)):

    if t == T-3:
        break

    print(f"Time step {t}")
        # print size and shape of vt_next_in and vt_next_out
    # if isinstance(V[t + 1][0], gpytorch.models.ExactGP):
        # print(f"vt_next_in size: {V[t + 1][0].train_targets.size()}, vt_next_out size: {V[t + 1][1].train_targets.size()}")
    # else :
    if isinstance(V[t + 1][1], ExactGP):
        V[t + 1][1].eval()  # Put the model in evaluation mode
        with torch.no_grad():
            output = V[t + 1][1](torch.tensor([[0.5, 0.5]]))
        print(f"vt_next_in: {V[t + 1][1]}, Function with input: {output}, input: {torch.tensor([0.5, 0.5])}")



    # print(f"vt_next_in: {V[t + 1][1]}, Function with input:  {V[t + 1][1](torch.tensor([0.5, 0.5]))},input: {torch.tensor([0.5, 0.5])}")
    # Step 2a: Approximate NTR
    print("Step 2a: Approximate NTR")
    tilde_omega_t, convex_hull = approximate_ntr(V[t + 1][0], V[t + 1][1], D, t, T, beta, gamma, tau, Rf, mu, Sigma)
    NTRs[t] = convex_hull

    # Step 2b: Sample state points
    print("Step 2b: Sample state points")
    X_t = sample_state_points_simplex(D, N)
    data_in = []
    data_out = []

for i in range(len(X_t)):
    x_i_t = X_t[i]
    # Step 2c: Solve optimization problem
    logging.info(f"Step 2c: Solve optimization problem for point {x_i_t}")
    delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
        D, x_i_t, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma,
        convex_hull=NTRs[t]
    )
    if delta_plus is None:
        logging.warning(f"Step 2c: Optimization failed for point {x_i_t}. Skipping.")
        continue  # Skip if optimization failed
    logging.info(f"Time: {t}, Point: {x_i_t}, Delta+: {delta_plus}, Delta-: {delta_minus}, Delta: {delta}, Omega: {omega_i_t}, bt: {b_t}")

    # Compute value using Bellman equation
    v_i_t = bellman_equation(
        V[t + 1][0],
        V[t + 1][1],
        x_i_t, 
        torch.tensor(delta_plus, dtype=torch.float32, device='cpu'), 
        torch.tensor(delta_minus, dtype=torch.float32, device='cpu'), 
        beta, gamma, tau, Rf, convex_hull=NTRs[t]
    )

    # Determine if the point is inside the NTR and append to the respective data set
    in_ntr = is_in_ntr(x_i_t, convex_hull, torch.tensor(delta_plus, dtype=torch.float32, device='cpu'), torch.tensor(delta_minus, dtype=torch.float32, device='cpu'))
    if in_ntr.item():
        data_in.append((x_i_t, v_i_t.item()))
    else:
        data_out.append((x_i_t, v_i_t.item()))

    # # Step 2e: Train GPR models for inside and outside NTR
    # if data_in:
    #     train_x_in = torch.tensor([d[0] for d in data_in], dtype=torch.float32)
    #     train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
    #     model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
    #     V[t][0] = model_in
    # else:
    #     V[t][0] = V[t + 1][0]

    # if data_out:
    #     train_x_out = torch.tensor([d[0] for d in data_out], dtype=torch.float32)
    #     train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
    #     model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
    #     V[t][1] = model_out
    # else:
    #     V[t][1] = V[t + 1][1]
    
    # Step 2e: Train GPR models for inside and outside NTR
    print("Step 2e: Train GPR models for inside and outside NTR")
    if data_in:
        train_x_in = torch.tensor(np.array([d[0] for d in data_in]), dtype=torch.float32)
        print(f"train_x_in done ")
        train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
        print(f"train_y_in done ")
        model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
        V[t][0] = model_in
        print(f"train gp model_in done ")
        # Apply Bayesian Active Learning to add new high-uncertainty points
        new_samples_in = bayesian_active_learning(model_in, likelihood_in, X_t, num_new_points=15)
        for sample in new_samples_in:
            delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
                D, sample, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma, convex_hull=NTRs[t]
            )
            if delta_plus is None:
                continue
            v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], sample, 
                                        torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])
            data_in.append((sample.detach().cpu().numpy(), v_i_t.item()))
        print(f"new_samples_in done")
    
    # Problem: Right now everything is being appended to data_out, so data_in is empty
    if data_out:
        train_x_out = torch.tensor(np.array([d[0] for d in data_out]), dtype=torch.float32)
        train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
        model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
        V[t][1] = model_out

        # Apply Bayesian Active Learning to add new high-uncertainty points
        new_samples_out = bayesian_active_learning(model_out, likelihood_out, X_t, num_new_points=15)
        for sample in new_samples_out:
            delta_plus, delta_minus, delta, omega_i_t, b_t = solve_bellman_with_ipopt(
                D, sample, V[t + 1][0], V[t + 1][1], t, T, beta, gamma, tau, Rf, mu, Sigma, convex_hull=NTRs[t]
            )
            if delta_plus is None:
                continue
            v_i_t = bellman_equation(V[t + 1][0], V[t + 1][1], sample, 
                                        torch.tensor(delta_plus), torch.tensor(delta_minus), beta, gamma, tau, Rf, convex_hull=NTRs[t])
            data_out.append((sample.detach().cpu().numpy(), v_i_t.item()))

    # Retrain GPR models with the new data
    if data_in:
        train_x_in = torch.tensor(np.array([d[0] for d in data_in]), dtype=torch.float32)
        train_y_in = torch.tensor([d[1] for d in data_in], dtype=torch.float32)
        model_in, likelihood_in = train_gp_model(train_x_in, train_y_in)
        V[t][0] = model_in

    if data_out:
        train_x_out = torch.tensor(np.array([d[0] for d in data_out]), dtype=torch.float32)
        train_y_out = torch.tensor([d[1] for d in data_out], dtype=torch.float32)
        model_out, likelihood_out = train_gp_model(train_x_out, train_y_out)
        V[t][1] = model_out    

Time step 9
Step 2a: Approximate NTR
Total number of variables............................:        4
                     variables with only lower bounds:        0
                variables with lower and upper bounds:        4
                     variables with only upper bounds:        0
Total number of equality constraints.................:        0
Total number of inequality constraints...............:        5
        inequality constraints with only lower bounds:        5
   inequality constraints with lower and upper bounds:        0
        inequality constraints with only upper bounds:        0


Number of Iterations....: 10

                                   (scaled)                 (unscaled)
Objective...............:   5.1414375150411895e-01    3.9389858245849602e+01
Dual infeasibility......:   9.0828122800701294e-10    6.9585731060422753e-08
Constraint violation....:   0.0000000000000000e+00    0.0000000000000000e+00
Variable bound violation:   0.0000000000000000e+00   

RuntimeError: Index put requires the source and destination dtypes match, got Float for the destination and Double for the source.

In [88]:

def is_in_ntr(x, convex_hull, delta_plus=None, delta_minus=None, epsilon_ntr=1e-4):
    """
    Determines whether each point in x is inside the NTR.

    Args:
        x: State points, tensor of shape [batch_size, D]
        convex_hull: ConvexHull object representing the NTR
        delta_plus: Purchase adjustments, tensor of shape [batch_size, D] (optional)
        delta_minus: Sale adjustments, tensor of shape [batch_size, D] (optional)
        epsilon_ntr: Tolerance for the NTR

    Returns:
        in_ntr: Boolean tensor of shape [batch_size], True if inside NTR
    """
    if convex_hull is None:
        return torch.zeros(x.size(0), dtype=torch.bool, device=x.device)

    # Ensure we are using tensors throughout the computation
    x = x.detach()  # Ensure no gradients are tracked
    device = x.device

    # Extract convex hull equations and perform tensor operations
    equations_A = torch.tensor(convex_hull.equations[:, :-1], dtype=torch.float32, device=device)
    equations_b = torch.tensor(convex_hull.equations[:, -1], dtype=torch.float32, device=device)

    inequalities = torch.matmul(x, equations_A.T) + equations_b.unsqueeze(0)  # Shape: [batch_size, num_constraints]
    in_convex_hull = torch.all(inequalities <= epsilon_ntr, dim=1)  # Shape: [batch_size]

    if delta_plus is not None and delta_minus is not None:
        delta = delta_plus - delta_minus
        delta_policy = torch.all(torch.abs(delta) < epsilon_ntr, dim=-1)  # Shape: [batch_size]
        return torch.logical_or(in_convex_hull, delta_policy)  # No need to convert to NumPy

    return in_convex_hull  # All operations are now in PyTorch

is_in_ntr(x=torch.tensor([[0.5, 0.5]]), convex_hull=None, delta_plus=torch.tensor([[0.5, 0.5]]), delta_minus=torch.tensor([[0.5, 0.5]]))

tensor([False])

In [39]:
# Parameters
T = 10  # Time horizon
D = 2  # Number of risky assets
r = 0.02  # Risk-free return in pct.
Rf = np.exp(r)  # Risk-free return
Rf = r  # Risk-free return
tau = 1.00125  # Transaction cost rate
beta = 0.975  # Discount factor
gamma = 3.0  # Risk aversion coefficient

# Risky assets - deterministic
mu = np.array([0.07, 0.07])
Sigma = np.array([[0.2, 0], [0, 0.2]])

solve_bellman_with_ipopt(D, torch.tensor([0.4, 0.4]), V[0][0], V[0][1], 9, T, beta, gamma, tau, Rf, mu, Sigma)
#delta_plus_opt, delta_minus_opt, delta_opt, omega_i_t, bt

Best solution found. Point tensor([0.4000, 0.4000]),  Delta+: [0.60413131 0.6041313 ], Delta-: [0.5041313  0.50413128], Delta: [0.10000001 0.10000002], Omega: [0.50000001 0.50000003], bt: 0.0


(array([0.60413131, 0.6041313 ]),
 array([0.5041313 , 0.50413128]),
 array([0.10000001, 0.10000002]),
 array([0.50000001, 0.50000003]),
 0.0)

In [85]:
def test_bond_holdings(xt, delta_plus, delta_minus, tau):
    bt = 1.0 - torch.sum(xt + delta_minus - delta_plus) - torch.sum(tau * delta_plus - tau * delta_minus)
    # bt = torch.clamp(bt, min=0.0, max = 1.0)
    return bt, torch.sum(- tau * delta_plus - tau * delta_minus)

def test_state_dynamics(xt, delta_plus, delta_minus, Rt, bt, Rf):
    transaction_costs = torch.sum(tau * delta_plus - tau * delta_minus)
    pi_t1 = bt * Rf + torch.sum((xt + delta_plus - delta_minus) * Rt) - transaction_costs
    pi_t1 = torch.clamp(pi_t1, min=1e-8)  # Increased from 1e-8
    xt1 = ((xt + delta_plus - delta_minus) * Rt) / pi_t1.unsqueeze(-1)
    return pi_t1, xt1 # Shapes: [batch_size], [batch_size, D]
    # return np.shape(pi_t1), np.shape(xt1) # Shapes: [batch_size], [batch_size, D]


# test_bond_holdings(torch.tensor([0.0, 0.0]), torch.tensor([0.1, 0.1]), torch.tensor([0.1, 0.1]), 1.0)
test_state_dynamics(torch.tensor([0.0, 0.0]), torch.tensor([0.1, 0.1]), torch.tensor([0.0, 0.0]), torch.tensor([0.07, 0.07]), 1.0, 0.02)


(tensor(0.0338), tensor([0.2071, 0.2071]))

In [66]:
torch.sum(torch.tensor([0.001 , 0.001]))

tensor(0.0020)

In [None]:
def test_state_dynamics_wealth(xt, delta_plus, delta_minus, Rt, bt, Rf):
    delta = delta_plus - delta_minus
    transaction_costs = tau * torch.sum(delta_plus + delta_minus, dim=-1)
    pi_t1 = bt * Rf + torch.sum((xt + delta) * Rt, dim=-1)
    pi_t1 = torch.clamp(pi_t1, min=1e-4)  # Increased from 1e-8
    xt1 = ((xt + delta) * Rt) / pi_t1.unsqueeze(-1)
    return pi_t1, xt1 ,transaction_costs # Shapes: [batch_size], [batch_size, D]

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

hull = convex_hull

if hull is not None:
    vertices = hull.points  # Vertices are stored in the 'points' attribute of ConvexHull
    D = vertices.shape[1]  # Dimension of the state space
    plt.figure()

    if D == 2:
        # 2D plot
        for simplex in hull.simplices:
            plt.plot(vertices[simplex, 0], vertices[simplex, 1], 'k-')
        plt.fill(vertices[hull.vertices, 0], vertices[hull.vertices, 1], 'lightgray', alpha=0.5)
        plt.scatter(vertices[:, 0], vertices[:, 1], color='red')  # Plot the vertices
        plt.title(f'NTR at time {t}')
        plt.xlabel('State dimension 1')
        plt.ylabel('State dimension 2')
        plt.xlim(0, 1)
        plt.ylim(0, 1)

    
    plt.show()

else:
    print(f"Not enough vertices to form an NTR at time {t}")

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

def plot_ntr_at_time(NTR_history, t):
    hull = NTR_history[t]
    
    if hull is not None:
        vertices = hull.points  # Vertices are stored in the 'points' attribute of ConvexHull
        D = vertices.shape[1]  # Dimension of the state space
        plt.figure()

        if D == 2:
            # 2D plot
            for simplex in hull.simplices:
                plt.plot(vertices[simplex, 0], vertices[simplex, 1], 'k-')
            plt.fill(vertices[hull.vertices, 0], vertices[hull.vertices, 1], 'lightgray', alpha=0.5)
            plt.scatter(vertices[:, 0], vertices[:, 1], color='red')  # Plot the vertices
            plt.title(f'NTR at time {t}')
            plt.xlabel('State dimension 1')
            plt.ylabel('State dimension 2')
            plt.xlim(0, 1)
            plt.ylim(0, 1)
        
        elif D == 3:
            # 3D plot
            ax = plt.axes(projection='3d')
            ax.scatter(vertices[:, 0], vertices[:, 1], vertices[:, 2], color='red')
            ax.add_collection3d(Poly3DCollection(vertices[hull.simplices], facecolors='lightgray', edgecolors='k', alpha=0.4))
            ax.set_xlabel('State dimension 1')
            ax.set_ylabel('State dimension 2')
            ax.set_zlabel('State dimension 3')
            plt.title(f'NTR at time {t}')
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.set_zlim(0, 1)
        
        plt.show()

    else:
        print(f"Not enough vertices to form an NTR at time {t}")

# Example: Plot NTR at time t=1
plot_ntr_at_time(NTRs, 5)