From 9e3a8216a6639f41e3430773aa06f82ff390f797 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Wed, 9 Jul 2025 13:05:24 +0200 Subject: [PATCH 01/14] update to TrustRegion --- popt/loop/optimize.py | 74 ++++++------- popt/update_schemes/linesearch.py | 17 ++- popt/update_schemes/trust_region.py | 160 +++++++++++++--------------- 3 files changed, 128 insertions(+), 123 deletions(-) diff --git a/popt/loop/optimize.py b/popt/loop/optimize.py index 43f99c6..6fa80a4 100644 --- a/popt/loop/optimize.py +++ b/popt/loop/optimize.py @@ -166,48 +166,48 @@ def run_loop(self): self.save() # Check if max iterations was reached - if self.iteration > self.max_iter: + if self.iteration >= self.max_iter: self.optimize_result['message'] = 'Iterations stopped due to max iterations reached!' else: if not isinstance(self.msg, str): self.msg = '' self.optimize_result['message'] = self.msg - # Logging some info to screen - logger.info(' Optimization converged in %d iterations ', self.iteration-1) - logger.info(' Optimization converged with final obj_func = %.4f', - np.mean(self.optimize_result['fun'])) - logger.info(' Total number of function evaluations = %d', self.optimize_result['nfev']) - logger.info(' Total number of jacobi evaluations = %d', self.optimize_result['njev']) - if self.start_time is not None: - logger.info(' Total elapsed time = %.2f minutes', (time.perf_counter()-self.start_time)/60) - logger.info(' ============================================') - - # Test for convergence of outer epf loop - epf_not_converged = False - if self.epf: - if self.epf_iteration > self.epf['max_epf_iter']: # max epf_iterations set to 10 - logger.info(f' -----> EPF-EnOpt: maximum epf iterations reached') # print epf info - break - p = np.abs(previous_state-self.mean_state) / (np.abs(previous_state) + 1.0e-9) - conv_crit = self.epf['conv_crit'] - if np.any(p > conv_crit): - epf_not_converged = True - previous_state = self.mean_state - self.epf['r'] *= self.epf['r_factor'] # increase penalty factor - self.obj_func_tol *= self.epf['tol_factor'] # decrease tolerance - self.obj_func_values = self.fun(self.mean_state, **self.epf) - self.iteration = 0 - self.epf_iteration += 1 - optimize_result = ot.get_optimize_result(self) - ot.save_optimize_results(optimize_result) - self.nfev += 1 - self.iteration = +1 - r = self.epf['r'] - logger.info(f' -----> EPF-EnOpt: {self.epf_iteration}, {r} (outer iteration, penalty factor)') # print epf info - else: - logger.info(f' -----> EPF-EnOpt: converged, no variables changed more than {conv_crit*100} %') # print epf info - final_obj_no_penalty = str(round(float(self.fun(self.mean_state)),4)) - logger.info(f' -----> EPF-EnOpt: objective value without penalty = {final_obj_no_penalty}') # print epf info + # Logging some info to screen + logger.info(' Optimization converged in %d iterations ', self.iteration-1) + logger.info(' Optimization converged with final obj_func = %.4f', + np.mean(self.optimize_result['fun'])) + logger.info(' Total number of function evaluations = %d', self.optimize_result['nfev']) + logger.info(' Total number of jacobi evaluations = %d', self.optimize_result['njev']) + if self.start_time is not None: + logger.info(' Total elapsed time = %.2f minutes', (time.perf_counter()-self.start_time)/60) + logger.info(' ============================================') + + # Test for convergence of outer epf loop + epf_not_converged = False + if self.epf: + if self.epf_iteration > self.epf['max_epf_iter']: # max epf_iterations set to 10 + logger.info(f' -----> EPF-EnOpt: 
maximum epf iterations reached') # print epf info + break + p = np.abs(previous_state-self.mean_state) / (np.abs(previous_state) + 1.0e-9) + conv_crit = self.epf['conv_crit'] + if np.any(p > conv_crit): + epf_not_converged = True + previous_state = self.mean_state + self.epf['r'] *= self.epf['r_factor'] # increase penalty factor + self.obj_func_tol *= self.epf['tol_factor'] # decrease tolerance + self.obj_func_values = self.fun(self.mean_state, **self.epf) + self.iteration = 0 + self.epf_iteration += 1 + optimize_result = ot.get_optimize_result(self) + ot.save_optimize_results(optimize_result) + self.nfev += 1 + self.iteration = +1 + r = self.epf['r'] + logger.info(f' -----> EPF-EnOpt: {self.epf_iteration}, {r} (outer iteration, penalty factor)') # print epf info + else: + logger.info(f' -----> EPF-EnOpt: converged, no variables changed more than {conv_crit*100} %') # print epf info + final_obj_no_penalty = str(round(float(self.fun(self.mean_state)),4)) + logger.info(f' -----> EPF-EnOpt: objective value without penalty = {final_obj_no_penalty}') # print epf info def save(self): """ diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index b8fe92f..956cd9c 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -13,6 +13,7 @@ # Internal imports from popt.misc_tools import optim_tools as ot from popt.loop.optimize import Optimize +from popt.update_schemes import optimizers def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callback=None, **options): ''' @@ -203,7 +204,7 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c self.saveit = options.get('saveit', True) # Check method - valid_methods = ['GD', 'BFGS', 'Newton'] + valid_methods = ['GD', 'BFGS', 'Newton', 'Adam'] if not self.method in valid_methods: raise ValueError(f"'{self.method}' is not a valid method. Valid methods are: {valid_methods}") @@ -373,6 +374,20 @@ def calc_update(self, iter_resamp=0): pk = - np.matmul(self.Hk_inv, self.jk) if self.method == 'Newton': pk = - np.matmul(la.inv(self.Hk), self.jk) + if self.method == 'Adam': + if self.iteration == 1: + pk = - self.jk + else: + optimizer = optimizers.Adam(1) + pk = - optimizer.apply_update(np.zeros_like(self.xk), self.jk, iter=self.iteration-1)[1] + optimizer.restore_parameters() + + # remove components that point out of the hybercube given by [lb,ub] + lb = np.array(self.bounds)[:, 0] + ub = np.array(self.bounds)[:, 1] + for i in range(self.xk.size): + if (self.xk[i] <= lb[i] and pk[i] < 0) or (self.xk[i] >= ub[i] and pk[i] > 0): + pk[i] = 0 # Set step_size step_size = self._set_step_size(pk) diff --git a/popt/update_schemes/trust_region.py b/popt/update_schemes/trust_region.py index 5272d75..9ed6e5e 100644 --- a/popt/update_schemes/trust_region.py +++ b/popt/update_schemes/trust_region.py @@ -11,8 +11,12 @@ from popt.misc_tools import optim_tools as ot from popt.loop.optimize import Optimize +# Impors from scipy +from scipy.optimize._trustregion_ncg import CGSteihaugSubproblem +from scipy.optimize._trustregion_exact import IterativeSubproblem -def TrustRegion(fun, x, jac, hess, args=(), bounds=None, callback=None, **options): + +def TrustRegion(fun, x, jac, hess, method='iterative', args=(), bounds=None, callback=None, **options): ''' Trust region optimization algorithm. @@ -29,6 +33,10 @@ def TrustRegion(fun, x, jac, hess, args=(), bounds=None, callback=None, **option hess : callable Hessian of objective function. The calling signature is `hess(x, *args)`. 
+ + method : str, optional + Method to use for solving the trust-region subproblem. Options are 'iterative' or 'CG-Steihaug'. + Default is 'iterative'. args : tuple, optional Extra arguments passed to the objective function and its derivatives (Jacobian, Hessian). @@ -110,12 +118,12 @@ def TrustRegion(fun, x, jac, hess, args=(), bounds=None, callback=None, **option - nfev: number of function evaluations - njev: number of jacobian evaluations ''' - tr_obj = TrustRegionClass(fun, x, jac, hess, args, bounds, callback, **options) + tr_obj = TrustRegionClass(fun, x, jac, hess, method, args, bounds, callback, **options) return tr_obj.optimize_result class TrustRegionClass(Optimize): - def __init__(self, fun, x, jac, hess, args=(), bounds=None, callback=None, **options): + def __init__(self, fun, x, jac, hess, method='iterative', args=(), bounds=None, callback=None, **options): # Initialize the parent class super().__init__(**options) @@ -125,6 +133,7 @@ def __init__(self, fun, x, jac, hess, args=(), bounds=None, callback=None, **opt self.xk = x self.jacobian = jac self.hessian = hess + self.method = method self.args = args self.bounds = bounds self.options = options @@ -144,12 +153,18 @@ def __init__(self, fun, x, jac, hess, args=(), bounds=None, callback=None, **opt self.resample = options.get('resample', 3) self.saveit = options.get('saveit', True) self.rho_tol = options.get('rho_tol', 1e-6) - self.eta1 = options.get('eta1', 0.001) - self.eta2 = options.get('eta2', 0.1) - self.gam1 = options.get('gam1', 0.7) - self.gam2 = options.get('gam2', 1.5) + self.eta1 = options.get('eta1', 0.1) # reduce raduis if rho < 10% + self.eta2 = options.get('eta2', 0.5) # increase radius if rho > 50% + self.gam1 = options.get('gam1', 0.5) # reduce by 50% + self.gam2 = options.get('gam2', 1.5) # increase by 50% self.rho = 0.0 + # Check if method is valid + if self.method not in ['iterative', 'CG-Steihaug']: + self.method = 'iterative' + raise ValueError(f'Method {self.method} is not valid!. Method is set to "iterative"') + + if not self.restart: self.start_time = time.perf_counter() @@ -242,17 +257,66 @@ def _log(self, msg): if self.logger is not None: self.logger.info(msg) + def solve_subproblem(self, g, B, delta): + """ + Solve the trust region subproblem using the iterative method. + (A big thanks to copilot for the help with this implementation) + + Parameters: + g (numpy.ndarray): Gradient vector at the current point. + B (numpy.ndarray): Hessian matrix at the current point. + delta (float): Trust region radius. + + Returns: + pk (numpy.ndarray): Step direction. + pk_hits_boundary (bool): True if the step hits the boundary of the trust region. 
+ """ + + # Define quadratic model + quad = lambda p: self.fk + np.dot(g,p) + np.dot(p,np.dot(B,p))/2 + + + if self.method == 'iterative': + subproblem = IterativeSubproblem( + x=self.xk, + fun=quad, + jac=lambda _: g, + hess=lambda _: B, + ) + pk, pk_hits_boundary = subproblem.solve(tr_radius=delta) + + elif self.method == 'CG-Steihaug': + subproblem = CGSteihaugSubproblem( + x=self.xk, + fun=quad, + jac=lambda _: g, + hess=lambda _: B, + ) + pk, pk_hits_boundary = subproblem.solve(trust_radius=delta) + + else: + raise ValueError(f"Method {self.method} is not valid!") + + return pk, pk_hits_boundary + + def calc_update(self, iter_resamp=0): # Initialize variables for this step success = True # Solve subproblem - self._log('Solving trust region subproblem using the CG-Steihaug method') - sk = self.solve_sub_problem_CG_Steihaug(self.jk, self.Hk, self.trust_radius) + self._log('Solving trust region subproblem') + sk, hits_boundary = self.solve_subproblem(self.jk, self.Hk, self.trust_radius) + + # truncate sk to respect bounds + if self.bounds is not None: + lb = np.array(self.bounds)[:, 0] + ub = np.array(self.bounds)[:, 1] + sk = np.clip(sk, lb - self.xk, ub - self.xk) # Calculate the actual function value - xk_new = ot.clip_state(self.xk + sk, self.bounds) + xk_new = self.xk + sk fun_new = self._fun(xk_new) # Calculate rho @@ -283,7 +347,7 @@ def calc_update(self, iter_resamp=0): # update the trust region radius delta_old = self.trust_radius - if self.rho >= self.eta2: + if (self.rho >= self.eta2) and hits_boundary: delta_new = min(self.gam2*delta_old, self.trust_radius_max) elif self.eta1 <= self.rho < self.eta2: delta_new = delta_old @@ -328,80 +392,6 @@ def calc_update(self, iter_resamp=0): return success - - def solve_sub_problem_CG_Steihaug(self, g, B, delta): - """ - Solve the trust region subproblem using Steihaug's Conjugate Gradient method. - (A big thanks to copilot for the help with this implementation) - - Parameters: - g (numpy.ndarray): Gradient vector at the current point. - B (numpy.ndarray): Hessian matrix at the current point. - delta (float): Trust region radius. - tol (float): Tolerance for convergence. - max_iter (int): Maximum number of iterations. - - Returns: - p (numpy.ndarray): Solution vector. 
- """ - z = np.zeros_like(g) - r = g - d = -g - - # Set same default tolerance as scipy - tol = min(0.5, la.norm(g)**2)*la.norm(g) - - if la.norm(g) <= tol: - return z - - # make quadratic model - mc = lambda s: self.fk + np.dot(g,s) + np.dot(s,np.dot(B,s))/2 - - while True: - dBd = np.dot(d, np.dot(B,d)) - - if dBd <= 0: - # Solve the quadratic equation: (p + tau*d)**2 = delta**2 - tau_lo, tau_hi = self.get_tau_at_delta(z, d, delta) - p_lo = z + tau_lo*d - p_hi = z + tau_hi*d - - if mc(p_lo) < mc(p_hi): - return p_lo - else: - return p_hi - - alpha = np.dot(r,r)/dBd - z_new = z + alpha*d - - if la.norm(z_new) >= delta: - # Solve the quadratic equation: (p + tau*d)**2 = delta**2, for tau > 0 - _ , tau = self.get_tau_at_delta(z, d, delta) - return z + tau * d - - r_new = r + alpha*np.dot(B,d) - - if la.norm(r_new) < tol: - return z_new - - beta = np.dot(r_new,r_new)/np.dot(r,r) - d = -r_new + beta*d - r = r_new - z = z_new - - - def get_tau_at_delta(self, p, d, delta): - """ - Solve the quadratic equation: (p + tau*d)**2 = delta**2, for tau > 0 - """ - a = np.dot(d,d) - b = 2*np.dot(p,d) - c = np.dot(p,p) - delta**2 - tau_lo = -b/(2*a) - np.sqrt(b**2 - 4*a*c)/(2*a) - tau_hi = -b/(2*a) + np.sqrt(b**2 - 4*a*c)/(2*a) - return tau_lo, tau_hi - - From 400c79a6c571d06bca3ca9a57f6b2fb8953c2abc Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Wed, 16 Jul 2025 14:13:39 +0200 Subject: [PATCH 02/14] some design changes to TrustRegion --- popt/update_schemes/trust_region.py | 99 +++++++++++++++++++---------- 1 file changed, 67 insertions(+), 32 deletions(-) diff --git a/popt/update_schemes/trust_region.py b/popt/update_schemes/trust_region.py index 9ed6e5e..cd616f9 100644 --- a/popt/update_schemes/trust_region.py +++ b/popt/update_schemes/trust_region.py @@ -44,9 +44,10 @@ def TrustRegion(fun, x, jac, hess, method='iterative', args=(), bounds=None, cal bounds : sequence, optional Bounds for variables. Each element of the sequence must be a tuple of two scalars, representing the lower and upper bounds for that variable. Use None for one of the bounds if there are no bounds. + Bounds are handle by clipping the state to the bounds before evaluating the objective function and its derivatives. callback: callable, optional - A callable called after each successful iteration. The class instance of LineSearch + A callable called after each successful iteration. The class instance is passed as the only argument to the callback function: callback(self) **options : keyword arguments, optional @@ -66,6 +67,9 @@ def TrustRegion(fun, x, jac, hess, method='iterative', args=(), bounds=None, cal Minimum trust-region radius. Optimization is terminated if trust_radius = trust_radius_min. Default is trust_radius/100. + trust_radius_cuts: int + Number of allowed trust-region radius reductions if a step is not successful. Default is 4. + rho_tol: float Tolerance for rho (ratio of actual to predicted reduction). Default is 1e-6. @@ -81,6 +85,13 @@ def TrustRegion(fun, x, jac, hess, method='iterative', args=(), bounds=None, cal eta2 = 0.1 \n gam1 = 0.7 \n gam2 = 1.5 \n + + saveit: bool + If True, save the optimization results to a file. Default is True. + + convergence_criteria: callable + A callable that takes the current optimization object as an argument and returns True if the optimization should stop. + It can be used to implement custom convergence criteria. Default is None. save_folder: str Name of folder to save the results to. Defaul is ./ (the current directory). 
@@ -94,9 +105,9 @@ def TrustRegion(fun, x, jac, hess, method='iterative', args=(), bounds=None, cal hess0: ndarray Hessian value of the initial control. - resample: int - Number of jacobian re-computations allowed if a line search fails. Default is 4. - (useful if jacobian is stochastic) + resample: bool + If True, resample the Jacobian and Hessian if a step is not successful. Default is False. + (Only makes sense if the Jacobian and Hessian are stochastic). savedata: list[str] Further specification of which class variables to save to the result files. @@ -144,13 +155,21 @@ def __init__(self, fun, x, jac, hess, method='iterative', args=(), bounds=None, else: self.callback = None + # Custom convergence criteria (callable) + convergence_criteria = options.get('convergence_criteria', None) + if callable(convergence_criteria): + self.convergence_criteria = self.convergence_criteria + else: + self.convergence_criteria = None + # Set options for trust-region radius - self.trust_radius = options.get('trust_radius', 1.0) - self.trust_radius_max = options.get('trust_radius_max', 10*self.trust_radius) - self.trust_radius_min = options.get('trust_radius_min', self.trust_radius/100) + self.trust_radius = options.get('trust_radius', 1.0) + self.trust_radius_max = options.get('trust_radius_max', 10*self.trust_radius) + self.trust_radius_min = options.get('trust_radius_min', self.trust_radius/100) + self.trust_radius_cuts = options.get('trust_radius_cuts', 4) # Set other options - self.resample = options.get('resample', 3) + self.resample = options.get('resample', False) self.saveit = options.get('saveit', True) self.rho_tol = options.get('rho_tol', 1e-6) self.eta1 = options.get('eta1', 0.1) # reduce raduis if rho < 10% @@ -222,15 +241,17 @@ def _hess(self, x): return h def update_results(self): - res = {'fun': self.fk, - 'x': self.xk, - 'jac': self.jk, - 'hess': self.Hk, - 'nfev': self.nfev, - 'njev': self.njev, - 'nit': self.iteration, - 'trust_radius': self.trust_radius, - 'save_folder': self.options.get('save_folder', './')} + res = { + 'fun': self.fk, + 'x': self.xk, + 'jac': self.jk, + 'hess': self.Hk, + 'nfev': self.nfev, + 'njev': self.njev, + 'nit': self.iteration, + 'trust_radius': self.trust_radius, + 'save_folder': self.options.get('save_folder', './') + } for a, arg in enumerate(self.args): res[f'args[{a}]'] = arg @@ -300,7 +321,7 @@ def solve_subproblem(self, g, B, delta): return pk, pk_hits_boundary - def calc_update(self, iter_resamp=0): + def calc_update(self, inner_iter=0): # Initialize variables for this step success = True @@ -321,7 +342,7 @@ def calc_update(self, iter_resamp=0): # Calculate rho actual_reduction = self.fk - fun_new - predicted_reduction = - np.dot(self.jk, sk) - 0.5*np.dot(sk, np.dot(self.Hk, sk)) + predicted_reduction = - np.dot(self.jk, sk) - np.dot(sk, np.dot(self.Hk, sk))/2 self.rho = actual_reduction/predicted_reduction if self.rho > self.rho_tol: @@ -355,12 +376,19 @@ def calc_update(self, iter_resamp=0): delta_new = self.gam1*delta_old # Log new trust-radius - self.trust_radius = delta_new + self.trust_radius = np.clip(delta_new, self.trust_radius_min, self.trust_radius_max) if not (delta_old == delta_new): self._log(f'Trust-radius updated: {delta_old:<10.4e} --> {delta_new:<10.4e}') + # Check for custom convergence + if callable(self.convergence_criteria): + if self.convergence_criteria(self): + self._log('Custom convergence criteria met. 
Stopping optimization.') + success = False + return success + # check for convergence - if (self.trust_radius < self.trust_radius_min) or (self.iteration==self.max_iter): + if self.iteration==self.max_iter: success = False else: # Calculate the jacobian and hessian @@ -371,21 +399,28 @@ def calc_update(self, iter_resamp=0): self.iteration += 1 else: - if iter_resamp < self.resample: - - iter_resamp += 1 + if inner_iter < self.trust_radius_cuts: + + # Log the failure + self._log(f'Step not successful: rho < {self.rho_tol:<10.4e}') + + # Reduce trust region radius to 75% of current value + self._log('Reducing trust-radius by 75%') + self.trust_radius = 0.25*self.trust_radius - # Calculate the jacobian and hessian - self._log('Resampling gradient and hessian') - self.jk = self._jac(self.xk) - self.Hk = self._hess(self.xk) + if self.trust_radius < self.trust_radius_min: + self._log(f'Trust radius {self.trust_radius} is below minimum {self.trust_radius_min}. Stopping optimization.') + success = False + return success - # Reduce trust region radius to 50% of current value - self._log('Reducing trust-radius by 50%') - self.trust_radius = 0.5*self.trust_radius + # Check for resampling of Jac and Hess + if self.resample: + self._log('Resampling gradient and hessian') + self.jk = self._jac(self.xk) + self.Hk = self._hess(self.xk) # Recursivly call function - success = self.calc_update(iter_resamp=iter_resamp) + success = self.calc_update(inner_iter=inner_iter+1) else: success = False From 98211a043dd0469c9c03eb89921693ef46ffdac0 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Tue, 5 Aug 2025 13:57:56 +0200 Subject: [PATCH 03/14] decoupled GenOpt from Ensemble --- popt/update_schemes/linesearch.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index 956cd9c..7bf1081 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -204,7 +204,7 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c self.saveit = options.get('saveit', True) # Check method - valid_methods = ['GD', 'BFGS', 'Newton', 'Adam'] + valid_methods = ['GD', 'BFGS', 'Newton'] if not self.method in valid_methods: raise ValueError(f"'{self.method}' is not a valid method. 
Valid methods are: {valid_methods}") @@ -374,13 +374,6 @@ def calc_update(self, iter_resamp=0): pk = - np.matmul(self.Hk_inv, self.jk) if self.method == 'Newton': pk = - np.matmul(la.inv(self.Hk), self.jk) - if self.method == 'Adam': - if self.iteration == 1: - pk = - self.jk - else: - optimizer = optimizers.Adam(1) - pk = - optimizer.apply_update(np.zeros_like(self.xk), self.jk, iter=self.iteration-1)[1] - optimizer.restore_parameters() # remove components that point out of the hybercube given by [lb,ub] lb = np.array(self.bounds)[:, 0] From 6152b7f6e0822fc91c79393c76a25e1589d27d5d Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Tue, 5 Aug 2025 14:00:39 +0200 Subject: [PATCH 04/14] decoupled GenOpt from Ensemble --- popt/loop/ensemble.py | 7 -- popt/loop/{base.py => ensemble_base.py} | 108 +++++++++++++++--------- popt/loop/generalized_ensemble.py | 31 ++++--- 3 files changed, 82 insertions(+), 64 deletions(-) rename popt/loop/{base.py => ensemble_base.py} (57%) diff --git a/popt/loop/ensemble.py b/popt/loop/ensemble.py index 77ff998..507ad8c 100644 --- a/popt/loop/ensemble.py +++ b/popt/loop/ensemble.py @@ -10,7 +10,6 @@ from popt.misc_tools import optim_tools as ot from pipt.misc_tools import analysis_tools as at from ensemble.ensemble import Ensemble as PETEnsemble -from popt.loop.extensions import GenOptExtension class Ensemble(PETEnsemble): @@ -137,12 +136,6 @@ def __set__variable(var_name=None, defalut=None): self.bias_weights = np.ones(self.num_samples) / self.num_samples # initialize with equal weights self.bias_points = None # this is the points used to estimate the bias correction - # Setup GenOpt - self.genopt = GenOptExtension(self.get_state(), - self.get_cov(), - func=self.function, - ne=self.num_samples) - def get_state(self): """ Returns diff --git a/popt/loop/base.py b/popt/loop/ensemble_base.py similarity index 57% rename from popt/loop/base.py rename to popt/loop/ensemble_base.py index 33d4497..2500fc2 100644 --- a/popt/loop/base.py +++ b/popt/loop/ensemble_base.py @@ -9,63 +9,81 @@ from popt.misc_tools import optim_tools as ot from pipt.misc_tools import analysis_tools as at from ensemble.ensemble import Ensemble as PETEnsemble +from simulator.simple_models import noSimulation -class EnsembleOptimizationBase(PETEnsemble): +class EnsembleOptimizationBaseClass(PETEnsemble): ''' Base class for the popt ensemble ''' - def __init__(self, kwargs_ens, sim, obj_func): + def __init__(self, options, simulator, objective): ''' Parameters ---------- - kwargs_ens : dict + options : dict Options for the ensemble class - sim : callable - The forward simulator (e.g. flow) + simulator : callable + The forward simulator (e.g. flow). If None, no simulation is performed. - obj_func : callable + objective : callable The objective function (e.g. 
npv) ''' + if simulator is None: + sim = noSimulation() + else: + sim = simulator # Initialize PETEnsemble - super().__init__(kwargs_ens, sim) - - self.save_prediction = kwargs_ens.get('save_prediction', None) - self.num_models = kwargs_ens.get('num_models', 1) - self.transform = kwargs_ens.get('transform', False) - self.num_samples = self.ne + super().__init__(options, sim) - # Get bounds and varaince - self.upper_bound = [] - self.lower_bound = [] + # Unpack some options + self.save_prediction = options.get('save_prediction', None) + self.num_models = options.get('num_models', 1) + self.transform = options.get('transform', False) + self.num_samples = self.ne + + # Define some variables + self.lb = [] + self.ub = [] self.bounds = [] self.cov = np.array([]) - for name in self.prior_info.keys(): - self.state[name] = np.asarray(self.prior_info[name]['mean']) - num_state_var = len(self.state[name]) - value_cov = self.prior_info[name]['variance'] * np.ones((num_state_var,)) - if 'limits' in self.prior_info[name].keys(): - lb = self.prior_info[name]['limits'][0] - ub = self.prior_info[name]['limits'][1] - self.lower_bound.append(lb) - self.upper_bound.append(ub) + + # Get bounds and varaince, and initialize state + for key in self.prior_info.keys(): + variable = self.prior_info[key] + + # mean + self.state[key] = np.asarray(variable['mean']) + + # Covariance + dim = self.state[key].size + cov = variable['variance']*np.ones(dim) + + if 'limits' in variable.keys(): + lb, ub = variable['limits'] + self.lb(lb) + self.ub(ub) + + # transform cov to [0, 1] if transform is True if self.transform: - value_cov = value_cov / (ub - lb)**2 - np.clip(value_cov, 0, 1, out=value_cov) - self.bounds += num_state_var*[(0, 1)] + cov = np.clip(cov/(ub - lb)**2, 0, 1, out=cov) + self.bounds += dim*[(0, 1)] else: - self.bounds += num_state_var*[(lb, ub)] - self.cov = np.append(self.cov, value_cov) + self.bounds += dim*[(lb, ub)] else: - self.bounds += num_state_var*[(None, None)] + self.bounds += dim*[(None, None)] + + # Add to covariance + self.cov = np.append(self.cov, cov) - - self._scale_state() + # Make cov full covariance matrix self.cov = np.diag(self.cov) + # Scale the state to [0, 1] if transform is True + self._scale_state() + # Set objective function (callable) - self.obj_func = obj_func + self.obj_func = objective # Objective function values self.state_func_values = None @@ -78,8 +96,13 @@ def get_state(self): x : numpy.ndarray Control vector as ndarray, shape (number of controls, number of perturbations) """ - x = ot.aug_optim_state(self.state, list(self.state.keys())) - return x + return ot.aug_optim_state(self.state, list(self.state.keys())) + + def vec_to_state(self, x): + """ + Converts a control vector to the internal state representation. 
+ """ + return ot.update_optim_state(x, self.state, list(self.state.keys())) def get_bounds(self): """ @@ -112,7 +135,10 @@ def function(self, x, *args): else: self.ne = x.shape[1] - self.state = ot.update_optim_state(x, self.state, list(self.state.keys())) # go from nparray to dict + # convert x to state + self.state = self.vec_to_state(x) # go from nparray to dict + + # run the simulation self._invert_scale_state() # ensure that state is in [lb,ub] run_success = self.calc_prediction(save_prediction=self.save_prediction) # calculate flow data self._scale_state() # scale back to [0, 1] @@ -147,17 +173,17 @@ def _scale_state(self): """ Transform the internal state from [lb, ub] to [0, 1] """ - if self.transform and (self.upper_bound and self.lower_bound): + if self.transform and (self.lb and self.ub): for i, key in enumerate(self.state): - self.state[key] = (self.state[key] - self.lower_bound[i])/(self.upper_bound[i] - self.lower_bound[i]) + self.state[key] = (self.state[key] - self.lb[i])/(self.ub[i] - self.lb[i]) np.clip(self.state[key], 0, 1, out=self.state[key]) def _invert_scale_state(self): """ Transform the internal state from [0, 1] to [lb, ub] """ - if self.transform and (self.upper_bound and self.lower_bound): + if self.transform and (self.lb and self.ub): for i, key in enumerate(self.state): if self.transform: - self.state[key] = self.lower_bound[i] + self.state[key]*(self.upper_bound[i] - self.lower_bound[i]) - np.clip(self.state[key], self.lower_bound[i], self.upper_bound[i], out=self.state[key]) \ No newline at end of file + self.state[key] = self.lb[i] + self.state[key]*(self.ub[i] - self.lb[i]) + np.clip(self.state[key], self.lb[i], self.ub[i], out=self.state[key]) \ No newline at end of file diff --git a/popt/loop/generalized_ensemble.py b/popt/loop/generalized_ensemble.py index cacbe99..81809c2 100644 --- a/popt/loop/generalized_ensemble.py +++ b/popt/loop/generalized_ensemble.py @@ -10,33 +10,32 @@ # Internal imports from popt.misc_tools import optim_tools as ot from pipt.misc_tools import analysis_tools as at -from popt.loop.base import EnsembleOptimizationBase +from popt.loop.ensemble_base import EnsembleOptimizationBaseClass -class GeneralizedEnsemble(EnsembleOptimizationBase): +class GeneralizedEnsemble(EnsembleOptimizationBaseClass): - def __init__(self, kwargs_ens, sim, obj_func): + def __init__(self, options, simulator, objective): ''' Parameters ---------- - kwargs_ens : dict + options : dict Options for the ensemble class - sim : callable - The forward simulator (e.g. flow) + simulator : callable + The forward simulator (e.g. flow). If None, no simulation is performed. - obj_func : callable + objective : callable The objective function (e.g. 
npv) ''' - super().__init__(kwargs_ens, sim, obj_func) - - self.dim = self.get_state().size + super().__init__(options, simulator, objective) # construct corr matrix std = np.sqrt(np.diag(self.cov)) self.corr = self.cov/np.outer(std, std) + self.dim = std # choose marginal - marginal = kwargs_ens.get('marginal', 'Beta') + marginal = options.get('marginal', 'BetaMC') if marginal in ['Beta', 'BetaMC', 'Logistic', 'TruncGaussian', 'Gaussian']: @@ -45,7 +44,7 @@ def __init__(self, kwargs_ens, sim, obj_func): if marginal == 'Beta': self.margs = Beta() - self.theta = kwargs_ens.get('theta', np.array([[20.0, 20.0] for _ in range(self.dim)])) + self.theta = options.get('theta', np.array([[20.0, 20.0] for _ in range(self.dim)])) self.eps = self.var2eps() self.grad_scale = 1/(2*self.eps) self.hess_scale = 1/(4*self.eps**2) @@ -56,20 +55,20 @@ def __init__(self, kwargs_ens, sim, obj_func): var = np.diag(self.cov) self.margs = BetaMC(lb, ub, 0.1*np.sqrt(var[0])) default_theta = np.array([var_to_concentration(state[i], var[i], lb[i], ub[i]) for i in range(self.dim)]) - self.theta = kwargs_ens.get('theta', default_theta) + self.theta = options.get('theta', default_theta) elif marginal == 'Logistic': self.margs = Logistic() - self.theta = kwargs_ens.get('theta', self.margs.var_to_scale(np.diag(self.cov))) + self.theta = options.get('theta', self.margs.var_to_scale(np.diag(self.cov))) elif marginal == 'TruncGaussian': lb, ub = np.array(self.bounds).T self.margs = TruncGaussian(lb,ub) - self.theta = kwargs_ens.get('theta', np.sqrt(np.diag(self.cov))) + self.theta = options.get('theta', np.sqrt(np.diag(self.cov))) elif marginal == 'Gaussian': self.margs = Gaussian() - self.theta = kwargs_ens.get('theta', np.sqrt(np.diag(self.cov))) + self.theta = options.get('theta', np.sqrt(np.diag(self.cov))) def get_theta(self): return self.theta From 8ff8d5f20eab6cd1a672cd3073d65e8e7be9aaa6 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Thu, 7 Aug 2025 10:19:10 +0200 Subject: [PATCH 05/14] Cleaned up code duplication and renamed some stuff --- popt/loop/ensemble_base.py | 104 ++++++---- .../{ensemble.py => ensemble_gaussian.py} | 188 +----------------- ...ed_ensemble.py => ensemble_generalized.py} | 0 popt/loop/extensions.py | 1 + 4 files changed, 71 insertions(+), 222 deletions(-) rename popt/loop/{ensemble.py => ensemble_gaussian.py} (70%) rename popt/loop/{generalized_ensemble.py => ensemble_generalized.py} (100%) diff --git a/popt/loop/ensemble_base.py b/popt/loop/ensemble_base.py index 2500fc2..951a2be 100644 --- a/popt/loop/ensemble_base.py +++ b/popt/loop/ensemble_base.py @@ -8,10 +8,10 @@ # Internal imports from popt.misc_tools import optim_tools as ot from pipt.misc_tools import analysis_tools as at -from ensemble.ensemble import Ensemble as PETEnsemble +from ensemble.ensemble import Ensemble as SupEnsemble from simulator.simple_models import noSimulation -class EnsembleOptimizationBaseClass(PETEnsemble): +class EnsembleOptimizationBaseClass(SupEnsemble): ''' Base class for the popt ensemble ''' @@ -33,7 +33,7 @@ def __init__(self, options, simulator, objective): else: sim = simulator - # Initialize PETEnsemble + # Initialize the PET Ensemble super().__init__(options, sim) # Unpack some options @@ -41,32 +41,44 @@ def __init__(self, options, simulator, objective): self.num_models = options.get('num_models', 1) self.transform = options.get('transform', False) self.num_samples = self.ne - - # Define some variables + + # Set objective function (callable) + self.obj_func = objective + 
self.state_func_values = None + self.ens_func_values = None + + # Initialize prior + self._initialize_state_info() # Initialize cov, bounds, and state + self._scale_state() # Scale self.state to [0, 1] if transform is True + + def _initialize_state_info(self): + ''' + Initialize covariance and bounds based on prior information. + ''' + self.cov = np.array([]) self.lb = [] self.ub = [] self.bounds = [] - self.cov = np.array([]) - - # Get bounds and varaince, and initialize state + for key in self.prior_info.keys(): variable = self.prior_info[key] - + # mean self.state[key] = np.asarray(variable['mean']) # Covariance dim = self.state[key].size - cov = variable['variance']*np.ones(dim) - + var = variable['variance']*np.ones(dim) + if 'limits' in variable.keys(): lb, ub = variable['limits'] - self.lb(lb) - self.ub(ub) - - # transform cov to [0, 1] if transform is True + self.lb.append(lb) + self.ub.append(ub) + + # transform var to [0, 1] if transform is True if self.transform: - cov = np.clip(cov/(ub - lb)**2, 0, 1, out=cov) + var = var/(ub - lb)**2 + var = np.clip(var, 0, 1, out=var) self.bounds += dim*[(0, 1)] else: self.bounds += dim*[(lb, ub)] @@ -74,20 +86,11 @@ def __init__(self, options, simulator, objective): self.bounds += dim*[(None, None)] # Add to covariance - self.cov = np.append(self.cov, cov) - + self.cov = np.append(self.cov, var) + self.dim = self.cov.shape[0] + # Make cov full covariance matrix self.cov = np.diag(self.cov) - - # Scale the state to [0, 1] if transform is True - self._scale_state() - - # Set objective function (callable) - self.obj_func = objective - - # Objective function values - self.state_func_values = None - self.ens_func_values = None def get_state(self): """ @@ -98,6 +101,15 @@ def get_state(self): """ return ot.aug_optim_state(self.state, list(self.state.keys())) + def get_cov(self): + """ + Returns + ------- + cov : numpy.ndarray + Covariance matrix, shape (number of controls, number of controls) + """ + return self.cov + def vec_to_state(self, x): """ Converts a control vector to the internal state representation. @@ -114,7 +126,7 @@ def get_bounds(self): return self.bounds - def function(self, x, *args): + def function(self, x, *args, **kwargs): """ This is the main function called during optimization. 
@@ -130,29 +142,41 @@ def function(self, x, *args): """ self._aux_input() - if len(x.shape) == 1: - self.ne = self.num_models - else: - self.ne = x.shape[1] + # check for ensmble + if len(x.shape) == 1: self.ne = self.num_models + else: self.ne = x.shape[1] - # convert x to state - self.state = self.vec_to_state(x) # go from nparray to dict + # convert x (nparray) to state (dict) + self.state = self.vec_to_state(x) # run the simulation self._invert_scale_state() # ensure that state is in [lb,ub] + self._set_multilevel_state(self.state, x) # set multilevel state if applicable run_success = self.calc_prediction(save_prediction=self.save_prediction) # calculate flow data + self._set_multilevel_state(self.state, x) # For some reason this has to be done again after calc_prediction self._scale_state() # scale back to [0, 1] + + # Evaluate the objective function if run_success: - func_values = self.obj_func(self.pred_data, self.sim.input_dict, self.sim.true_order) + func_values = self.obj_func( + self.pred_data, + input_dict=self.sim.input_dict, + true_order=self.sim.true_order, + **kwargs + ) else: func_values = np.inf # the simulations have crashed - if len(x.shape) == 1: - self.state_func_values = func_values - else: - self.ens_func_values = func_values + if len(x.shape) == 1: self.state_func_values = func_values + else: self.ens_func_values = func_values return func_values + + def _set_multilevel_state(self, state, x): + if 'multilevel' in self.keys_en.keys() and len(x.shape) > 1: + en_size = ot.get_list_element(self.keys_en['multilevel'], 'en_size') + self.state = ot.toggle_ml_state(self.state, en_size) + def _aux_input(self): """ diff --git a/popt/loop/ensemble.py b/popt/loop/ensemble_gaussian.py similarity index 70% rename from popt/loop/ensemble.py rename to popt/loop/ensemble_gaussian.py index 62f7389..f4bf832 100644 --- a/popt/loop/ensemble.py +++ b/popt/loop/ensemble_gaussian.py @@ -5,14 +5,13 @@ from copy import deepcopy - # Internal imports from popt.misc_tools import optim_tools as ot from pipt.misc_tools import analysis_tools as at -from ensemble.ensemble import Ensemble as PETEnsemble +from popt.loop.ensemble_base import EnsembleOptimizationBaseClass -class Ensemble(PETEnsemble): +class GaussianEnsemble(EnsembleOptimizationBaseClass): """ Class to store control states and evaluate objective functions. @@ -41,7 +40,7 @@ class Ensemble(PETEnsemble): """ - def __init__(self, keys_en, sim, obj_func): + def __init__(self, options, simulator, objective): """ Parameters ---------- @@ -63,57 +62,7 @@ def __init__(self, keys_en, sim, obj_func): """ # Initialize PETEnsemble - super(Ensemble, self).__init__(keys_en, sim) - - def __set__variable(var_name=None, defalut=None): - if var_name in keys_en: - return keys_en[var_name] - else: - return defalut - - # Set number of models (default 1) - self.num_models = __set__variable('num_models', 1) - - # Set transform flag (defalult True) - self.transform = __set__variable('transform', True) - - # Number of samples to compute gradient - self.num_samples = self.ne - - # Save pred data? - self.save_prediction = __set__variable('save_prediction', None) - - # We need the limits to convert between [0, 1] and [lb, ub], - # and we need the bounds as list of (min, max) pairs - # Also set the state and covarianve equal to the values provided in the input. 
- self.upper_bound = [] - self.lower_bound = [] - self.bounds = [] - self.cov = np.array([]) - for name in self.prior_info.keys(): - self.state[name] = np.asarray(self.prior_info[name]['mean']) - num_state_var = len(self.state[name]) - value_cov = self.prior_info[name]['variance'] * np.ones((num_state_var,)) - if 'limits' in self.prior_info[name].keys(): - lb = self.prior_info[name]['limits'][0] - ub = self.prior_info[name]['limits'][1] - self.lower_bound.append(lb) - self.upper_bound.append(ub) - if self.transform: - value_cov = value_cov / (ub - lb)**2 - np.clip(value_cov, 0, 1, out=value_cov) - self.bounds += num_state_var*[(0, 1)] - else: - self.bounds += num_state_var*[(lb, ub)] - else: - self.bounds += num_state_var*[(None, None)] - self.cov = np.append(self.cov, value_cov) - - self._scale_state() - self.cov = np.diag(self.cov) - - # Set objective function (callable) - self.obj_func = obj_func + super().__init__(options, simulator, objective) # Objective function values self.state_func_values = None @@ -135,36 +84,6 @@ def __set__variable(var_name=None, defalut=None): self.bias_factors = None # this is J(x_j,m_j)/J(x_j,m) self.bias_weights = np.ones(self.num_samples) / self.num_samples # initialize with equal weights self.bias_points = None # this is the points used to estimate the bias correction - - def get_state(self): - """ - Returns - ------- - x : numpy.ndarray - Control vector as ndarray, shape (number of controls, number of perturbations) - """ - x = ot.aug_optim_state(self.state, list(self.state.keys())) - return x - - def get_cov(self): - """ - Returns - ------- - cov : numpy.ndarray - Covariance matrix, shape (number of controls, number of controls) - """ - - return self.cov - - def get_bounds(self): - """ - Returns - ------- - bounds : list - (min, max) pairs for each element in x. None is used to specify no bound. - """ - - return self.bounds def get_final_state(self, return_dict=False): """ @@ -186,56 +105,6 @@ def get_final_state(self, return_dict=False): x = self.get_state() return x - def function(self, x, *args, **kwargs): - """ - This is the main function called during optimization. 
- - Parameters - ---------- - x : ndarray - Control vector, shape (number of controls, number of perturbations) - - Returns - ------- - obj_func_values : numpy.ndarray - Objective function values, shape (number of perturbations, ) - """ - self._aux_input() - - if len(x.shape) == 1: - self.ne = self.num_models - else: - self.ne = x.shape[1] - - self.state = ot.update_optim_state(x, self.state, list(self.state.keys())) # go from nparray to dict - self._invert_scale_state() # ensure that state is in [lb,ub] - - # Here we need to account for the possibility of having a multilevel ensemble and make a list of levels - if 'multilevel' in self.keys_en.keys() and len(x.shape) > 1: - en_size = ot.get_list_element(self.keys_en['multilevel'], 'en_size') - self.state = ot.toggle_ml_state(self.state, en_size) - - run_success = self.calc_prediction() # calculate flow data - - # Here we need to account for the possibility of having a multilevel ensemble and remove list of levels - if 'multilevel' in self.keys_en.keys() and len(x.shape) > 1: - en_size = ot.get_list_element(self.keys_en['multilevel'], 'en_size') - self.state = ot.toggle_ml_state(self.state, en_size) - - self._scale_state() # scale back to [0, 1] - if run_success: - func_values = self.obj_func(self.pred_data, input_dict=self.sim.input_dict, - true_order=self.sim.true_order, **kwargs) - else: - func_values = np.inf # the simulations have crashed - - if len(x.shape) == 1: - self.state_func_values = func_values - else: - self.ens_func_values = func_values - - return func_values - def gradient(self, x, *args, **kwargs): r""" Calculate the preconditioned gradient associated with ensemble, defined as: @@ -393,17 +262,6 @@ def hessian(self, x=None, *args): hessian = level_hessian[0] return hessian - ''' - def genopt_gradient(self, x, *args): - self.genopt.update_distribution(*args) - gradient = self.genopt.ensemble_gradient(func=self.function, - x=x, - ne=self.num_samples) - return gradient - - def genopt_mutation_gradient(self, x=None, *args, **kwargs): - return self.genopt.ensemble_mutation_gradient(return_ensembles=kwargs['return_ensembles']) - ''' def calc_ensemble_weights(self, x, *args, **kwargs): r""" @@ -527,49 +385,15 @@ def _gen_state_ensemble(self): cov = cov_blocks[i] temp_state_en = np.random.multivariate_normal(mean, cov, self.ne).transpose() shifted_ensemble = np.array([mean]).T + temp_state_en - np.array([np.mean(temp_state_en, 1)]).T - if self.upper_bound and self.lower_bound: + if self.lb and self.ub: if self.transform: np.clip(shifted_ensemble, 0, 1, out=shifted_ensemble) else: - np.clip(shifted_ensemble, self.lower_bound[i], self.upper_bound[i], out=shifted_ensemble) + np.clip(shifted_ensemble, self.lb[i], self.ub[i], out=shifted_ensemble) state_en[statename] = shifted_ensemble return state_en - def _aux_input(self): - """ - Set the auxiliary input used for multiple geological realizations - """ - - nr = 1 # nr is the ratio of samples over models - if self.num_models > 1: - if np.remainder(self.num_samples, self.num_models) == 0: - nr = int(self.num_samples / self.num_models) - self.aux_input = list(np.repeat(np.arange(self.num_models), nr)) - else: - print('num_samples must be a multiplum of num_models!') - sys.exit(0) - return nr - - def _scale_state(self): - """ - Transform the internal state from [lb, ub] to [0, 1] - """ - if self.transform and (self.upper_bound and self.lower_bound): - for i, key in enumerate(self.state): - self.state[key] = (self.state[key] - self.lower_bound[i])/(self.upper_bound[i] - self.lower_bound[i]) 
- np.clip(self.state[key], 0, 1, out=self.state[key]) - - def _invert_scale_state(self): - """ - Transform the internal state from [0, 1] to [lb, ub] - """ - if self.transform and (self.upper_bound and self.lower_bound): - for i, key in enumerate(self.state): - if self.transform: - self.state[key] = self.lower_bound[i] + self.state[key]*(self.upper_bound[i] - self.lower_bound[i]) - np.clip(self.state[key], self.lower_bound[i], self.upper_bound[i], out=self.state[key]) - def _bias_correction(self, state): """ Calculate bias correction. Currently, the bias correction is a constant independent of the state diff --git a/popt/loop/generalized_ensemble.py b/popt/loop/ensemble_generalized.py similarity index 100% rename from popt/loop/generalized_ensemble.py rename to popt/loop/ensemble_generalized.py diff --git a/popt/loop/extensions.py b/popt/loop/extensions.py index 5bcba7d..2dc7536 100644 --- a/popt/loop/extensions.py +++ b/popt/loop/extensions.py @@ -6,6 +6,7 @@ # Internal imports from popt.misc_tools import optim_tools as ot +# NB! THIS FILE IS NOT USED ANYMORE __all__ = ['GenOptExtension'] From 235948d0e4b81002c2fc8f92a21692f43f38b46e Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Thu, 7 Aug 2025 13:33:14 +0200 Subject: [PATCH 06/14] comments --- popt/loop/__init__.py | 2 +- popt/loop/ensemble_base.py | 2 ++ popt/loop/ensemble_gaussian.py | 1 + popt/loop/ensemble_generalized.py | 4 +++- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/popt/loop/__init__.py b/popt/loop/__init__.py index ead116b..5ded178 100644 --- a/popt/loop/__init__.py +++ b/popt/loop/__init__.py @@ -1 +1 @@ -"""Main loop for running optimization.""" +"""Main loop for running optimization.""" \ No newline at end of file diff --git a/popt/loop/ensemble_base.py b/popt/loop/ensemble_base.py index 951a2be..e5363a2 100644 --- a/popt/loop/ensemble_base.py +++ b/popt/loop/ensemble_base.py @@ -11,6 +11,8 @@ from ensemble.ensemble import Ensemble as SupEnsemble from simulator.simple_models import noSimulation +__all__ = ['EnsembleOptimizationBaseClass'] + class EnsembleOptimizationBaseClass(SupEnsemble): ''' Base class for the popt ensemble diff --git a/popt/loop/ensemble_gaussian.py b/popt/loop/ensemble_gaussian.py index f4bf832..2d334ca 100644 --- a/popt/loop/ensemble_gaussian.py +++ b/popt/loop/ensemble_gaussian.py @@ -10,6 +10,7 @@ from pipt.misc_tools import analysis_tools as at from popt.loop.ensemble_base import EnsembleOptimizationBaseClass +__all__ = ['GaussianEnsemble'] class GaussianEnsemble(EnsembleOptimizationBaseClass): """ diff --git a/popt/loop/ensemble_generalized.py b/popt/loop/ensemble_generalized.py index 81809c2..c786627 100644 --- a/popt/loop/ensemble_generalized.py +++ b/popt/loop/ensemble_generalized.py @@ -12,6 +12,8 @@ from pipt.misc_tools import analysis_tools as at from popt.loop.ensemble_base import EnsembleOptimizationBaseClass +__all__ = ['GeneralizedEnsemble'] + class GeneralizedEnsemble(EnsembleOptimizationBaseClass): def __init__(self, options, simulator, objective): @@ -32,7 +34,7 @@ def __init__(self, options, simulator, objective): # construct corr matrix std = np.sqrt(np.diag(self.cov)) self.corr = self.cov/np.outer(std, std) - self.dim = std + self.dim = std.size # choose marginal marginal = options.get('marginal', 'BetaMC') From 1d0988f0dfce6d324fa675a712b104101552c579 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Wed, 20 Aug 2025 13:01:42 +0200 Subject: [PATCH 07/14] improved and cleaned up LineSearch --- popt/update_schemes/line_search_step.py | 295 
+++++++++++++ popt/update_schemes/linesearch.py | 549 +++++++++++------------- 2 files changed, 534 insertions(+), 310 deletions(-) create mode 100644 popt/update_schemes/line_search_step.py diff --git a/popt/update_schemes/line_search_step.py b/popt/update_schemes/line_search_step.py new file mode 100644 index 0000000..5207940 --- /dev/null +++ b/popt/update_schemes/line_search_step.py @@ -0,0 +1,295 @@ +# This is a an implementation of the Line Search Algorithm (Alg. 3.5) in Numerical Optimization from Nocedal 2006. + +import numpy as np +from functools import cache +from scipy.optimize._linesearch import _quadmin, _cubicmin + + +def line_search(step_size, xk, pk, fun, jac, fk=None, jk=None, **kwargs): + ''' + Line search algorithm to find step size alpha that satisfies the Wolfe conditions. + + Parameters + ---------- + step_size : float + Initial step size to start the line search. + + xk : ndarray + Current point in the optimization process. + + pk : ndarray + Search direction. + + fun : callable + Objective function + + jac : callable + Gradient of the objective function + + fk : float, optional + Function value at xk. If None, it will be computed. + + jk : ndarray, optional + Gradient at xk. If None, it will be computed. + + **kwargs : dict + Additional parameters for the line search, such as: + - amax : float, maximum step size (default: 1000) + - maxiter : int, maximum number of iterations (default: 10) + - c1 : float, sufficient decrease condition (default: 1e-4) + - c2 : float, curvature condition (default: 0.9) + + Returns + ------- + alpha : float + Step size that satisfies the Wolfe conditions. + + fval : float + Function value at the new point xk + step_size*pk. + + jval : ndarray + Gradient at the new point xk + step_size*pk. + + nfev : int + Number of function evaluations. + + njev : int + Number of gradient evaluations. + ''' + + global ls_nfev + global ls_njev + ls_nfev = 0 + ls_njev = 0 + + # Unpack some kwargs + amax = kwargs.get('amax', 1000) + maxiter = kwargs.get('maxiter', 10) + c1 = kwargs.get('c1', 1e-4) + c2 = kwargs.get('c2', 0.9) + + # assertions + assert step_size <= amax, "Initial step size must be less than or equal to amax." 
+ + # Define phi and derivative of phi + @cache + def phi(alpha): + global ls_nfev + if (alpha == 0): + if (fk is None): + phi.fun_val = fun(xk) + ls_nfev += 1 + else: + phi.fun_val = fk + else: + phi.fun_val = fun(xk + alpha*pk) + ls_nfev += 1 + return phi.fun_val + + @cache + def dphi(alpha): + global ls_njev + if (alpha == 0): + if (jk is None): + dphi.jac_val = jac(xk) + ls_njev += 1 + else: + dphi.jac_val = jk + else: + dphi.jac_val = jac(xk + alpha*pk) + ls_njev += 1 + return np.dot(dphi.jac_val, pk) + + # Define initial values of phi and dphi + phi_0 = phi(0) + dphi_0 = dphi(0) + + # Start loop + a = [0, step_size] + for i in range(1, maxiter+1): + # Evaluate phi(ai) + phi_i = phi(a[i]) + + # Check for sufficient decrease + if (phi_i > phi_0 + c1*a[i]*dphi_0) or (phi_i >= phi(a[i-1]) and i>0): + # Call zoom function + step_size = zoom(a[i-1], a[i], phi, dphi, phi_0, dphi_0, maxiter+1-i, c1, c2) + return step_size, phi.fun_val, dphi.jac_val, ls_nfev, ls_njev + + # Evaluate dphi(ai) + dphi_i = dphi(a[i]) + + # Check curvature condition + if abs(dphi_i) <= -c2*dphi_0: + step_size = a[i] + return step_size, phi.fun_val, dphi.jac_val, ls_nfev, ls_njev + + # Check for posetive derivative + if dphi_i >= 0: + # Call zoom function + step_size = zoom(a[i], a[i-1], phi, dphi, phi_0, dphi_0, maxiter+1-i, c1, c2) + return step_size, phi.fun_val, dphi.jac_val, ls_nfev, ls_njev + + # Increase ai + a.append(min(2*a[i], amax)) + + # If we reached this point, the line search failed + return None, None, None, ls_nfev, ls_njev + + +def zoom(alo, ahi, f, df, f0, df0, maxiter, c1, c2): + '''Zoom function for line search algorithm. (This is the same as for scipy)''' + + phi_lo = f(alo) + phi_hi = f(ahi) + dphi_lo = df(alo) + + for j in range(maxiter): + + tol_cubic = 0.2*(ahi-alo) + tol_quad = 0.1*(ahi-alo) + + if (j > 0): + # cubic interpolation for alo, phi(alo), dphi(alo) and ahi, phi(ahi) + aj = _cubicmin(alo, phi_lo, dphi_lo, ahi, phi_hi, aold, phi_old) + if (j == 0) or (aj is None) or (aj < alo + tol_cubic) or (aj > ahi - tol_cubic): + # quadratic interpolation for alo, phi(alo), dphi(alo) and ahi, phi(ahi) + aj = _quadmin(alo, phi_lo, dphi_lo, ahi, phi_hi) + + # Ensure aj is within bounds + if (aj is None) or (aj < alo + tol_quad) or (aj > ahi - tol_quad): + aj = alo + 0.5*(ahi - alo) + + # Evaluate phi(aj) + phi_j = f(aj) + + # Check for sufficient decrease + if (phi_j > f0 + c1*aj*df0) or (phi_j >= phi_lo): + # store old values + aold = ahi + phi_old = phi_hi + # update ahi + ahi = aj + phi_hi = phi_j + else: + # check curvature condition + dphi_j = df(aj) + if abs(dphi_j) <= -c2*df0: + return aj + + if dphi_j*(ahi-alo) >= 0: + # store old values + aold = ahi + phi_old = phi_hi + # update alo + ahi = alo + phi_hi = phi_lo + else: + # store old values + aold = alo + phi_old = phi_lo + + alo = aj + phi_lo = phi_j + dphi_lo = dphi_j + + # If we reached this point, the line search failed + return None + + + + +def line_search_backtracking(step_size, xk, pk, fun, jac, fk=None, jk=None, **kwargs): + ''' + Backtracking line search algorithm to find step size alpha that satisfies the Wolfe conditions. + + Parameters + ---------- + step_size : float + Initial step size to start the line search. + + xk : ndarray + Current point in the optimization process. + + pk : ndarray + Search direction. + + fun : callable + Objective function + + jac : callable + Gradient of the objective function + + fk : float, optional + Function value at xk. If None, it will be computed. 
+ + jk : ndarray, optional + Gradient at xk. If None, it will be computed. + + **kwargs : dict + Additional parameters for the line search, such as: + - rho : float, backtracking factor (default: 0.5) + - maxiter : int, maximum number of iterations (default: 10) + - c1 : float, sufficient decrease condition (default: 1e-4) + - c2 : float, curvature condition (default: 0.9) + + Returns + ------- + alpha : float + Step size that satisfies the Wolfe conditions. + + fval : float + Function value at the new point xk + step_size*pk. + + jval : ndarray + Gradient at the new point xk + step_size*pk. + + nfev : int + Number of function evaluations. + + njev : int + Number of gradient evaluations. + ''' + + global ls_nfev + global ls_njev + ls_nfev = 0 + ls_njev = 0 + + # Unpack some kwargs + rho = kwargs.get('rho', 0.5) + maxiter = kwargs.get('maxiter', 10) + c1 = kwargs.get('c1', 1e-4) + + # Define phi and derivative of phi + @cache + def phi(alpha): + global ls_nfev + if (alpha == 0): + if (fk is None): + fun_val = fun(xk) + ls_nfev += 1 + else: + fun_val = fk + else: + fun_val = fun(xk + alpha*pk) + ls_nfev += 1 + return fun_val + + + # run the backtracking line search loop + for i in range(maxiter): + # Evaluate phi(alpha) + phi_i = phi(step_size) + + # Check for sufficient decrease + if (phi_i <= phi(0) + c1*step_size*np.dot(jk, pk)): + # Evaluate jac at new point + jac_new = jac(xk + step_size*pk) + return step_size, phi_i, jac_new, ls_nfev, ls_njev + + # Reduce step size + step_size *= rho + + # If we reached this point, the line search failed + return None, None, None, ls_nfev, ls_njev \ No newline at end of file diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index 7bf1081..df4a0a8 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -13,7 +13,13 @@ # Internal imports from popt.misc_tools import optim_tools as ot from popt.loop.optimize import Optimize -from popt.update_schemes import optimizers +from popt.update_schemes.line_search_step import line_search, line_search_backtracking + +# some symbols for logger +subk = '\u2096' +jac_inf_symbol = f'‖jac(x{subk})‖\u221E' +fun_xk_symbol = f'fun(x{subk})' + def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callback=None, **options): ''' @@ -48,72 +54,83 @@ def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callba A callable called after each successful iteration. The class instance of LineSearch is passed as the only argument to the callback function: callback(self) - **options: keyword arguments, optional + **options: + keyword arguments, optional LineSearch Options (**options) ------------------------------ - maxiter: int + - maxiter: int, Maximum number of iterations. Default is 20. - step_size: float - Step-size for optimizer. Default is 0.25/inf-norm(jac(x0)). - - step_size_maxiter: int + - lsmaxiter: int, Maximum number of iterations for the line search. Default is 10. + + - step_size: float, + Step-size for optimizer. Default is 0.25/inf-norm(jac(x0)). - step_size_max: float - Maximum step-size. Default is 1e5 + - step_size_max: float, + Maximum step-size. Default is 1e5. If bounds are specified, + the maximum step-size is set to the maximum step-size allowed by the bounds. - step_size_adapt: int + - step_size_adapt: int, Set method for choosing initial step-size for each iteration. If 0, step_size value is used. If 1, Equation (3.6) from "Numercal Optimization" [1] is used. 
If 2, the equation above Equation (3.6) is used. Default is 0. - c1: float + - c1: float, Tolerance parameter for the Armijo condition. Default is 1e-4. - c2: float + - c2: float, Tolerance parameter for the Curvature condition. Default is 0.9. - xtol: float + - xtol: float, Optimization stop whenever |dx|>> from popt.update_schemes.linesearch import LineSearch >>> x0 = np.random.uniform(-3, 3, 2) >>> kwargs = {'maxiter': 100, - 'line_search_maxiter': 10, + 'lsmaxiter': 10, 'step_size_adapt': 1, 'saveit': False} >>> res = LineSearch(fun=rosen, x=x0, jac=rosen_der, method='BFGS', **kwargs) @@ -182,20 +199,27 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c self.callback = callback else: self.callback = None + + # Custom convergence criteria (callable) + convergence_criteria = options.get('convergence_criteria', None) + if callable(convergence_criteria): + self.convergence_criteria = self.convergence_criteria + else: + self.convergence_criteria = None # Set options for step-size self.step_size = options.get('step_size', None) self.step_size_max = options.get('step_size_max', 1e5) self.step_size_adapt = options.get('step_size_adapt', 0) - # Set options for line-search method - self.line_search_kwargs = { + # Set options for line-search + self.lskwargs = { 'c1': options.get('c1', 1e-4), 'c2': options.get('c2', 0.9), + 'rho': options.get('rho', 0.5), 'amax': self.step_size_max, - 'xtol': options.get('xtol', 1e-8), - 'maxiter': options.get('line_search_maxiter', 10), - 'method' : options.get('line_search_method', 1) + 'maxiter': options.get('lsmaxiter', 10), + 'method' : options.get('lsmethod', 1) } # Set other options @@ -203,6 +227,11 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c self.resample = options.get('resample', 0) self.saveit = options.get('saveit', True) + # set tolerance for convergence + self.xtol = options.get('xtol', 1e-8) # tolerance for control vector + self.ftol = options.get('ftol', 1e-4) # relative tolerance for function value + self.gtol = options.get('gtol', 1e-5) # tolerance for inf-norm of jacobian + # Check method valid_methods = ['GD', 'BFGS', 'Newton'] if not self.method in valid_methods: @@ -241,12 +270,12 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c self.optimize_result = self.update_results() if self.saveit: ot.save_optimize_results(self.optimize_result) - if self.logger is not None: self.logger.info(f' ====== Running optimization - Line search ({method}) ======') - self.logger.info('Specified options\n'+pprint.pformat(OptimizeResult(self.options))) - self.logger.info(f' {"iter.":<10} {"fun":<15} {"step-size":<15} {"|grad|":<15}') - self.logger.info(f' {self.iteration:<10} {self.fk:<15.4e} {0.0:<15.4e} {la.norm(self.jk):<15.4e}') + self.logger.info('\nSPECIFIED OPTIONS:\n'+pprint.pformat(OptimizeResult(self.options))) + self.logger.info('') + self.logger.info(f' {"iter.":<10} {fun_xk_symbol:<15} {jac_inf_symbol:<15} {"step-size":<15}') + self.logger.info(f' {self.iteration:<10} {self.fk:<15.4e} {la.norm(self.jk, np.inf):<15.4e} {0:<15.4e}') self.logger.info('') self.run_loop() @@ -268,6 +297,11 @@ def _jac(self, x): g = self.jacobian(x) else: g = self.jacobian(x, *self.args) + + # project gradient onto the feasible set + if self.bounds is not None: + g = - self._project_pk(-g, x) + return g def _hess(self, x): @@ -281,78 +315,13 @@ def _hess(self, x): h = self.hessian(x, *self.args) return make_matrix_psd(h) - def update_results(self): - - res = {'fun': 
self.fk, - 'x': self.xk, - 'jac': self.jk, - 'hess': self.Hk, - 'hess_inv': self.Hk_inv, - 'nfev': self.nfev, - 'njev': self.njev, - 'nit': self.iteration, - 'step-size': self.step_size, - 'method': self.method, - 'save_folder': self.options.get('save_folder', './')} - - for a, arg in enumerate(self.args): - res[f'args[{a}]'] = arg - - if 'savedata' in self.options: - # Make sure "SAVEDATA" gives a list - if isinstance(self.options['savedata'], list): - savedata = self.options['savedata'] - else: - savedata = [self.options['savedata']] - - # Loop over variables to store in save list - for save_typ in savedata: - if save_typ in locals(): - res[save_typ] = eval('{}'.format(save_typ)) - elif hasattr(self, save_typ): - res[save_typ] = eval(' self.{}'.format(save_typ)) - else: - print(f'Cannot save {save_typ}!\n\n') - - return OptimizeResult(res) - def _set_step_size(self, pk): - ''' Sets the step-size ''' - - # If first iteration - if self.iteration == 1: - if self.step_size is None: - self.step_size = 0.25/la.norm(pk, np.inf) - alpha = self.step_size - else: - alpha = self.step_size - else: - if np.dot(pk, self.jk) != 0 and self.step_size_adapt != 0: - if self.step_size_adapt == 1: - alpha = 2*(self.fk - self.f_old)/np.dot(pk, self.jk) - if self.step_size_adapt == 2: - slope_old = np.dot(self.p_old, self.j_old) - slope_new = np.dot(pk, self.jk) - alpha = self.step_size*slope_old/slope_new - else: - alpha = self.step_size - - if alpha < 0: - alpha = abs(alpha) - - #if self.method in ['BFGS', 'Newton']: - # From "Numerical Optimization" - # alpha = min(1, 1.01*alpha) - - return min(alpha, self.step_size_max) - - def calc_update(self, iter_resamp=0): # Initialize variables for this step success = False - # If in resampling mode, compute jacobian + # If in resampling mode, compute jacobian # Else, jacobian from in __init__ or from latest line_search is used if self.jk is None: self.jk = self._jac(self.xk) @@ -375,41 +344,50 @@ def calc_update(self, iter_resamp=0): if self.method == 'Newton': pk = - np.matmul(la.inv(self.Hk), self.jk) - # remove components that point out of the hybercube given by [lb,ub] - lb = np.array(self.bounds)[:, 0] - ub = np.array(self.bounds)[:, 1] - for i in range(self.xk.size): - if (self.xk[i] <= lb[i] and pk[i] < 0) or (self.xk[i] >= ub[i] and pk[i] > 0): - pk[i] = 0 + # porject search direction onto the feasible set + if self.bounds is not None: + pk = self._project_pk(pk, self.xk) # Set step_size - step_size = self._set_step_size(pk) - - # Set maximum step-size if self.bounds is not None: - mean_bound_range = np.mean([b[1]-b[0] for b in self.bounds]) - step_size_max = mean_bound_range/np.linalg.norm(pk) - self.line_search_kwargs['amax'] = step_size_max + self.step_size_max = self._set_max_step_size(pk, self.xk) + self.lskwargs['amax'] = self.step_size_max + step_size = self._set_step_size(pk, self.step_size_max) # Perform line-search self.logger.info('Performing line search...') - ls_res = line_search( - fun=self._fun, - jac=self._jac, - xk=self.xk, - pk=pk, - ak=step_size, - fk=self.fk, - gk=self.jk, - logger=self.logger, - **self.line_search_kwargs - ) - step_size, f_new, f_old, j_new, self.msg = ls_res + if self.lskwargs['method'] == 0: + ls_res = line_search_backtracking( + step_size=step_size, + xk=self.xk, + pk=pk, + fun=self._fun, + jac=self._jac, + fk=self.fk, + jk=self.jk, + **self.lskwargs + ) + else: + ls_res = line_search( + step_size=step_size, + xk=self.xk, + pk=pk, + fun=self._fun, + jac=self._jac, + fk=self.fk, + jk=self.jk, + **self.lskwargs + ) + 
step_size, f_new, j_new, _, _ = ls_res if not (step_size is None): - + + # Save old values x_old = self.xk j_old = self.jk + f_old = self.fk + + # Update control x_new = ot.clip_state(x_old + step_size*pk, self.bounds) # Update state @@ -421,6 +399,7 @@ def calc_update(self, iter_resamp=0): self.j_old = j_old self.f_old = f_old self.p_old = pk + sk = x_new - x_old # Call the callback function if callable(self.callback): @@ -428,14 +407,11 @@ def calc_update(self, iter_resamp=0): # Update BFGS if self.method == 'BFGS': - sk = x_new - x_old - yk = j_new - j_old - rho = 1/np.dot(yk,sk) - id_mat = np.eye(sk.size) + yk = j_new - j_old + if self.iteration == 1: + self.Hk_inv = np.dot(yk,sk)/np.dot(yk,yk) * np.eye(sk.size) - matrix1 = (id_mat - rho*np.outer(sk, yk)) - matrix2 = (id_mat - rho*np.outer(yk, sk)) - self.Hk_inv = matrix1@self.Hk_inv@matrix2 + rho*np.outer(sk, sk) + self.Hk_inv = bfgs_update(self.Hk_inv, sk, yk) # Update status success = True @@ -448,9 +424,36 @@ def calc_update(self, iter_resamp=0): # Write logging info if self.logger is not None: self.logger.info('') - self.logger.info(f' {"iter.":<10} {"fun":<15} {"step-size":<15} {"|grad|":<15}') - self.logger.info(f' {self.iteration:<10} {self.fk:<15.4e} {step_size:<15.4e} {la.norm(self.jk):<15.4e}') + self.logger.info(f' {"iter.":<10} {fun_xk_symbol:<15} {jac_inf_symbol:<15} {"step-size":<15}') + self.logger.info(f' {self.iteration:<10} {self.fk:<15.4e} {la.norm(self.jk, np.inf):<15.4e} {step_size:<15.4e}') self.logger.info('') + + # Check for convergence + if (la.norm(sk, np.inf) < self.xtol): + self.msg = 'Convergence criteria met: |dx| < xtol' + self.logger.info(self.msg) + success = False + return success + if (np.abs(self.fk - f_old) < self.ftol * np.abs(f_old)): + self.msg = 'Convergence criteria met: |f(x+dx) - f(x)| < ftol * |f(x)|' + self.logger.info(self.msg) + success = False + return success + if (la.norm(self.jk, np.inf) < self.gtol): + self.msg = f'Convergence criteria met: {jac_inf_symbol} < gtol' + self.logger.info(self.msg) + success = False + return success + + # Check for custom convergence + if callable(self.convergence_criteria): + if self.convergence_criteria(self): + self.logger.info('Custom convergence criteria met. 
Stopping optimization.') + success = False + return success + + if self.step_size_adapt == 2: + self.step_size = step_size # Update iteration self.iteration += 1 @@ -469,195 +472,122 @@ def calc_update(self, iter_resamp=0): success = False return success + + def update_results(self): + + res = {'fun': self.fk, + 'x': self.xk, + 'jac': self.jk, + 'hess': self.Hk, + 'hess_inv': self.Hk_inv, + 'nfev': self.nfev, + 'njev': self.njev, + 'nit': self.iteration, + 'step-size': self.step_size, + 'method': self.method, + 'save_folder': self.options.get('save_folder', './')} + + for a, arg in enumerate(self.args): + res[f'args[{a}]'] = arg + if 'savedata' in self.options: + # Make sure "SAVEDATA" gives a list + if isinstance(self.options['savedata'], list): + savedata = self.options['savedata'] + else: + savedata = [self.options['savedata']] -def line_search(fun, jac, xk, pk, ak, fk=None, gk=None, c1=0.0001, c2=0.9, maxiter=10, **kwargs): - ''' - Performs a single line search step - ''' - line_search_step = LineSearchStepBase( - fun, - jac, - xk, - pk, - ak, - fk, - gk, - c1, - c2, - maxiter, - **kwargs - ) - return line_search_step() - -class LineSearchStepBase: - - def __init__(self, fun, jac, xk, pk, ak, fk=None, gk=None, c1=0.0001, c2=0.9, maxiter=10, **kwargs): - self.fun = fun - self.jac = jac - self.xk = xk - self.pk = pk - self.ak = ak - self.fk = fk - self.gk = gk - self.c1 = c1 - self.c2 = c2 - self.maxiter = maxiter - self.msg = '' - - # kwargs - self.amax = kwargs.get('amax', 1e5) - self.amin = kwargs.get('amin', 0.0) - self.xtol = kwargs.get('xtol', 1e-8) - self.method = kwargs.get('method', 1) - self.logger = kwargs.get('logger', None) - - # If c2 is None, the curvature condition is not used - if self.c2 is None: - self.c2 = np.inf - self.method = 0 - - # Check for initial values - if self.fk is None: - self.phi0 = self.phi(0, eval=False) - else: - self.phi0 = self.fk + # Loop over variables to store in save list + for save_typ in savedata: + if save_typ in locals(): + res[save_typ] = eval('{}'.format(save_typ)) + elif hasattr(self, save_typ): + res[save_typ] = eval(' self.{}'.format(save_typ)) + else: + print(f'Cannot save {save_typ}!\n\n') + + return OptimizeResult(res) + + def _set_step_size(self, pk, amax): + ''' Sets the step-size ''' + + # If first iteration + if (self.iteration == 1): + if (self.step_size is None): + self.step_size = 0.25/la.norm(pk, np.inf) + alpha = self.step_size + else: + alpha = self.step_size - if self.gk is None: - self.dphi0 = self.dphi(0, eval=False) else: - self.dphi0 = np.dot(self.pk, self.gk) + if (self.step_size_adapt == 1) and (np.dot(pk, self.jk) != 0): + alpha = 2*(self.fk - self.f_old)/np.dot(pk, self.jk) + elif (self.step_size_adapt == 2) and (np.dot(pk, self.jk) == 0): + slope_old = np.dot(self.p_old, self.j_old) + slope_new = np.dot(pk, self.jk) + alpha = self.step_size*slope_old/slope_new + else: + alpha = self.step_size + if alpha < 0: + alpha = abs(alpha) - def __call__(self): + if alpha >= amax: + alpha = 0.75*amax - if self.method == 0: - step_size, fnew = self._line_search_alpha_cut(step_size=self.ak) - - if self.method == 1: - step_size, fnew = self._line_search_alpha_interpol(step_size=self.ak) - - if self.method == 2: - dcsrch = DCSRCH( - self.phi, - self.dphi, - self.c1, - self.c2, - self.xtol, - self.amin, - self.amax - ) - dcsrch_res = dcsrch( - self.ak, - phi0=self.phi0, - derphi0=self.dphi0, - maxiter=self.maxiter - ) - step_size, fnew, _, self.msg = dcsrch_res - self.msg = str(self.msg) - - if step_size is None: - if self.msg is 
None: - self.msg = 'Line search did not find a solution' - return None, None, None, None, self.msg - elif la.norm(step_size*self.pk) <= self.xtol: - self.msg = f'|dx| < {self.xtol}' - return None, None, None, None, self.msg - else: - step_size = min(step_size, self.amax) - self.msg = 'Line search was successful' - return step_size, fnew, self.phi0, self.jac_val, self.msg + return alpha + + def _project_pk(self, pk, xk): + ''' Projects the jacobian onto the feasible set defined by bounds ''' + lb = np.array(self.bounds)[:, 0] + ub = np.array(self.bounds)[:, 1] + for i, pk_val in enumerate(pk): + if (xk[i] <= lb[i] and pk_val < 0) or (xk[i] >= ub[i] and pk_val > 0): + pk[i] = 0 + return pk + + def _set_max_step_size(self, pk, xk): + lb = np.array(self.bounds)[:, 0] + ub = np.array(self.bounds)[:, 1] + amax = [] + for i, pk_val in enumerate(pk): + if pk_val < 0: + amax.append((lb[i] - xk[i])/pk_val) + elif pk_val > 0: + amax.append((ub[i] - xk[i])/pk_val) + else: + amax.append(np.inf) + amax = min(amax) + return amax - def _line_search_alpha_cut(self, step_size): - ak = step_size - for i in range(self.maxiter): - phi_new = self.phi(ak) - # Check Armijo Condition - if phi_new < self.phi0 + self.c1*ak*self.dphi0: - dphi_new = self.dphi(ak) +def bfgs_update(Hk, sk, yk): + """ + Perform the BFGS update of the inverse Hessian approximation. - # Curvature condition - if abs(dphi_new) <= abs(self.c2*self.dphi0): - return ak, phi_new - - ak = ak/2 - - return None, None - - def _line_search_alpha_interpol(self, step_size): - ak = step_size + Parameters: + - Hk: np.ndarray, current inverse Hessian approximation (n x n) + - sk: np.ndarray, step vector (x_{k+1} - x_k), shape (n,) + - yk: np.ndarray, gradient difference (grad_{k+1} - grad_k), shape (n,) - # Some lists - alpha = [0.0] - phi = [self.phi0] - dphi = [self.dphi0] + Returns: + - Hk_new: np.ndarray, updated inverse Hessian approximation + """ + sk = sk.reshape(-1, 1) + yk = yk.reshape(-1, 1) + rho = 1.0 / (yk.T @ sk) - for i in range(1, self.maxiter+1): - - # Append lists - alpha.append(ak) - phi.append(self.phi(ak)) - dphi.append(self.dphi(ak)) - - # Check Armijo Condition - if phi[i] > self.phi0 + self.c1*alpha[i]*self.dphi0 or (phi[i] >= phi[i-1] and i>1): - step_size_new, phi_new = self._zoom(alpha[i-1], alpha[i], phi[i-1], phi[i], dphi[i-1]) - return step_size_new, phi_new - - if abs(dphi[i]) < - self.c2*self.dphi0: - return alpha[i], phi[i] - - # Check Curvature condition - if dphi[i] >= 0: - step_size_new, phi_new = self._zoom(alpha[i], alpha[i-1], phi[i], phi[i-1], dphi[i]) - return step_size_new, phi_new - - if alpha[i] >= self.amax: - return None, None - else: - ak = ak*2 - - return None, None + if rho <= 0: + raise ValueError("Non-positive curvature detected. 
BFGS update skipped.") + I = np.eye(Hk.shape[0]) + Vk = I - rho * sk @ yk.T + Hk_new = Vk @ Hk @ Vk.T + rho * sk @ sk.T - def log(self, msg): - if self.logger is None: - print(msg) - else: - self.logger.info(msg) - - @cache - def phi(self, a, eval=True): - if eval: - self.log(' Evaluating Armijo Condition') - return self.fun(self.xk + a*self.pk) - - @cache - def dphi(self, a, eval=True): - if eval: - self.log(' Evaluating Curvature Condition') - jval = self.jac(self.xk + a*self.pk) - self.jac_val = jval - return np.dot(self.pk, jval) - - def _zoom(self, a_lo, a_hi, phi_lo, phi_hi, dphi_low): - alpha_new, phi_new, _ = _zoom(a_lo=a_lo, - a_hi=a_hi, - phi_lo=phi_lo, - phi_hi=phi_hi, - derphi_lo=dphi_low, - phi=self.phi, - derphi=self.dphi, - phi0=self.phi0, - derphi0=self.dphi0, - c1=self.c1, - c2=self.c2, - extra_condition=lambda *args: True) - return alpha_new, phi_new + return Hk_new def get_near_psd(A): @@ -688,7 +618,6 @@ def make_matrix_psd(A, maxiter=100): return None - From af046a0b0d4212ca1f85d1f3b48a2287530fe384 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Thu, 21 Aug 2025 13:12:54 +0200 Subject: [PATCH 08/14] added Newton-CG to LineSearch --- popt/update_schemes/linesearch.py | 147 ++++++++++++++++------------ popt/update_schemes/trust_region.py | 76 +++++++------- 2 files changed, 122 insertions(+), 101 deletions(-) diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index df4a0a8..e5137f8 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -17,8 +17,10 @@ # some symbols for logger subk = '\u2096' +sup2 = '\u00b2' jac_inf_symbol = f'‖jac(x{subk})‖\u221E' fun_xk_symbol = f'fun(x{subk})' +nabla_symbol = "\u2207" def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callback=None, **options): @@ -39,7 +41,7 @@ def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callba method: str Which optimization method to use. Default is 'GD' for 'Gradient Descent'. Other options are 'BFGS' for the 'Broyden–Fletcher–Goldfarb–Shanno' method, - and 'Newton' for the Newton method. + and 'Newton-CG'. hess: callable, optional Hessian function, hess(x, *args). Default is None. @@ -233,14 +235,12 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c self.gtol = options.get('gtol', 1e-5) # tolerance for inf-norm of jacobian # Check method - valid_methods = ['GD', 'BFGS', 'Newton'] + valid_methods = ['GD', 'BFGS', 'Newton-CG'] if not self.method in valid_methods: raise ValueError(f"'{self.method}' is not a valid method. Valid methods are: {valid_methods}") - # Make sure hessian is callable if mehtod='Newton' - if (self.method == 'Newton') and (not callable(self.hessian)): - warnings.warn('Newton’s method requires a hessian, method changed to BFGS') - self.method = 'BFGS' + if (self.method == 'Newton-CG') and (self.hessian is None): + print(f'Warning: No hessian function provided. 
Finite difference approximation is used: {nabla_symbol}{sup2}f(x{subk})d ≈ ({nabla_symbol}f(x{subk}+hd)-{nabla_symbol}f(x{subk}))/h') # Calculate objective function of startpoint if not self.restart: @@ -267,7 +267,7 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c self.p_old = None # Initial results - self.optimize_result = self.update_results() + self.optimize_result = self.get_intermediate_results() if self.saveit: ot.save_optimize_results(self.optimize_result) if self.logger is not None: @@ -313,7 +313,7 @@ def _hess(self, x): h = self.hessian(x) else: h = self.hessian(x, *self.args) - return make_matrix_psd(h) + return h def calc_update(self, iter_resamp=0): @@ -341,8 +341,8 @@ def calc_update(self, iter_resamp=0): pk = - self.jk if self.method == 'BFGS': pk = - np.matmul(self.Hk_inv, self.jk) - if self.method == 'Newton': - pk = - np.matmul(la.inv(self.Hk), self.jk) + if self.method == 'Newton-CG': + pk = newton_cg(self.jk, Hk=self.Hk, xk=self.xk, jac=self._jac, eps=1e-4) # porject search direction onto the feasible set if self.bounds is not None: @@ -408,16 +408,14 @@ def calc_update(self, iter_resamp=0): # Update BFGS if self.method == 'BFGS': yk = j_new - j_old - if self.iteration == 1: - self.Hk_inv = np.dot(yk,sk)/np.dot(yk,yk) * np.eye(sk.size) - + if self.iteration == 1: self.Hk_inv = np.dot(yk,sk)/np.dot(yk,yk) * np.eye(sk.size) self.Hk_inv = bfgs_update(self.Hk_inv, sk, yk) # Update status success = True # Save Results - self.optimize_result = self.update_results() + self.optimize_result = self.get_intermediate_results() if self.saveit: ot.save_optimize_results(self.optimize_result) @@ -473,23 +471,19 @@ def calc_update(self, iter_resamp=0): return success - - def update_results(self): - - res = {'fun': self.fk, - 'x': self.xk, - 'jac': self.jk, - 'hess': self.Hk, - 'hess_inv': self.Hk_inv, - 'nfev': self.nfev, - 'njev': self.njev, - 'nit': self.iteration, - 'step-size': self.step_size, - 'method': self.method, - 'save_folder': self.options.get('save_folder', './')} - - for a, arg in enumerate(self.args): - res[f'args[{a}]'] = arg + def get_intermediate_results(self): + + # Define default results + results = { + 'fun': self.fk, + 'x': self.xk, + 'jac': self.jk, + 'nfev': self.nfev, + 'njev': self.njev, + 'nit': self.iteration, + 'method': self.method, + 'save_folder': self.options.get('save_folder', './') + } if 'savedata' in self.options: # Make sure "SAVEDATA" gives a list @@ -498,16 +492,20 @@ def update_results(self): else: savedata = [self.options['savedata']] + if 'args' in savedata: + for a, arg in enumerate(self.args): + results[f'args[{a}]'] = arg + # Loop over variables to store in save list - for save_typ in savedata: - if save_typ in locals(): - res[save_typ] = eval('{}'.format(save_typ)) - elif hasattr(self, save_typ): - res[save_typ] = eval(' self.{}'.format(save_typ)) + for variable in savedata: + if variable in locals(): + results[variable] = eval('{}'.format(variable)) + elif hasattr(self, variable): + results[variable] = eval('self.{}'.format(variable)) else: - print(f'Cannot save {save_typ}!\n\n') + print(f'Cannot save {variable}!\n\n') - return OptimizeResult(res) + return OptimizeResult(results) def _set_step_size(self, pk, amax): ''' Sets the step-size ''' @@ -589,33 +587,56 @@ def bfgs_update(Hk, sk, yk): return Hk_new +def newton_cg(gk, Hk=None, maxiter=None, **kwargs): + print('\nRunning Newton-CG subroutine...') -def get_near_psd(A): - eigval, eigvec = np.linalg.eig((A + A.T)/2) - eigval[eigval < 0] = 1.0 - return 
eigvec.dot(np.diag(eigval)).dot(eigvec.T) + if Hk is None: + jac = kwargs.get('jac') + eps = kwargs.get('eps', 1e-4) + xk = kwargs.get('xk') + + # define a finite difference approximation of the Hessian times a vector + def Hessd(d): + return (jac(xk + eps*d) - gk)/eps + + if maxiter is None: + maxiter = 20*gk.size # Same dfault as in scipy + + tol = min(0.5, np.sqrt(la.norm(gk)))*la.norm(gk) + z = 0 + r = gk + d = -r + + for j in range(maxiter): + print('iteration: ', j) + if Hk is None: + Hd = Hessd(d) + else: + Hd = np.matmul(Hk, d) + + dTHd = np.dot(d, Hd) + + if dTHd <= 0: + print('Negative curvature detected, terminating subroutine') + print('\n') + if j == 0: + return -gk + else: + return z + + rold = r + a = np.dot(r,r)/dTHd + z = z + a*d + r = r + a*Hd + + if la.norm(r) < tol: + print('Subroutine converged') + print('\n') + return z + + b = np.dot(r, r)/np.dot(rold, rold) + d = -r + b*d -def make_matrix_psd(A, maxiter=100): - # Set beta to Frobenius norm of A - beta = np.linalg.norm(A, 'fro') - - # Initialize tau - if np.min(np.diag(A)) > 0: - tau = 0 - else: - tau = beta/2 - - for _ in range(maxiter): - try: - M = A + tau*np.eye(A.shape[0]) - # Attempt Cholesky - np.linalg.cholesky(A + tau*np.eye(A.shape[0])) - return M - except np.linalg.LinAlgError: - # Set new tau - tau = max(2*tau, beta/2) - - return None diff --git a/popt/update_schemes/trust_region.py b/popt/update_schemes/trust_region.py index cd616f9..b23f6b6 100644 --- a/popt/update_schemes/trust_region.py +++ b/popt/update_schemes/trust_region.py @@ -239,44 +239,6 @@ def _hess(self, x): else: h = self.hessian(x, *self.args) return h - - def update_results(self): - res = { - 'fun': self.fk, - 'x': self.xk, - 'jac': self.jk, - 'hess': self.Hk, - 'nfev': self.nfev, - 'njev': self.njev, - 'nit': self.iteration, - 'trust_radius': self.trust_radius, - 'save_folder': self.options.get('save_folder', './') - } - - for a, arg in enumerate(self.args): - res[f'args[{a}]'] = arg - - if 'savedata' in self.options: - # Make sure "SAVEDATA" gives a list - if isinstance(self.options['savedata'], list): - savedata = self.options['savedata'] - else: - savedata = [self.options['savedata']] - - # Loop over variables to store in save list - for save_typ in savedata: - if save_typ in locals(): - res[save_typ] = eval('{}'.format(save_typ)) - elif hasattr(self, save_typ): - res[save_typ] = eval(' self.{}'.format(save_typ)) - else: - print(f'Cannot save {save_typ}!\n\n') - - return OptimizeResult(res) - - def _log(self, msg): - if self.logger is not None: - self.logger.info(msg) def solve_subproblem(self, g, B, delta): """ @@ -426,6 +388,44 @@ def calc_update(self, inner_iter=0): success = False return success + + def update_results(self): + res = { + 'fun': self.fk, + 'x': self.xk, + 'jac': self.jk, + 'hess': self.Hk, + 'nfev': self.nfev, + 'njev': self.njev, + 'nit': self.iteration, + 'trust_radius': self.trust_radius, + 'save_folder': self.options.get('save_folder', './') + } + + for a, arg in enumerate(self.args): + res[f'args[{a}]'] = arg + + if 'savedata' in self.options: + # Make sure "SAVEDATA" gives a list + if isinstance(self.options['savedata'], list): + savedata = self.options['savedata'] + else: + savedata = [self.options['savedata']] + + # Loop over variables to store in save list + for save_typ in savedata: + if save_typ in locals(): + res[save_typ] = eval('{}'.format(save_typ)) + elif hasattr(self, save_typ): + res[save_typ] = eval(' self.{}'.format(save_typ)) + else: + print(f'Cannot save {save_typ}!\n\n') + + return 
OptimizeResult(res) + + def _log(self, msg): + if self.logger is not None: + self.logger.info(msg) From ab6543ea2aa73d3544e7b2ec2019ad88dc4c13e8 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Mon, 1 Sep 2025 09:51:56 +0200 Subject: [PATCH 09/14] improved logging for LineSearch --- popt/update_schemes/line_search_step.py | 12 ++++++++++++ popt/update_schemes/linesearch.py | 11 ++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/popt/update_schemes/line_search_step.py b/popt/update_schemes/line_search_step.py index 5207940..3f4af38 100644 --- a/popt/update_schemes/line_search_step.py +++ b/popt/update_schemes/line_search_step.py @@ -68,6 +68,12 @@ def line_search(step_size, xk, pk, fun, jac, fk=None, jk=None, **kwargs): c1 = kwargs.get('c1', 1e-4) c2 = kwargs.get('c2', 0.9) + # check for logger in kwargs + global logger + logger = kwargs.get('logger', None) + if logger is None: + logger = print + # assertions assert step_size <= amax, "Initial step size must be less than or equal to amax." @@ -82,6 +88,7 @@ def phi(alpha): else: phi.fun_val = fk else: + logger(' Evaluating Armijo condition') phi.fun_val = fun(xk + alpha*pk) ls_nfev += 1 return phi.fun_val @@ -96,6 +103,7 @@ def dphi(alpha): else: dphi.jac_val = jk else: + logger(' Evaluating curvature condition') dphi.jac_val = jac(xk + alpha*pk) ls_njev += 1 return np.dot(dphi.jac_val, pk) @@ -107,6 +115,8 @@ def dphi(alpha): # Start loop a = [0, step_size] for i in range(1, maxiter+1): + logger(f'Line search iteration: {i-1}') + # Evaluate phi(ai) phi_i = phi(a[i]) @@ -134,6 +144,7 @@ def dphi(alpha): a.append(min(2*a[i], amax)) # If we reached this point, the line search failed + logger('Line search failed to find a suitable step size \n') return None, None, None, ls_nfev, ls_njev @@ -145,6 +156,7 @@ def zoom(alo, ahi, f, df, f0, df0, maxiter, c1, c2): dphi_lo = df(alo) for j in range(maxiter): + logger(f'Line search iteration: {j+1}') tol_cubic = 0.2*(ahi-alo) tol_quad = 0.1*(ahi-alo) diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index e5137f8..e723f78 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -221,7 +221,8 @@ def __init__(self, fun, x, jac, method='GD', hess=None, args=(), bounds=None, c 'rho': options.get('rho', 0.5), 'amax': self.step_size_max, 'maxiter': options.get('lsmaxiter', 10), - 'method' : options.get('lsmethod', 1) + 'method' : options.get('lsmethod', 1), + 'logger' : self.logger.info } # Set other options @@ -355,7 +356,7 @@ def calc_update(self, iter_resamp=0): step_size = self._set_step_size(pk, self.step_size_max) # Perform line-search - self.logger.info('Performing line search...') + self.logger.info('Performing line search.............') if self.lskwargs['method'] == 0: ls_res = line_search_backtracking( step_size=step_size, @@ -556,9 +557,9 @@ def _set_max_step_size(self, pk, xk): elif pk_val > 0: amax.append((ub[i] - xk[i])/pk_val) else: - amax.append(np.inf) - amax = min(amax) - return amax + continue + + return max(amax) From 237b80a1f0886c72ccdbc78531ebf95479014623 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Wed, 3 Sep 2025 10:57:11 +0200 Subject: [PATCH 10/14] dummy message for github check --- popt/update_schemes/linesearch.py | 1 + 1 file changed, 1 insertion(+) diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index e723f78..20f8fc1 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -23,6 +23,7 @@ 
nabla_symbol = "\u2207" + def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callback=None, **options): ''' A Line Search Optimizer. From e29c159cc034ee7559285e2c1a8d951bf4ee3252 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Wed, 3 Sep 2025 11:06:51 +0200 Subject: [PATCH 11/14] empty commit --- popt/update_schemes/linesearch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index 20f8fc1..e723f78 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -23,7 +23,6 @@ nabla_symbol = "\u2207" - def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callback=None, **options): ''' A Line Search Optimizer. From 32abdce95f93868f239c510562a8d94263862b33 Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Wed, 3 Sep 2025 13:28:27 +0200 Subject: [PATCH 12/14] udpate BFGS to skip update if negetive curvature --- popt/update_schemes/linesearch.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py index e723f78..57a2a09 100644 --- a/popt/update_schemes/linesearch.py +++ b/popt/update_schemes/linesearch.py @@ -5,17 +5,14 @@ import warnings from numpy import linalg as la -from functools import cache from scipy.optimize import OptimizeResult -from scipy.optimize._dcsrch import DCSRCH -from scipy.optimize._linesearch import _zoom # Internal imports from popt.misc_tools import optim_tools as ot from popt.loop.optimize import Optimize from popt.update_schemes.line_search_step import line_search, line_search_backtracking -# some symbols for logger +# Some symbols for logger subk = '\u2096' sup2 = '\u00b2' jac_inf_symbol = f'‖jac(x{subk})‖\u221E' @@ -580,7 +577,8 @@ def bfgs_update(Hk, sk, yk): rho = 1.0 / (yk.T @ sk) if rho <= 0: - raise ValueError("Non-positive curvature detected. BFGS update skipped.") + print('Non-positive curvature detected. 
BFGS update skipped....') + return Hk I = np.eye(Hk.shape[0]) Vk = I - rho * sk @ yk.T From 4ae14f76339094b190cd06f20b0cbbbb02fe4f3b Mon Sep 17 00:00:00 2001 From: Mathias Methlie Nilsen Date: Mon, 8 Sep 2025 09:52:14 +0200 Subject: [PATCH 13/14] made input more elegant and cleaned up popt structure --- ensemble/ensemble.py | 349 ++++++------------ input_output/organize.py | 8 + input_output/read_config.py | 11 +- pipt/loop/assimilation.py | 34 +- pipt/update_schemes/enrml.py | 82 ++-- pipt/update_schemes/esmda.py | 38 +- popt/cost_functions/ecalc_npv.py | 4 +- popt/cost_functions/ecalc_pareto_npv.py | 4 +- popt/cost_functions/npv.py | 4 +- popt/cost_functions/ren_npv.py | 4 +- popt/update_schemes/enopt.py | 2 +- popt/update_schemes/genopt.py | 4 +- popt/update_schemes/linesearch.py | 84 +---- popt/update_schemes/smcopt.py | 2 +- popt/update_schemes/subroutines/__init__.py | 3 + popt/update_schemes/{ => subroutines}/cma.py | 2 + .../{ => subroutines}/optimizers.py | 2 + .../subroutines.py} | 96 ++++- 18 files changed, 294 insertions(+), 439 deletions(-) create mode 100644 popt/update_schemes/subroutines/__init__.py rename popt/update_schemes/{ => subroutines}/cma.py (99%) rename popt/update_schemes/{ => subroutines}/optimizers.py (99%) rename popt/update_schemes/{line_search_step.py => subroutines/subroutines.py} (79%) diff --git a/ensemble/ensemble.py b/ensemble/ensemble.py index 304b240..d4dc46a 100644 --- a/ensemble/ensemble.py +++ b/ensemble/ensemble.py @@ -80,7 +80,7 @@ def __init__(self, keys_en, sim, redund_sim=None): # If it is a restart run, we do not need to initialize anything, only load the self info. that exists in the # pickle save file. If it is not a restart run, we initialize everything below. - if 'restart' in self.keys_en and self.keys_en['restart'] == 'yes': + if ('restart' in self.keys_en) and (self.keys_en['restart'] == 'yes'): # Initiate a restart run self.logger.info('\033[92m--- Restart run initiated! ---\033[92m') # Check if the pickle save file exists in folder @@ -117,7 +117,7 @@ def __init__(self, keys_en, sim, redund_sim=None): self.disable_tqdm = False # extract information that is given for the prior model - self._ext_prior_info() + self.prior_info = self._extract_prior_info() # Calculate initial ensemble if IMPORTSTATICVAR has not been given in init. file. # Prior info. on state variables must be given by PRIOR_ keyword. @@ -172,243 +172,116 @@ def _ext_ml_info(self): self.error_comp_scheme = self.keys_en['multilevel'][i][2] self.ML_corr_done = False - def _ext_prior_info(self): - """ + def _extract_prior_info(self) -> dict: + ''' Extract prior information on STATE from keyword(s) PRIOR_. - """ - # Parse prior info on each state entered in STATE. - # Store names given in STATE - if not isinstance(self.keys_en['state'], list): # Single string - state_names = [self.keys_en['state']] - else: # List - state_names = self.keys_en['state'] - - # Check if PRIOR_ exists for each entry in STATE + ''' + + # Get state names as list + state_names = self.keys_en['state'] + if not isinstance(state_names, list): state_names = [state_names] + + # Check if PRIOR_ exists for each entry in state for name in state_names: - assert 'prior_' + name in self.keys_en, \ + assert f'prior_{name}' in self.keys_en, \ 'PRIOR_{0} is missing! This keyword is needed to make initial ensemble for {0} entered in ' \ 'STATE'.format(name.upper()) + + # define dict to store prior information in + prior_info = {name: None for name in state_names} - # Init. 
prior info variable - self.prior_info = {keys: None for keys in state_names} - - # Loop over each prior keyword and make an initial. ensemble for each state in STATE, - # which is subsequently stored in the state dictionary. If 3D grid dimensions are inputted, information for - # each layer must be inputted, else the single information will be copied to all layers. - grid_dim = np.array([0, 0]) + # loop over state priors for name in state_names: - # initiallize an empty dictionary inside the dictionary. - self.prior_info[name] = {} - # List the option names inputted in prior keyword - # opt_list = list(zip(*self.keys_da['prior_'+name])) - mean = None - self.prior_info[name]['mean'] = mean - vario = [None] - self.prior_info[name]['vario'] = vario - aniso = [None] - self.prior_info[name]['aniso'] = aniso - angle = [None] - self.prior_info[name]['angle'] = angle - corr_length = [None] - self.prior_info[name]['corr_length'] = corr_length - self.prior_info[name]['nx'] = self.prior_info[name]['ny'] = self.prior_info[name]['nz'] = None - - # Extract info. from the prior keyword - for i, opt in enumerate(list(zip(*self.keys_en['prior_' + name]))[0]): - if opt == 'vario': # Variogram model - if not isinstance(self.keys_en['prior_' + name][i][1], list): - vario = [self.keys_en['prior_' + name][i][1]] - else: - vario = self.keys_en['prior_' + name][i][1] - elif opt == 'mean': # Mean - mean = self.keys_en['prior_' + name][i][1] - elif opt == 'var': # Variance - if not isinstance(self.keys_en['prior_' + name][i][1], list): - variance = [self.keys_en['prior_' + name][i][1]] - else: - variance = self.keys_en['prior_' + name][i][1] - elif opt == 'aniso': # Anisotropy factor - if not isinstance(self.keys_en['prior_' + name][i][1], list): - aniso = [self.keys_en['prior_' + name][i][1]] - else: - aniso = self.keys_en['prior_' + name][i][1] - elif opt == 'angle': # Anisotropy angle - if not isinstance(self.keys_en['prior_' + name][i][1], list): - angle = [self.keys_en['prior_' + name][i][1]] - else: - angle = self.keys_en['prior_' + name][i][1] - elif opt == 'range': # Correlation length - if not isinstance(self.keys_en['prior_' + name][i][1], list): - corr_length = [self.keys_en['prior_' + name][i][1]] + prior = self.keys_en[f'prior_{name}'] + + # Check if is a list (old way) + if isinstance(prior, list): + # list of lists - old way of inputting prior information + prior_dict = {} + for i, opt in enumerate(list(zip(*prior))[0]): + if opt == 'limits': + prior_dict[opt] = prior[i][1:] else: - corr_length = self.keys_en['prior_' + name][i][1] - elif opt == 'grid': # Grid dimensions - grid_dim = self.keys_en['prior_' + name][i][1] - elif opt == 'limits': # Truncation values - limits = self.keys_en['prior_' + name][i][1:] - elif opt == 'active': # Number of active cells (single number) - active = self.keys_en['prior_' + name][i][1] - - # Check if mean needs to be loaded, or if loaded - if type(mean) is str: - assert mean.endswith('.npz'), 'File name does not end with \'.npz\'!' - load_file = np.load(mean) + prior_dict[opt] = prior[i][1] + prior = prior_dict + else: + assert isinstance(prior, dict), 'PRIOR_{0} must be a dictionary or list of lists!'.format(name.upper()) + + + # load mean if in file + if isinstance(prior['mean'], str): + assert prior['mean'].endswith('.npz'), 'File name does not end with \'.npz\'!' + load_file = np.load(prior['mean']) assert len(load_file.files) == 1, \ 'More than one variable located in {0}. Only the mean vector can be stored in the .npz file!' 
\ - .format(mean) - mean = load_file[load_file.files[0]] + .format(prior['mean']) + prior['mean'] = load_file[load_file.files[0]] else: # Single number inputted, make it a list if not already - if not isinstance(mean, list): - mean = [mean] - - # Check if limits exists - try: - limits - except NameError: - limits = None - - # check if active exists - try: - active - except NameError: - active = None - - # Extract x- and y-dim - nx = int(grid_dim[0]) - ny = int(grid_dim[1]) - - # Check if 3D grid inputted. If so, we check if info. has been given on all layers. In the case it has - # not been given, we just copy the info. given. - if len(grid_dim) == 3 and grid_dim[2] > 1: # 3D - nz = int(grid_dim[2]) - - # Check mean when values have been inputted directly (not when mean has been loaded) - if isinstance(mean, list) and len(mean) < nz: - # Check if it is more than one entry and give error - assert len(mean) == 1, \ - 'Information from MEAN has been given for {0} layers, whereas {1} is needed!' \ - .format(len(mean), nz) - - # Only 1 entry; copy this to all layers - print( - '\033[1;33mSingle entry for MEAN will be copied to all {0} layers\033[1;m'.format(nz)) - self.prior_info[name]['mean'] = mean * nz - - else: - self.prior_info[name]['mean'] = mean - - # Check variogram model - if len(vario) < nz: - # Check if it is more than one entry and give error - assert len(vario) == 1, \ - 'Information from VARIO has been given for {0} layers, whereas {1} is needed!' \ - .format(len(vario), nz) - - # Only 1 entry; copy this to all layers - print( - '\033[1;33mSingle entry for VARIO will be copied to all {0} layers\033[1;m'.format(nz)) - self.prior_info[name]['vario'] = vario * nz - - else: - self.prior_info[name]['vario'] = vario - - # Variance - if len(variance) < nz: - # Check if it is more than one entry and give error - assert len(variance) == 1, \ - 'Information from VAR has been given for {0} layers, whereas {1} is needed!' \ - .format(len(variance), nz) - - # Only 1 entry; copy this to all layers - print( - '\033[1;33mSingle entry for VAR will be copied to all {0} layers\033[1;m'.format(nz)) - self.prior_info[name]['variance'] = variance * nz - - else: - self.prior_info[name]['variance'] = variance - - # Aniso factor - if len(aniso) < nz: - # Check if it is more than one entry and give error - assert len(aniso) == 1, \ - 'Information from ANISO has been given for {0} layers, whereas {1} is needed!' \ - .format(len(aniso), nz) - - # Only 1 entry; copy this to all layers - print( - '\033[1;33mSingle entry for ANISO will be copied to all {0} layers\033[1;m'.format(nz)) - self.prior_info[name]['aniso'] = aniso * nz - - else: - self.prior_info[name]['aniso'] = aniso - - # Aniso factor - if len(angle) < nz: - # Check if it is more than one entry and give error - assert len(angle) == 1, \ - 'Information from ANGLE has been given for {0} layers, whereas {1} is needed!' 
\ - .format(len(angle), nz) - - # Only 1 entry; copy this to all layers - print( - '\033[1;33mSingle entry for ANGLE will be copied to all {0} layers\033[1;m'.format(nz)) - self.prior_info[name]['angle'] = angle * nz + if not isinstance(prior['mean'], list): + prior['mean'] = [prior['mean']] + + # loop over keys in prior + for key in prior.keys(): + # ensure that entry is a list + if (not isinstance(prior[key], list)) and (key != 'mean'): + prior[key] = [prior[key]] + + # change the name of some keys + prior['variance'] = prior.pop('var', None) + prior['corr_length'] = prior.pop('range', None) + + # process grid + if 'grid' in prior: + grid_dim = prior['grid'] + + # check if 3D-grid + if (len(grid_dim) == 3) and (grid_dim[2] > 1): + nz = int(grid_dim[2]) + prior['nz'] = nz + prior['nx'] = int(grid_dim[0]) + prior['ny'] = int(grid_dim[1]) + + + # Check mean when values have been inputted directly (not when mean has been loaded) + mean = prior['mean'] + if isinstance(mean, list) and len(mean) < nz: + # Check if it is more than one entry and give error + assert len(mean) == 1, \ + 'Information from MEAN has been given for {0} layers, whereas {1} is needed!' \ + .format(len(mean), nz) + + # Only 1 entry; copy this to all layers + print( + '\033[1;33mSingle entry for MEAN will be copied to all {0} layers\033[1;m'.format(nz)) + prior['mean'] = mean * nz + + #check if info. has been given on all layers. In the case it has not been given, we just copy the info. given. + for key in ['vario', 'variance', 'aniso', 'angle', 'corr_length']: + if key in prior.keys(): + val = prior[key] + if len(val) < nz: + # Check if it is more than one entry and give error + assert len(val) == 1, \ + 'Information from {0} has been given for {1} layers, whereas {2} is needed!' \ + .format(key.upper(), len(val), nz) + + # Only 1 entry; copy this to all layers + print( + '\033[1;33mSingle entry for {0} will be copied to all {1} layers\033[1;m'.format(key.upper(), nz)) + prior[key] = val * nz else: - self.prior_info[name]['angle'] = angle + prior['nx'] = int(grid_dim[0]) + prior['ny'] = int(grid_dim[1]) + prior['nz'] = 1 - # Corr. length - if len(corr_length) < nz: - # Check if it is more than one entry and give error - assert len(corr_length) == 1, \ - 'Information from RANGE has been given for {0} layers, whereas {1} is needed!' \ - .format(len(corr_length), nz) + prior.pop('grid', None) - # Only 1 entry; copy this to all layers - print( - '\033[1;33mSingle entry for RANGE will be copied to all {0} layers\033[1;m'.format(nz)) - self.prior_info[name]['corr_length'] = corr_length * nz - - else: - self.prior_info[name]['corr_length'] = corr_length - - # Limits, if exists - if limits is not None: - self.prior_info[name]['limits'] = limits - - # if isinstance(limits[0], list) and len(limits) < nz or \ - # not isinstance(limits[0], list) and len(limits) < 2 * nz: - # # Check if it is more than one entry and give error - # assert (isinstance(limits[0], list) and len(limits) == 1), \ - # 'Information from LIMITS has been given for {0} layers, whereas {1} is needed!' \ - # .format(len(limits), nz) - # assert (not isinstance(limits[0], list) and len(limits) == 2), \ - # 'Information from LIMITS has been given for {0} layers, whereas {1} is needed!' 
\ - # .format(len(limits) / 2, nz) - # - # # Only 1 entry; copy this to all layers - # print( - # '\033[1;33mSingle entry for RANGE will be copied to all {0} layers\033[1;m'.format(nz)) - # self.prior_info[name]['limits'] = [limits] * nz - - else: # 2D grid only, or optimization case - nz = 1 - self.prior_info[name]['mean'] = mean - self.prior_info[name]['vario'] = vario - self.prior_info[name]['variance'] = variance - self.prior_info[name]['aniso'] = aniso - self.prior_info[name]['angle'] = angle - self.prior_info[name]['corr_length'] = corr_length - if limits is not None: - self.prior_info[name]['limits'] = limits - if active is not None: - self.prior_info[name]['active'] = active - - self.prior_info[name]['nx'] = nx - self.prior_info[name]['ny'] = ny - self.prior_info[name]['nz'] = nz - - # Loop over keys and input + # add prior to prior_info + prior_info[name] = prior + + return prior_info + def gen_init_ensemble(self): """ @@ -427,21 +300,21 @@ def gen_init_ensemble(self): ind_end = 0 # Extract info. - nz = self.prior_info[name]['nz'] - mean = self.prior_info[name]['mean'] - nx = self.prior_info[name]['nx'] - ny = self.prior_info[name]['ny'] + nx = self.prior_info[name].get('nx', 0) + ny = self.prior_info[name].get('ny', 0) + nz = self.prior_info[name].get('nz', 0) + mean = self.prior_info[name].get('mean', None) + if nx == ny == 0: # assume ensemble will be generated elsewhere if dimensions are zero break - variance = self.prior_info[name]['variance'] - corr_length = self.prior_info[name]['corr_length'] - aniso = self.prior_info[name]['aniso'] - vario = self.prior_info[name]['vario'] - angle = self.prior_info[name]['angle'] - if 'limits' in self.prior_info[name]: - limits = self.prior_info[name]['limits'] - else: - limits = None + + variance = self.prior_info[name].get('variance', None) + corr_length = self.prior_info[name].get('corr_length', None) + aniso = self.prior_info[name].get('aniso', None) + vario = self.prior_info[name].get('vario', None) + angle = self.prior_info[name].get('angle', None) + limits= self.prior_info[name].get('limits',None) + # Loop over nz to make layers of 2D priors for i in range(self.prior_info[name]['nz']): diff --git a/input_output/organize.py b/input_output/organize.py index 3accf02..8e500e3 100644 --- a/input_output/organize.py +++ b/input_output/organize.py @@ -3,6 +3,7 @@ from copy import deepcopy import csv import datetime as dt +import pandas as pd class Organize_input(): @@ -109,6 +110,13 @@ def _org_report(self): pred_prim.extend(csv_data) self.keys_fwd['reportpoint'] = pred_prim + elif isinstance(self.keys_fwd['reportpoint'], dict): + self.keys_fwd['reportpoint'] = pd.date_range(**self.keys_fwd['reportpoint']).to_pydatetime().tolist() + + else: + pass + + # Check if assimindex is given as a csv file. If so, we read and make a potential 2D list (if sequential). 
if 'assimindex' in self.keys_pr: if isinstance(self.keys_pr['assimindex'], str) and self.keys_pr['assimindex'].endswith('.csv'): diff --git a/input_output/read_config.py b/input_output/read_config.py index 986e755..ba00da2 100644 --- a/input_output/read_config.py +++ b/input_output/read_config.py @@ -51,6 +51,12 @@ def ndarray_constructor(loader, node): y = yaml.load(fid, Loader=FullLoader) # Check for dataassim and fwdsim + if 'ensemble' in y.keys(): + keys_en = y['ensemble'] + check_mand_keywords_en(keys_en) + else: + keys_en = None + if 'optim' in y.keys(): keys_pr = y['optim'] check_mand_keywords_opt(keys_pr) @@ -59,16 +65,17 @@ def ndarray_constructor(loader, node): check_mand_keywords_da(keys_pr) else: raise KeyError + if 'fwdsim' in y.keys(): keys_fwd = y['fwdsim'] else: raise KeyError # Organize keywords - org = Organize_input(keys_pr, keys_fwd) + org = Organize_input(keys_pr, keys_fwd, keys_en) org.organize() - return org.get_keys_pr(), org.get_keys_fwd() + return org.get_keys_pr(), org.get_keys_fwd(), org.get_keys_en() def convert_txt_to_toml(init_file): diff --git a/pipt/loop/assimilation.py b/pipt/loop/assimilation.py index 0ccd6dd..f5caa72 100644 --- a/pipt/loop/assimilation.py +++ b/pipt/loop/assimilation.py @@ -301,32 +301,20 @@ def _ext_max_iter(self): - ST 7/6-16 """ if 'iteration' in self.ensemble.keys_da: - # Make sure ITERATION is a list - if not isinstance(self.ensemble.keys_da['iteration'][0], list): - iter_opts = [self.ensemble.keys_da['iteration']] - else: - iter_opts = self.ensemble.keys_da['iteration'] - + iter_opts = dict(self.ensemble.keys_da['iteration']) # Check if 'max_iter' has been given; if not, give error (mandatory in ITERATION) - assert 'max_iter' in list( - zip(*iter_opts))[0], 'MAX_ITER has not been given in ITERATION!' - - # Extract max. iter - max_iter = [item[1] for item in iter_opts if item[0] == 'max_iter'][0] + try: + max_iter = iter_opts['max_iter'] + except KeyError: + raise AssertionError('MAX_ITER has not been given in ITERATION') elif 'mda' in self.ensemble.keys_da: - # Make sure ITERATION is a list - if not isinstance(self.ensemble.keys_da['mda'][0], list): - iter_opts = [self.ensemble.keys_da['mda']] - else: - iter_opts = self.ensemble.keys_da['mda'] - - # Check if 'max_iter' has been given; if not, give error (mandatory in ITERATION) - assert 'tot_assim_steps' in list( - zip(*iter_opts))[0], 'TOT_ASSIM_STEPS has not been given in MDA!' - - # Extract max. iter - max_iter = [item[1] for item in iter_opts if item[0] == 'tot_assim_steps'][0] + iter_opts = dict(self.ensemble.keys_da['mda']) + # Check if 'tot_assim_steps' has been given; if not, raise error (mandatory in MDA) + try: + max_iter = iter_opts['tot_assim_steps'] + except KeyError: + raise AssertionError('TOT_ASSIM_STEPS has not been given in MDA!') else: max_iter = 1 diff --git a/pipt/update_schemes/enrml.py b/pipt/update_schemes/enrml.py index 632564b..741b700 100644 --- a/pipt/update_schemes/enrml.py +++ b/pipt/update_schemes/enrml.py @@ -250,36 +250,22 @@ def _ext_iter_param(self): file. These parameters include convergence tolerances and parameters for the damping parameter. Default values for these parameters have been given here, if they are not provided in ITERATION. 
""" - - # Predefine all the default values - self.data_misfit_tol = 0.01 - self.step_tol = 0.01 - self.lam = 100 - self.lam_max = 1e10 - self.lam_min = 0.01 - self.gamma = 5 - self.trunc_energy = 0.95 + try: + options = dict(self.keys_da['iteration']) + except: + options = dict([self.keys_da['iteration']]) + + # unpack options + self.data_misfit_tol = options.get('data_misfit_tol', 0.01) + self.trunc_energy = options.get('energy', 0.95) + self.step_tol = options.get('step_tol', 0.01) + self.lam = options.get('lambda', 100) + self.lam_max = options.get('lambda_max', 1e10) + self.lam_min = options.get('lambda_min', 0.01) + self.gamma = options.get('lambda_factor', 5) self.iteration = 0 - # Loop over options in ITERATION and extract the parameters we want - for i, opt in enumerate(list(zip(*self.keys_da['iteration']))[0]): - if opt == 'data_misfit_tol': - self.data_misfit_tol = self.keys_da['iteration'][i][1] - if opt == 'step_tol': - self.step_tol = self.keys_da['iteration'][i][1] - if opt == 'lambda': - self.lam = self.keys_da['iteration'][i][1] - if opt == 'lambda_max': - self.lam_max = self.keys_da['iteration'][i][1] - if opt == 'lambda_min': - self.lam_min = self.keys_da['iteration'][i][1] - if opt == 'lambda_factor': - self.gamma = self.keys_da['iteration'][i][1] - - if 'energy' in self.keys_da: - # initial energy (Remember to extract this) - self.trunc_energy = self.keys_da['energy'] - if self.trunc_energy > 1: # ensure that it is given as percentage + if self.trunc_energy > 1: # ensure that it is given as percentage self.trunc_energy /= 100. @@ -593,33 +579,19 @@ def _ext_iter_param(self): file. These parameters include convergence tolerances and parameters for the damping parameter. Default values for these parameters have been given here, if they are not provided in ITERATION. """ - - # Predefine all the default values - self.data_misfit_tol = 0.01 - self.step_tol = 0.01 - self.gamma = 0.2 - self.gamma_max = 0.5 - self.gamma_factor = 2.5 - self.trunc_energy = 0.95 - self.iteration = 0 - - # Loop over options in ITERATION and extract the parameters we want - for i, opt in enumerate(list(zip(*self.keys_da['iteration']))[0]): - if opt == 'data_misfit_tol': - self.data_misfit_tol = self.keys_da['iteration'][i][1] - if opt == 'step_tol': - self.step_tol = self.keys_da['iteration'][i][1] - if opt == 'gamma': - self.gamma = self.keys_da['iteration'][i][1] - if opt == 'gamma_max': - self.gamma_max = self.keys_da['iteration'][i][1] - if opt == 'gamma_factor': - self.gamma_factor = self.keys_da['iteration'][i][1] - - if 'energy' in self.keys_da: - # initial energy (Remember to extract this) - self.trunc_energy = self.keys_da['energy'] - if self.trunc_energy > 1: # ensure that it is given as percentage + try: + options = dict(self.keys_da['iteration']) + except: + options = dict([self.keys_da['iteration']]) + + self.data_misfit_tol = options.get('data_misfit_tol', 0.01) + self.trunc_energy = options.get('energy', 0.95) + self.step_tol = options.get('step_tol', 0.01) + self.gamma = options.get('gamma', 0.2) + self.gamma_max = options.get('gamma_max', 0.5) + self.gamma_factor = options.get('gamma_factor', 2.5) + + if self.trunc_energy > 1: # ensure that it is given as percentage self.trunc_energy /= 100. 
diff --git a/pipt/update_schemes/esmda.py b/pipt/update_schemes/esmda.py
index b848096..7cefd16 100644
--- a/pipt/update_schemes/esmda.py
+++ b/pipt/update_schemes/esmda.py
@@ -31,7 +31,7 @@ def __init__(self, keys_da, keys_en, sim):
 
         Parameters
         ----------
-        keys_da['mda'] : list
+        keys_da['mda'] : dict
             - tot_assim_steps: total number of iterations in MDA, e.g., 3
             - inflation_param: covariance inflation factors, e.g., [2, 4, 4]
 
@@ -222,17 +222,16 @@ def _ext_inflation_param(self):
         alpha: list
             Data covariance inflation factor
         """
-        # Make sure MDA is a list
-        if not isinstance(self.keys_da['mda'][0], list):
-            mda_opts = [self.keys_da['mda']]
-        else:
-            mda_opts = self.keys_da['mda']
+        try:
+            mda_opts = dict(self.keys_da['mda'])
+        except:
+            mda_opts = dict([self.keys_da['mda']])
 
         # Check if INFLATION_PARAM has been provided, and if so, extract the value(s). If not, we set alpha to the
         # default value equal to the tot. no. assim. steps
-        if 'inflation_param' in list(zip(*mda_opts))[0]:
+        if 'inflation_param' in mda_opts:
             # Extract value
-            alpha_tmp = [item[1] for item in mda_opts if item[0] == 'inflation_param'][0]
+            alpha_tmp = mda_opts['inflation_param']
 
             # If one value is given, we copy it to all assim. steps. If multiple values are given, we check the
             # number of parameters corresponds to tot. no. assim. steps
@@ -279,22 +278,17 @@ def _ext_assim_steps(self):
         - ST 7/6-16
         - ST 1/3-17: Changed to output list of assim. steps instead of just tot. assim. steps
         """
 
-        # Make sure MDA is a list
-        if not isinstance(self.keys_da['mda'][0], list):
-            mda_opts = [self.keys_da['mda']]
-        else:
-            mda_opts = self.keys_da['mda']
+        try:
+            mda_opts = dict(self.keys_da['mda'])
+        except:
+            mda_opts = dict([self.keys_da['mda']])
+
         # Check if 'max_iter' has been given; if not, give error (mandatory in ITERATION)
-        assert 'tot_assim_steps' in list(
-            zip(*mda_opts))[0], 'TOT_ASSIM_STEPS has not been given in MDA!'
-
-        # Extract max. iter
-        tot_no_assim = int([item[1]
-                            for item in mda_opts if item[0] == 'tot_assim_steps'][0])
-
-        # Make a list of assim. steps
-        assim_steps = list(range(tot_no_assim))
+        try:
+            assim_steps = list(range(int(mda_opts['tot_assim_steps'])))
+        except KeyError:
+            raise AssertionError('TOT_ASSIM_STEPS has not been given in MDA!')
 
         # If it is a restart run, we remove simulations already done
diff --git a/popt/cost_functions/ecalc_npv.py b/popt/cost_functions/ecalc_npv.py
index d13d8df..420e6ca 100644
--- a/popt/cost_functions/ecalc_npv.py
+++ b/popt/cost_functions/ecalc_npv.py
@@ -44,9 +44,7 @@ def ecalc_npv(pred_data, **kwargs):
     report = kwargs.get('true_order', [])
 
     # Economic values
-    npv_const = {}
-    for name, value in keys_opt['npv_const']:
-        npv_const[name] = value
+    npv_const = dict(keys_opt['npv_const'])
 
     # Collect production data
     Qop = []
diff --git a/popt/cost_functions/ecalc_pareto_npv.py b/popt/cost_functions/ecalc_pareto_npv.py
index 2847dd9..9375f9f 100644
--- a/popt/cost_functions/ecalc_pareto_npv.py
+++ b/popt/cost_functions/ecalc_pareto_npv.py
@@ -46,9 +46,7 @@ def ecalc_pareto_npv(pred_data, kwargs):
     report = kwargs.get('true_order', [])
 
     # Economic values
-    npv_const = {}
-    for name, value in keys_opt['npv_const']:
-        npv_const[name] = value
+    npv_const = dict(keys_opt['npv_const'])
 
     # Collect production data
     Qop = []
diff --git a/popt/cost_functions/npv.py b/popt/cost_functions/npv.py
index dfb3f6f..bb18c8c 100644
--- a/popt/cost_functions/npv.py
+++ b/popt/cost_functions/npv.py
@@ -38,9 +38,7 @@ def npv(pred_data, **kwargs):
     report = kwargs.get('true_order', [])
 
     # Economic values
-    npv_const = {}
-    for name, value in keys_opt['npv_const']:
-        npv_const[name] = value
+    npv_const = dict(keys_opt['npv_const'])
 
     values = []
     for i in np.arange(1, len(pred_data)):
diff --git a/popt/cost_functions/ren_npv.py b/popt/cost_functions/ren_npv.py
index e76c567..0035f4a 100644
--- a/popt/cost_functions/ren_npv.py
+++ b/popt/cost_functions/ren_npv.py
@@ -32,9 +32,7 @@ def ren_npv(pred_data, kwargs):
     report = kwargs.get('true_order', [])
 
     # Economic values
-    npv_const = {}
-    for name, value in keys_opt['npv_const']:
-        npv_const[name] = value
+    npv_const = dict(keys_opt['npv_const'])
 
     # Loop over timesteps
     values = []
diff --git a/popt/update_schemes/enopt.py b/popt/update_schemes/enopt.py
index a397ad9..ae9ed13 100644
--- a/popt/update_schemes/enopt.py
+++ b/popt/update_schemes/enopt.py
@@ -8,7 +8,7 @@
 # Internal imports
 from popt.misc_tools import optim_tools as ot
 from popt.loop.optimize import Optimize
-import popt.update_schemes.optimizers as opt
+import popt.update_schemes.subroutines.optimizers as opt
 
 
 class EnOpt(Optimize):
diff --git a/popt/update_schemes/genopt.py b/popt/update_schemes/genopt.py
index 2316f6d..408baa7 100644
--- a/popt/update_schemes/genopt.py
+++ b/popt/update_schemes/genopt.py
@@ -7,8 +7,8 @@
 # Internal imports
 from popt.misc_tools import optim_tools as ot
 from popt.loop.optimize import Optimize
-import popt.update_schemes.optimizers as opt
-from popt.update_schemes.cma import CMA
+import popt.update_schemes.subroutines.optimizers as opt
+from popt.update_schemes.subroutines.cma import CMA
 
 
 class GenOpt(Optimize):
diff --git a/popt/update_schemes/linesearch.py b/popt/update_schemes/linesearch.py
index 57a2a09..e82b305 100644
--- a/popt/update_schemes/linesearch.py
+++ b/popt/update_schemes/linesearch.py
@@ -10,7 +10,7 @@
 # Internal imports
 from popt.misc_tools import optim_tools as ot
 from popt.loop.optimize import Optimize
-from popt.update_schemes.line_search_step import line_search, line_search_backtracking
+from popt.update_schemes.subroutines import line_search, line_search_backtracking, bfgs_update, newton_cg
 
 # Some symbols for logger
 subk = '\u2096'
@@ -37,8 +37,8 @@ def LineSearch(fun, x, jac, method='GD', hess=None, args=(), bounds=None, callba
 
     method: str
         Which optimization method to use. Default is 'GD' for 'Gradient Descent'.
-        Other options are 'BFGS' for the 'Broyden–Fletcher–Goldfarb–Shanno' method,
-        and 'Newton-CG'.
+        Other options are 'BFGS' for the 'Broyden-Fletcher-Goldfarb-Shanno' method,
+        and 'Newton-CG' for the Newton-conjugate gradient method.
 
     hess: callable, optional
         Hessian function, hess(x, *args). Default is None.
@@ -340,7 +340,7 @@ def calc_update(self, iter_resamp=0):
             if self.method == 'BFGS':
                 pk = - np.matmul(self.Hk_inv, self.jk)
             if self.method == 'Newton-CG':
-                pk = newton_cg(self.jk, Hk=self.Hk, xk=self.xk, jac=self._jac, eps=1e-4)
+                pk = newton_cg(self.jk, Hk=self.Hk, xk=self.xk, jac=self._jac, logger=self.logger.info)
 
             # porject search direction onto the feasible set
             if self.bounds is not None:
@@ -560,82 +560,6 @@ def _set_max_step_size(self, pk, xk):
 
 
 
-def bfgs_update(Hk, sk, yk):
-    """
-    Perform the BFGS update of the inverse Hessian approximation.
-
-    Parameters:
-    - Hk: np.ndarray, current inverse Hessian approximation (n x n)
-    - sk: np.ndarray, step vector (x_{k+1} - x_k), shape (n,)
-    - yk: np.ndarray, gradient difference (grad_{k+1} - grad_k), shape (n,)
-
-    Returns:
-    - Hk_new: np.ndarray, updated inverse Hessian approximation
-    """
-    sk = sk.reshape(-1, 1)
-    yk = yk.reshape(-1, 1)
-    rho = 1.0 / (yk.T @ sk)
-
-    if rho <= 0:
-        print('Non-positive curvature detected. BFGS update skipped....')
-        return Hk
-
-    I = np.eye(Hk.shape[0])
-    Vk = I - rho * sk @ yk.T
-    Hk_new = Vk @ Hk @ Vk.T + rho * sk @ sk.T
-
-    return Hk_new
-
-def newton_cg(gk, Hk=None, maxiter=None, **kwargs):
-    print('\nRunning Newton-CG subroutine...')
-
-    if Hk is None:
-        jac = kwargs.get('jac')
-        eps = kwargs.get('eps', 1e-4)
-        xk = kwargs.get('xk')
-
-        # define a finite difference approximation of the Hessian times a vector
-        def Hessd(d):
-            return (jac(xk + eps*d) - gk)/eps
-
-    if maxiter is None:
-        maxiter = 20*gk.size # Same dfault as in scipy
-
-    tol = min(0.5, np.sqrt(la.norm(gk)))*la.norm(gk)
-    z = 0
-    r = gk
-    d = -r
-
-    for j in range(maxiter):
-        print('iteration: ', j)
-        if Hk is None:
-            Hd = Hessd(d)
-        else:
-            Hd = np.matmul(Hk, d)
-
-        dTHd = np.dot(d, Hd)
-
-        if dTHd <= 0:
-            print('Negative curvature detected, terminating subroutine')
-            print('\n')
-            if j == 0:
-                return -gk
-            else:
-                return z
-
-        rold = r
-        a = np.dot(r,r)/dTHd
-        z = z + a*d
-        r = r + a*Hd
-
-        if la.norm(r) < tol:
-            print('Subroutine converged')
-            print('\n')
-            return z
-
-        b = np.dot(r, r)/np.dot(rold, rold)
-        d = -r + b*d
-
diff --git a/popt/update_schemes/smcopt.py b/popt/update_schemes/smcopt.py
index f67503b..f0944fe 100644
--- a/popt/update_schemes/smcopt.py
+++ b/popt/update_schemes/smcopt.py
@@ -6,7 +6,7 @@
 
 # Internal imports
 from popt.loop.optimize import Optimize
-import popt.update_schemes.optimizers as opt
+import popt.update_schemes.subroutines.optimizers as opt
 from popt.misc_tools import optim_tools as ot
 
 
diff --git a/popt/update_schemes/subroutines/__init__.py b/popt/update_schemes/subroutines/__init__.py
new file mode 100644
index 0000000..eab7b84
--- /dev/null
+++ b/popt/update_schemes/subroutines/__init__.py
@@ -0,0 +1,3 @@
+from .subroutines import *
+from .cma import *
+from .optimizers import *
\ No newline at end of file
diff --git a/popt/update_schemes/cma.py b/popt/update_schemes/subroutines/cma.py
similarity index 99%
rename from popt/update_schemes/cma.py
rename to popt/update_schemes/subroutines/cma.py
index 0a99874..bee93e5 100644
--- a/popt/update_schemes/cma.py
+++ b/popt/update_schemes/subroutines/cma.py
@@ -2,6 +2,8 @@
 import numpy as np
 from popt.misc_tools import optim_tools as ot
 
+__all__ = ['CMA']
+
 
 class CMA:
     def __init__(self, ne, dim, alpha_mu=None, n_mu=None, alpha_1=None, alpha_c=None, corr_update=False, equal_weights=True):
diff --git a/popt/update_schemes/optimizers.py b/popt/update_schemes/subroutines/optimizers.py
similarity index 99%
rename from popt/update_schemes/optimizers.py
rename to popt/update_schemes/subroutines/optimizers.py
index 83514cd..a211093 100644
--- a/popt/update_schemes/optimizers.py
+++ b/popt/update_schemes/subroutines/optimizers.py
@@ -1,6 +1,8 @@
 """Gradient acceleration."""
 import numpy as np
 
+__all__ = ['GradientAscent', 'Adam', 'AdaMax', 'Steihaug', ]
+
 
 class GradientAscent:
     r"""
diff --git a/popt/update_schemes/line_search_step.py b/popt/update_schemes/subroutines/subroutines.py
similarity index 79%
rename from popt/update_schemes/line_search_step.py
rename to popt/update_schemes/subroutines/subroutines.py
index 3f4af38..5e00c28 100644
--- a/popt/update_schemes/line_search_step.py
+++ b/popt/update_schemes/subroutines/subroutines.py
@@ -1,9 +1,16 @@
-# This is a an implementation of the Line Search Algorithm (Alg. 3.5) in Numerical Optimization from Nocedal 2006.
-
 import numpy as np
+import numpy.linalg as la
 from functools import cache
 from scipy.optimize._linesearch import _quadmin, _cubicmin
 
+__all__ = [
+    'line_search',
+    'zoom',
+    'line_search_backtracking',
+    'bfgs_update',
+    'newton_cg'
+]
+
 
 def line_search(step_size, xk, pk, fun, jac, fk=None, jk=None, **kwargs):
     '''
@@ -304,4 +311,87 @@ def phi(alpha):
         step_size *= rho
 
     # If we reached this point, the line search failed
-    return None, None, None, ls_nfev, ls_njev
\ No newline at end of file
+    return None, None, None, ls_nfev, ls_njev
+
+
+def bfgs_update(Hk, sk, yk):
+    """
+    Perform the BFGS update of the inverse Hessian approximation.
+
+    Parameters:
+    - Hk: np.ndarray, current inverse Hessian approximation (n x n)
+    - sk: np.ndarray, step vector (x_{k+1} - x_k), shape (n,)
+    - yk: np.ndarray, gradient difference (grad_{k+1} - grad_k), shape (n,)
+
+    Returns:
+    - Hk_new: np.ndarray, updated inverse Hessian approximation
+    """
+    sk = sk.reshape(-1, 1)
+    yk = yk.reshape(-1, 1)
+    rho = 1.0 / (yk.T @ sk)
+
+    if rho <= 0:
+        print('Non-positive curvature detected. BFGS update skipped....')
+        return Hk
+
+    I = np.eye(Hk.shape[0])
+    Vk = I - rho * sk @ yk.T
+    Hk_new = Vk @ Hk @ Vk.T + rho * sk @ sk.T
+
+    return Hk_new
+
+def newton_cg(gk, Hk=None, maxiter=None, **kwargs):
+
+    # Check for logger
+    logger = kwargs.get('logger', None)
+    if logger is None:
+        logger = print
+
+    logger('Running Newton-CG subroutine..........')
+
+    if Hk is None:
+        jac = kwargs.get('jac')
+        eps = kwargs.get('eps', 1e-4)
+        xk = kwargs.get('xk')
+
+        # define a finite difference approximation of the Hessian times a vector
+        def Hessd(d):
+            return (jac(xk + eps*d) - gk)/eps
+
+    if maxiter is None:
+        maxiter = 20*gk.size # Same dfault as in scipy
+
+    tol = min(0.5, np.sqrt(la.norm(gk)))*la.norm(gk)
+    z = 0
+    r = gk
+    d = -r
+
+    for j in range(maxiter):
+        logger(f'iteration: {j}')
+        if Hk is None:
+            Hd = Hessd(d)
+        else:
+            Hd = np.matmul(Hk, d)
+
+        dTHd = np.dot(d, Hd)
+
+        if dTHd <= 0:
+            logger('Negative curvature detected, terminating subroutine')
+            logger('')
+            if j == 0:
+                return -gk
+            else:
+                return z
+
+        rold = r
+        a = np.dot(r,r)/dTHd
+        z = z + a*d
+        r = r + a*Hd
+
+        if la.norm(r) < tol:
+            logger('Subroutine converged')
+            logger('')
+            return z
+
+        b = np.dot(r, r)/np.dot(rold, rold)
+        d = -r + b*d
\ No newline at end of file

From 3cdd192a8f84eadb71ece6026fcfdf87a5025a4e Mon Sep 17 00:00:00 2001
From: Mathias Methlie Nilsen
Date: Mon, 8 Sep 2025 11:18:09 +0200
Subject: [PATCH 14/14] changed @cache to @lru_cache

---
 popt/update_schemes/subroutines/subroutines.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/popt/update_schemes/subroutines/subroutines.py b/popt/update_schemes/subroutines/subroutines.py
index 5e00c28..ddd1105 100644
--- a/popt/update_schemes/subroutines/subroutines.py
+++ b/popt/update_schemes/subroutines/subroutines.py
@@ -1,6 +1,6 @@
 import numpy as np
 import numpy.linalg as la
-from functools import cache
+from functools import lru_cache
 from scipy.optimize._linesearch import _quadmin, _cubicmin
 
 __all__ = [
@@ -85,7 +85,7 @@ def line_search(step_size, xk, pk, fun, jac, fk=None, jk=None, **kwargs):
     assert step_size <= amax, "Initial step size must be less than or equal to amax."
 
     # Define phi and derivative of phi
-    @cache
+    @lru_cache(maxsize=None)
     def phi(alpha):
         global ls_nfev
         if (alpha == 0):
@@ -100,7 +100,7 @@ def phi(alpha):
             ls_nfev += 1
         return phi.fun_val
 
-    @cache
+    @lru_cache(maxsize=None)
     def dphi(alpha):
         global ls_njev
         if (alpha == 0):
@@ -281,7 +281,7 @@ def line_search_backtracking(step_size, xk, pk, fun, jac, fk=None, jk=None, **kw
     c1 = kwargs.get('c1', 1e-4)
 
     # Define phi and derivative of phi
-    @cache
+    @lru_cache(maxsize=None)
    def phi(alpha):
        global ls_nfev
        if (alpha == 0):
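
The esmda and cost-function hunks above replace manual name/value loops with a single dict() call. A small sketch of the two input shapes this is meant to handle, using the example values from the updated docstring (the bare except in the patch is written here as an explicit exception tuple):

# MDA options from the input file arrive either as a list of [name, value]
# pairs or as a single [name, value] pair.
multi = [['tot_assim_steps', 3], ['inflation_param', [2, 4, 4]]]
single = ['tot_assim_steps', 3]

mda_opts = dict(multi)        # {'tot_assim_steps': 3, 'inflation_param': [2, 4, 4]}

try:
    mda_opts = dict(single)   # a lone pair is not a sequence of pairs -> ValueError
except (ValueError, TypeError):
    mda_opts = dict([single])  # wrap it first, as the except branch in _ext_* does

assert mda_opts == {'tot_assim_steps': 3}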
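
The functions collected in popt/update_schemes/subroutines keep the signatures shown above: bfgs_update(Hk, sk, yk) and newton_cg(gk, Hk=None, maxiter=None, **kwargs) with optional jac, eps, xk and logger keywords. A rough usage sketch on a hand-made quadratic; the objective, the numbers and the top-level import of the new package are illustrative assumptions, not part of the patch:

import numpy as np
from popt.update_schemes.subroutines import bfgs_update, newton_cg

# Toy quadratic: f(x) = 0.5*x.T @ A @ x - b @ x, so jac(x) = A @ x - b and hess(x) = A.
A = np.array([[3.0, 0.5], [0.5, 2.0]])
b = np.array([1.0, -1.0])

def jac(x):
    return A @ x - b

xk = np.zeros(2)
gk = jac(xk)

# Newton-CG direction from an explicit Hessian; messages go through the logger
# keyword, which LineSearch now points at its own logger.
pk = newton_cg(gk, Hk=A, logger=print)

# The same direction via finite-difference Hessian-vector products built from jac.
pk_fd = newton_cg(gk, jac=jac, xk=xk, eps=1e-6)

# One BFGS update of the inverse Hessian approximation after taking the step.
xk_new = xk + pk
Hk_inv = bfgs_update(np.eye(2), sk=xk_new - xk, yk=jac(xk_new) - gk)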
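
The final commit's @cache to @lru_cache swap is behaviour-preserving: functools.cache (available from Python 3.9) is simply lru_cache(maxsize=None), so this spelling keeps the memoisation of phi and dphi working on older interpreters as well. A minimal sketch of the pattern, with an illustrative quadratic phi that is not taken from the code:

from functools import lru_cache

evaluations = []

@lru_cache(maxsize=None)          # unbounded memoisation, equivalent to functools.cache
def phi(alpha):
    evaluations.append(alpha)     # record only genuine function evaluations
    return (alpha - 0.3) ** 2

phi(0.5), phi(0.5), phi(1.0)
assert evaluations == [0.5, 1.0]  # the repeated step size 0.5 is served from the cache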