In [1]:
import os
import pickle
import pprint
import re
from functools import partial
from importlib import reload
from math import pi
from time import sleep
from time import time

import numpy as np
import sympy as sp
import openturns as ot
import matplotlib.pyplot as plt
import trimesh as tr
import sklearn
import tqdm
from joblib import Parallel, delayed
from IPython.display import display, clear_output
from sympy.printing import latex
from trimesh import viewer as trview
from scipy.optimize import OptimizeResult, minimize, basinhopping, differential_evolution, brute, shgo, check_grad, approx_fprime
from gldpy import GLD

import otaf

# Turn off OpenTURNS log messages.
ot.Log.Show(ot.Log.NONE)
# Print small floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)
# Short alias for np.array, used throughout the geometry definitions below.
ar = np.array

# Analysis of a system composed of N + 2 parts: 2 plates with N = NX x NY holes each, and N pins.

### Definition of the global descriptive parameters

In [2]:
NX = 2 ## Number of holes along the x axis
NY = 2 ## Number of holes along the y axis
Dext = 20 ## Diameter of the holes in mm
Dint = 19.8 ## Diameter of the pins in mm
EH = 50 ## Distance between the axes of neighbouring holes
LB = 25 # Distance between the axis of a border hole and the plate edge
hPlate = 30 # Height of the plates in mm
hPin = 60 # Height of the pins in mm

CIRCLE_RESOLUTION = 16 # Number of points used to model the contour of the outer holes

### Defining and constructing the system data dictionary

The plates have NX * NY + 1 surfaces. The lower left point has coordinate 0,0,0

We only model the surfaces that are touching. 

In [3]:
# NOTE(review): the notebook title describes N + 2 parts (2 plates + NX*NY pins),
# whereas this evaluates to 2 * NX * NY. N_PARTS is not read again in the visible
# cells -- confirm the intended value before relying on it.
N_PARTS = NX * NY * 2
# Plate side lengths: hole pitch times (number of holes - 1), plus one border on each side.
LX = (NX - 1) * EH + 2*LB
LY = (NY - 1) * EH + 2*LB

# Corners of the rectangular plate contour in the z = 0 plane, starting at the origin.
contour_points = ar([[0,0,0],[LX,0,0],[LX,LY,0],[0,LY,0]])

# Global basis vectors (rows of the identity matrix).
R0 = ar([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
x_, y_, z_ = R0[0], R0[1], R0[2]

# Local frames built from the global basis: Frame1 lists (z, y, -x) and
# Frame2 lists (-z, y, x). Frame1 is used for part '0' and the pins, Frame2 for part '1'.
Frame1 = ar([z_,y_,-x_])
Frame2 = ar([-z_,y_,x_])

First we define the base part dictionaries for the upper and lower plate, without holes

In [4]:
# Base description of the assembly: the two plates (parts '0' and '1'), each with a
# single planar surface 'a' defined by the four plate corners. The hole surfaces and
# the compatibility loops are added to this dictionary by a later cell.
system_data = {
    "PARTS" : {
        # Part '0': planar surface expressed in Frame1, interacting with surface 'a' of part 1.
        '0' : {
            "a" : {
                "FRAME": Frame1,
                "POINTS": {'A0' : ar([0,0,0]),
                           'A1' : ar([LX,0,0]),
                           'A2' : ar([LX,LY,0]),
                           'A3' : ar([0,LY,0]),
                        },
                "TYPE": "plane",
                "INTERACTIONS": ['P1a'],
                "CONSTRAINTS_D": ["PERFECT"],
                "CONSTRAINTS_G": ["SLIDING"],            
            }
        },
        # Part '1': same corner points, expressed in Frame2, interacting with surface 'a' of part 0.
        '1' : {
            "a" : {
                "FRAME": Frame2,
                "POINTS": {'A0' : ar([0,0,0]),
                           'A1' : ar([LX,0,0]),
                           'A2' : ar([LX,LY,0]),
                           'A3' : ar([0,LY,0]),
                        },
                "TYPE": "plane",
                "INTERACTIONS": ['P0a'],
                "CONSTRAINTS_D": ["PERFECT"],
                "CONSTRAINTS_G": ["SLIDING"],            
            }
        }  
    },
    "LOOPS": {
        # Filled with one entry per pin by the hole/pin construction cell.
        "COMPATIBILITY": {
        },
    },
    "GLOBAL_CONSTRAINTS": "3D",
}

Then we iterate over the hole grid (NX by NY) and create the corresponding holes and pins. At the same time, one compatibility loop is created per pin.

In [5]:
def formater(i, l):
    """Return the label of point 0 of surface ``l`` on part ``i`` (e.g. ``P2bB0``)."""
    return f"P{i}{l}{l.upper()}0"


def _hole_surface(frame, origin, pin_id, label):
    """Return the surface dictionary of a plate hole mating with pin ``pin_id``."""
    return {
        "FRAME": frame,
        "ORIGIN": origin,
        "TYPE": "cylinder",
        "RADIUS": Dext / 2,
        "EXTENT_LOCAL": {"x_max": hPin/2, "x_min": -hPin/2},
        "INTERACTIONS": [f"P{pin_id}{label}"],
        "SURFACE_DIRECTION": "centripetal",
    }


alpha_gen = otaf.common.alphabet_generator()
next(alpha_gen)  # 'a' is already taken by the planar surfaces of the plates
part_id = 2  # Parts '0' and '1' are the plates; pins are numbered from 2
parts = system_data["PARTS"]
compatibility_loops = system_data["LOOPS"]["COMPATIBILITY"]

for ix in range(NX):
    for iy in range(NY):
        hole_center = ar([LB+ix*EH, LB+iy*EH, 0])  # Shared axis position of the hole and its pin
        label = next(alpha_gen)  # One surface label per pin, reused on both plates

        # Pin: a single cylindrical surface touching both plates.
        parts[str(part_id)] = {
            label: {
                "FRAME": Frame1, # Frame doesn't really matter, as long as x is aligned on the axis
                "ORIGIN": hole_center,
                "TYPE": "cylinder",
                "RADIUS": Dint / 2,
                "EXTENT_LOCAL": {"x_max": hPin/2, "x_min": -hPin/2},
                "INTERACTIONS": [f"P0{label}", f"P1{label}"],
                "SURFACE_DIRECTION": "centrifugal",
                "CONSTRAINTS_D": ["PERFECT"], # No defects on the pins
                "BLOCK_ROTATIONS_G": 'x', # The pins do not rotate around their axis
                "BLOCK_TRANSLATIONS_G": 'x', # The pins do not slide along their axis
            }
        }

        # Matching hole in each plate (part 0 first, then part 1).
        parts["0"][label] = _hole_surface(Frame1, hole_center, part_id, label)
        parts["1"][label] = _hole_surface(Frame2, hole_center, part_id, label)

        # Compatibility loop: plate 0 plane -> hole in plate 0 -> pin -> hole in plate 1 -> plate 1 plane.
        loop_id = f"L{part_id-1}"
        compatibility_loops[loop_id] = f"P0aA0 -> {formater(0,label)} -> {formater(part_id,label)} -> {formater(1,label)} -> P1aA0"
        part_id += 1

In [6]:
# Wrap the system description in otaf's assembly data processor and expand the
# compatibility loops declared in system_data.
SDA = otaf.AssemblyDataProcessor(system_data)
SDA.generate_expanded_loops()

In [7]:
# Build the compatibility-loop handler from the processed assembly data and
# retrieve the compatibility expressions used later by the constraint model.
CLH = otaf.CompatibilityLoopHandling(SDA)
compatibility_expressions = CLH.get_compatibility_expression_from_FO_matrices()

In [8]:
# Build the interface-loop handler (CIRCLE_RESOLUTION is passed as circle_resolution)
# and retrieve the interface constraints used later by the constraint model.
ILH = otaf.InterfaceLoopHandling(SDA, CLH, circle_resolution=CIRCLE_RESOLUTION)
interface_constraints = ILH.get_interface_loop_expressions()

Processing part 0, surface b for cylinder-to-cylinder interactions.
usedGMatDat [['0', 'b', 'B0', '2', 'b', 'B0']]
Found 1 used gap matrices.
unusedGMatDat [['0', 'b', 'B1', '2', 'b', 'B1'], ['0', 'b', 'B2', '2', 'b', 'B2']]
Found 2 unused gap matrices.
Matching used and unused gap matrices: GP0bB0P2bB0 with GP0bB1P2bB1
Matching used and unused gap matrices: GP0bB0P2bB0 with GP0bB2P2bB2
Generated 32 interaction equations for current matching.
Total interaction equations generated: 32
Processing part 0, surface c for cylinder-to-cylinder interactions.
usedGMatDat [['0', 'c', 'C0', '3', 'c', 'C0']]
Found 1 used gap matrices.
unusedGMatDat [['0', 'c', 'C2', '3', 'c', 'C2'], ['0', 'c', 'C1', '3', 'c', 'C1']]
Found 2 unused gap matrices.
Matching used and unused gap matrices: GP0cC0P3cC0 with GP0cC2P3cC2
Matching used and unused gap matrices: GP0cC0P3cC0 with GP0cC1P3cC1
Generated 32 interaction equations for current matching.
Total interaction equations generated: 32
Processing part 0, sur

In [9]:
# Assemble the system-of-constraints model from the compatibility expressions
# and the interface constraints computed above.
SOCAM = otaf.SystemOfConstraintsAssemblyModel(
    compatibility_expressions, interface_constraints
)

SOCAM.embedOptimizationVariable()

# Show how many deviation symbols the model exposes, and their names.
print(len(SOCAM.deviation_symbols), SOCAM.deviation_symbols)

32 [v_d_0, w_d_0, beta_d_0, gamma_d_0, v_d_2, w_d_2, beta_d_2, gamma_d_2, v_d_5, w_d_5, beta_d_5, gamma_d_5, v_d_7, w_d_7, beta_d_7, gamma_d_7, v_d_8, w_d_8, beta_d_8, gamma_d_8, v_d_10, w_d_10, beta_d_10, gamma_d_10, v_d_11, w_d_11, beta_d_11, gamma_d_11, v_d_13, w_d_13, beta_d_13, gamma_d_13]


## Construction of the stochastic model of the defects. (old lambda approach)

In [10]:
tol = 0.1 * np.sqrt(2)
Cm = 1  # Process capability

# Standard deviations of the position and orientation defects.
# The position standard deviation is the tolerance divided by 6 * Cm.
sigma_e_pos = tol / (6 * Cm)
# Angle bound obtained by dividing the tolerance by the plate height.
theta_max = tol / hPlate
# The orientation standard deviation is the full angular range (2 * theta_max) divided by 6 * Cm.
sigma_e_theta = (2 * theta_max) / (6 * Cm)

In [11]:
# Joint distribution of the defects: one marginal per deviation symbol of SOCAM.
# sigma_dict maps a defect family name to its standard deviation: the rotational
# names (alpha, beta, gamma) use sigma_e_theta, the translational ones (u, v, w) use sigma_e_pos.
RandDeviationVect = otaf.distribution.get_composed_normal_defect_distribution(
    defect_names=SOCAM.deviation_symbols,
    sigma_dict = {"alpha":sigma_e_theta, 
                  "beta":sigma_e_theta,
                  "gamma":sigma_e_theta, 
                  "u":sigma_e_pos, 
                  "v":sigma_e_pos, 
                  "w":sigma_e_pos})
# Number of random defect variables; reused as the surrogate input dimension.
dim_devs = int(RandDeviationVect.getDimension())

def get_uniform_from_deviation_vect(composed_distribution, coef=2):
    """Build a joint distribution of centred uniform marginals from a joint normal one.

    Marginal ``i`` of ``composed_distribution`` is replaced by
    ``ot.Uniform(-coef * std_i, coef * std_i)``, where ``std_i`` is the second
    parameter of that marginal. The result is used to generate training data
    for the surrogate, which struggles with samples drawn from the normal law.

    Parameters
    ----------
    composed_distribution : joint OpenTURNS distribution
        All marginals are assumed to be normal, i.e. parametrised as
        ``(mean, standard deviation)``; this is not checked.
    coef : float, optional
        Half-width of each uniform marginal, in standard deviations.

    Returns
    -------
    ot.ComposedDistribution
        Independent uniform marginals, one per marginal of the input.
    """
    parameters = composed_distribution.getParametersCollection()
    # Iterate over the marginals only. The parameter collection may carry an
    # extra trailing entry for the copula, so its length is not a reliable
    # count (the former ``len(parameters) - 1`` dropped the only marginal of a
    # one-dimensional distribution).
    n_marginals = composed_distribution.getDimension()
    uni_dist_list = []
    for i in range(n_marginals):
        std = parameters[i][1]  # second parameter of a normal marginal is sigma
        half_width = coef * std
        uni_dist_list.append(ot.Uniform(-half_width, half_width))
    return ot.ComposedDistribution(uni_dist_list)

# Uniform counterpart of the defect distribution, spanning +/- 2.5 standard deviations per variable.
UniRandDevVect = get_uniform_from_deviation_vect(RandDeviationVect, 2.5)

## Construction of a neural network based surrogate 
(could be omitted but makes things faster)

In [12]:
# Training data for the surrogate: either loaded from a cache file or generated
# (base Halton sample + scaled permutation samples) and then cached.

# Define the seed, sample size, and coefficients
SEED = 420  # Example seed value
sample_size = 1000000
additional_sample_size = 100000  # Number of additional permutation-based samples
scaling_coefficients = [np.sqrt(2), 2, 3, 3.5]  # Dynamic coefficients
model_name = f"model{NX}x{NY}Pins"

# Create a unique file name based on seed, sample size, and coefficients
coeff_str = "_".join([f"{coeff:.2f}" for coeff in scaling_coefficients])
data_filename = f'STORAGE/data_{sample_size}_add_{additional_sample_size}_seed_{SEED}_{model_name}_coeff_{coeff_str}.npz'

# Check if the data file already exists
if os.path.exists(data_filename):
    # Load the sample and results from the file
    with np.load(data_filename) as data:
        Xtrain = data['Xtrain']
        ytrain = data['ytrain']
    print(f"Loaded data from {data_filename}.")
else:
    # Create the cache directory up front: failing on a missing directory only
    # when saving would throw away the (very long) computation below.
    os.makedirs(os.path.dirname(data_filename), exist_ok=True)

    # Generate the base sample if it doesn't exist
    np.random.seed(SEED)  # Ensure reproducibility
    dist = UniRandDevVect
    TRAIN_SAMPLE = otaf.sampling.generate_and_transform_sequence(dist.getDimension(), sample_size, dist, sequence_type='halton')
    TRAIN_SAMPLE = np.array(TRAIN_SAMPLE, dtype="float32") #dist.getSample(sample_size), dtype="float32")

    # Compute the results for the base sample
    TRAIN_RESULTS = otaf.uncertainty.compute_gap_optimizations_on_sample_batch(
        SOCAM,
        TRAIN_SAMPLE,
        bounds=None,
        n_cpu=-2,
        progress_bar=True,
        dtype="float32"
    )

    # Generate permutations and compute their results using the scaling coefficients
    permutation_samples = []
    additional_results = []

    indices, subgroup_sizes = SOCAM.get_feature_indices_and_dimensions()
    _, std_deviations = otaf.distribution.get_means_standards_composed_distribution(RandDeviationVect)
    std_deviations = np.array(std_deviations, dtype="float32")

    for coeff in scaling_coefficients:
        # Generate permutation samples for each coefficient
        samples = otaf.sampling.generate_scaled_permutations(subgroup_sizes, std_deviations, additional_sample_size) * coeff
        permutation_samples.append(samples)

        # Compute the results for each permutation sample
        results = otaf.uncertainty.compute_gap_optimizations_on_sample_batch(
            SOCAM, samples, bounds=None,
            n_cpu=-2, progress_bar=True, dtype="float32"
        )
        additional_results.append(results)

    # Concatenate the generated permutation samples and results to the original ones
    TRAIN_SAMPLE = np.vstack([TRAIN_SAMPLE] + permutation_samples)
    TRAIN_RESULTS = np.vstack([TRAIN_RESULTS] + additional_results)

    # Convert to final training arrays
    Xtrain = np.array(TRAIN_SAMPLE, dtype="float32")
    ytrain = np.array(TRAIN_RESULTS, dtype="float32")

    # Save the generated sample and results to a single compressed file
    np.savez_compressed(data_filename, Xtrain=Xtrain, ytrain=ytrain)
    print(f"Generated and saved data to {data_filename}.")

# Calculate failure/success ratios: a simulation counts as failed when the last
# output column is negative.
train_failure_ratio = np.where(ytrain[..., -1] < 0, 1, 0).sum() / len(ytrain)
train_success_ratio = np.where(ytrain[..., -1] >= 0, 1, 0).sum() / len(ytrain)

print(f"Ratio of failed simulations in sample: {train_failure_ratio}")

out_dim = ytrain.shape[-1]


Loaded data from STORAGE/data_1000000_add_100000_seed_420_model2x2Pins_coeff_1.41_2.00_3.00_3.50.npz.
Ratio of failed simulations in sample: 0.4486017443334516


In [13]:
# Validation data for the surrogate: loaded from cache when available,
# otherwise sampled from the uniform defect distribution, solved and cached.

# Define the seed, sample size, and file paths
SEED_valid = 1366631  # Example seed value
sample_size_valid = 250000
model_name = f"model{NX}x{NY}Pins"
sample_filename = f'STORAGE/validation_sample_{sample_size_valid}_seed_{SEED_valid}_{model_name}_ai.npy'
results_filename = f'STORAGE/validation_results_{sample_size_valid}_seed_{SEED_valid}_{model_name}_ai.npy'

# Ensure reproducibility by setting the seed
np.random.seed(SEED_valid)

if os.path.exists(sample_filename) and os.path.exists(results_filename):
    with open(sample_filename, 'rb') as file:
        VALID_SAMPLE = np.load(file)
    with open(results_filename, 'rb') as file:
        VALID_RESULTS = np.load(file)
    print("Loaded existing sample and results from file.")
else:
    # Create the cache directories before the expensive computation so that the
    # final save cannot fail on a missing folder.
    for cache_path in (sample_filename, results_filename):
        os.makedirs(os.path.dirname(cache_path), exist_ok=True)

    # The sample below is drawn by OpenTURNS, whose random generator is
    # independent of NumPy's: seed it explicitly so that the sample really is a
    # function of SEED_valid (as the file name claims).
    ot.RandomGenerator.SetSeed(SEED_valid)

    # Generate the sample
    dist = UniRandDevVect # otaf.uncertainty.multiply_composed_distribution_with_constant(RandDeviationVect, 1.05) # We now work with low failure probabilities
    #VALID_SAMPLE = np.array(otaf.uncertainty.generateLHSExperiment(dist, sample_size_valid))
    VALID_SAMPLE = np.array(dist.getSample(sample_size_valid),dtype="float32")
    # Compute the results
    VALID_RESULTS = otaf.uncertainty.compute_gap_optimizations_on_sample_batch(
        SOCAM,
        VALID_SAMPLE,
        n_cpu=-2,
        progress_bar=True,
        dtype="float32"
    )

    # Save the sample and results
    with open(sample_filename, 'wb') as file:
        np.save(file, VALID_SAMPLE)
    with open(results_filename, 'wb') as file:
        np.save(file, VALID_RESULTS)
    print("Generated and saved new sample and results with seed for validation.")

# Assign X and y from VALID_SAMPLE and VALID_RESULTS
Xvalid = VALID_SAMPLE
yvalid = VALID_RESULTS
# Use the actual number of rows as denominator so that the ratio stays correct
# even if a cached file does not hold exactly sample_size_valid rows.
print(f"Ratio of failed simulations in validation sample : {np.where(yvalid[...,-1]<0,1,0).sum()/len(yvalid)}")
out_dim = yvalid.shape[-1]

Loaded existing sample and results from file.
Ratio of failed simulations in validation sample : 0.492868


In [14]:
# Build the lambda samples: an LHS design of experiments of size N_lambda on the
# unit hypercube (one dimension per deviation symbol), plus a plain random sample
# of the same size that is then conditioned.
D_lambd = len(SOCAM.deviation_symbols)
lambda_vect_unconditioned = ot.ComposedDistribution([ot.Uniform(0, 1)] * D_lambd)
lambda_vect_unconditioned.setDescription(list(map(str, SOCAM.deviation_symbols)))
N_lambda = 800
lambda_sample_unconditioned = otaf.sampling.generateLHSExperiment(lambda_vect_unconditioned ,N_lambda, 999)
#lambda_sample_unconditioned = lambda_sample_unconditioned_generator.generate()
# Note: the conditioned sample is derived from the random sample, not from the LHS design above.
lambda_sample_random = lambda_vect_unconditioned.getSample(N_lambda)
lambda_sample_conditioned = otaf.sampling.condition_lambda_sample(lambda_sample_random, squared_sum=True)

In [15]:
# When True, the cells guarded by `if not pass_next:` (brute-force validation) are skipped.
pass_next = True

# *NOT IMPORTANT* The following was a brute force part to validate the results of the surrogate but takes 27h to run. 

Usually, in reliability analysis, regression models are preferred to classifiers (see https://www.sciencedirect.com/science/article/pii/S0167473020300989), but in this case
the regression was not precise enough, whereas a classifier with some tweaking gave excellent results for reducing the number of calls to the optimization function.

In [16]:
# Brute-force Monte Carlo estimate of the failure probability for every
# conditioned lambda point, without the surrogate. Skipped when pass_next is True.
if not pass_next:
    bounds = None
    SEED_MC_PF = 6436431
    SIZE_MC_PF = int(1e6) #1e4
    # No (N_lambda, SIZE_MC_PF) object array is pre-allocated here: it was never
    # filled or read and would reserve N_lambda * SIZE_MC_PF object slots.
    failure_probabilities, successes, s_values, statuses = [], [], [], []
    failed_optimization_points = []

    start_time = time()  # Record the start time
    for i in range(N_lambda):
        print(f"Doing iteration {i} of {N_lambda}")
        if i>0:
            # Summary of the previous iteration (these variables are arrays from i = 1 on).
            print(f"Failure probability, Min: {min(failure_probabilities)}, / Max: {max(failure_probabilities)}")
            print(f"Failed {(1-successes).sum()} optimizations on { SIZE_MC_PF}")
            print("s_mean: ", s_values.mean().round(3), "s_min: ", np.nanmin(s_values).round(3), "s_max: ", np.nanmax(s_values).round(3))
            print("Statuses are:", np.unique(statuses))
        # Re-seed so that every lambda point is evaluated on the same base sample.
        ot.RandomGenerator.SetSeed(SEED_MC_PF)
        deviation_samples = np.array(RandDeviationVect.getSample(SIZE_MC_PF)) * np.array(
            lambda_sample_conditioned[i]
        )
        optimizations = otaf.uncertainty.compute_gap_optimizations_on_sample(
                SOCAM,
                deviation_samples,
                bounds=bounds,
                n_cpu=-1,
                progress_bar=True,
            )
        successes = np.array([opt.success for opt in optimizations], dtype=bool)
        statuses = np.array([opt.status for opt in optimizations], dtype=int)

        if successes.sum() == 0:
            print("All optimizations failed")
            # Leave the message on screen briefly before clear_output wipes it.
            sleep(0.5)

        failed_optimization_points.append(deviation_samples[np.invert(successes), :])

        s_values = np.array([opt.fun for opt in optimizations], dtype=float)
        s_values = np.nan_to_num(s_values, nan=np.nanmax(s_values))*-1 # Cause the obj function C is -1*s and failed optimizations count as a negative s
        failure_probabilities.append(np.where(s_values < 0, 1, 0).mean())
        clear_output(wait=True)
    print(f"Done {len (lambda_sample_conditioned)} experiments.")
    print(f"Elapsed time: {time() - start_time:.3f} seconds.")
    failed_optimization_points = np.vstack(failed_optimization_points)

    # Lambda points giving the lowest / highest failure probabilities (quantile 0.1).
    X = otaf.uncertainty.find_best_worst_quantile(np.array(lambda_sample_conditioned), np.array(failure_probabilities), 0.1)
    (best_5p_lambda, best_5p_res), (worst_5p_lambda, worst_5p_res) = X

In [17]:
# Recompute the best/worst groups on the squared lambda values with quantile 0.2.
# This overwrites the best_5p_* / worst_5p_* variables of the previous cell.
if not pass_next:    
    best_worst_quants = otaf.uncertainty.find_best_worst_quantile(np.array(lambda_sample_conditioned)**2, np.array(failure_probabilities), 0.2)
    (best_5p_lambda, best_5p_res), (worst_5p_lambda, worst_5p_res) = best_worst_quants

In [18]:
# Report and plot the brute-force results (skipped when pass_next is True).
if not pass_next:    
    print("Lower probability of failure:", "{:.5e}".format(min(failure_probabilities)))
    print("Upper probability of failure:", "{:.5e}".format(max(failure_probabilities)))
    plt.hist(failure_probabilities)
    plt.show()
    otaf.plotting.plot_best_worst_results(best_5p_res, worst_5p_res, figsize=(10,5))
    
    # Variable names are taken from the description of the conditioned lambda sample.
    variable_labels = [var for var in lambda_sample_conditioned.getDescription()]
    otaf.plotting.plot_best_worst_input_data(best_5p_lambda, worst_5p_lambda, variable_labels, figsize=(20,5), labels=False)
    #Upper probability of failure: 5.64000e-05

In [19]:
# Persist / restore the brute-force results with pickle.
# - If the file exists, only the variables that are not already defined in the
#   notebook namespace are restored from it.
# - Otherwise the current variables are stored; if any of them is missing (for
#   instance because the brute-force cell was skipped) the error is printed and
#   nothing is written.
# NOTE(review): pickle.load executes arbitrary code from the file -- only load
# files produced by this notebook.

# Define the filename
filename = 'STORAGE/Modele3D4TrousStored_objects.pkl'

# Check if the file already exists
if os.path.exists(filename):
    # Load the data from the file
    with open(filename, 'rb') as file:
        loaded_data = pickle.load(file)
        if 'lambda_sample_conditioned' not in globals():
            lambda_sample_conditioned = loaded_data['lambda_sample_conditioned']
        if 'failure_probabilities' not in globals():
            failure_probabilities = loaded_data['failure_probabilities']
        if 'successes' not in globals():
            successes = loaded_data['successes']
        if 's_values' not in globals():
            s_values = loaded_data['s_values']
        if 'statuses' not in globals():
            statuses = loaded_data['statuses']
        if 'failed_optimization_points' not in globals():
            failed_optimization_points = loaded_data['failed_optimization_points']
    print(f"Data has been loaded from {filename}")
else:
    try: 
        # Combine all objects into a dictionary for easier storage.
        # Building the dictionary raises NameError before the file is opened
        # when one of the variables does not exist.
        data_to_store = {
            'lambda_sample_conditioned': lambda_sample_conditioned,
            'failure_probabilities': failure_probabilities,
            'successes': successes,
            's_values': s_values,
            'statuses': statuses,
            'failed_optimization_points': failed_optimization_points
        }
    
        # Store the objects using pickle
        with open(filename, 'wb') as file:
            pickle.dump(data_to_store, file)
        print(f"Data has been stored in {filename}")
    except Exception as e :
        # Best effort: report the problem and carry on with the notebook.
        print(e)
        print("passing")

name 'failure_probabilities' is not defined
passing


# Training of the neural network model

In [20]:
# Binary classifier surrogate: predicts from the defect vector whether the slack
# (last column of ytrain) falls below slack_threshold.
# When `load` is True and a saved model exists at save_path it is loaded,
# otherwise the model is trained, its results plotted, and the model saved.
load = True
save_path = f'STORAGE/AI_MODEL_3D_DIM_{dim_devs}_BINARY_SLACK_CLASSIFIER.pth'
binary_slack_classifier = otaf.surrogate.BinaryClassificationModel(
    dim_devs, 2, 
    Xtrain, ytrain[:,-1], 
    slack_threshold=0.0,
    clamping=True, 
    clamping_threshold=np.pi,
    metric_finish=1e6,
    max_epochs=500, 
    batch_size=100000,
    train_size=0.70,
    display_progress_disable=True,
    squeeze_labels = True,
    labels_to_long = True,
    use_dual_target = False,
    save_path = save_path)

lr=0.005

# Replace the default network with a custom MLP assembled from three groups of
# layers (LeakyReLU, then SELU with AlphaDropout, then Sigmoid), ending with 2 outputs.
binary_slack_classifier.model = otaf.torch.nn.Sequential(
    *otaf.surrogate.get_custom_mlp_layers([dim_devs, dim_devs*5], activation_class = otaf.torch.nn.LeakyReLU,),
    *otaf.surrogate.get_custom_mlp_layers([dim_devs*5, dim_devs*3], activation_class = otaf.torch.nn.SELU,
                                         dropout_class = otaf.torch.nn.AlphaDropout, dropout_kwargs = {'p':0.069}),
    *otaf.surrogate.get_custom_mlp_layers([dim_devs*3, dim_devs, 2], activation_class = otaf.torch.nn.Sigmoid),)

binary_slack_classifier.optimizer = otaf.torch.optim.AdamW(binary_slack_classifier.parameters(), lr=lr, weight_decay=0.01)  # Lion(binary_slack_classifier.parameters(), lr=lr, weight_decay=0.0001) #Adam(neural_model.model.parameters(), lr=lr)#
# Class weights are the inverse of the class frequencies in the training set
# (the 1e-16 term avoids a division by zero).
# NOTE(review): index 0 gets the success weight and index 1 the failure weight;
# this presumes label 1 means "failed" -- confirm against BinaryClassificationModel.
weight =  otaf.torch.tensor([1.0/(train_success_ratio+1e-16), 1.0/(train_failure_ratio+1e-16)]).float()
binary_slack_classifier.criterion = otaf.torch.nn.CrossEntropyLoss(weight=weight, reduction='mean', label_smoothing=0.001) # otaf.torch.nn.BCEWithLogitsLoss(pos_weight=weight) # otaf.torch.nn.MSELoss()
otaf.surrogate.initialize_model_weights(binary_slack_classifier)
# NOTE(review): the gamma passed to ExponentialLR is 1.0005 (> 1), which would make
# the learning rate grow at every scheduler step -- confirm this is intended.
binary_slack_classifier.scheduler = otaf.torch.optim.lr_scheduler.ExponentialLR(binary_slack_classifier.optimizer, 1.0005) #LinearLR(neural_model.optimizer, 1, 0.1, 200)

if os.path.exists(save_path) and load:
    binary_slack_classifier.load_model()
else :
    binary_slack_classifier.train_model()
    binary_slack_classifier.plot_results(save_as_png=True, save_path='STORAGE/images')
    binary_slack_classifier.save_model()


[32, 392, 400, 312, 128]


In [21]:
# This was for testing, we pass it now.
# The body below is never executed (`if False`); it evaluates the classifier on the
# validation set and optimizes the decision thresholds for two different objectives.
if False : 
    pred_class = binary_slack_classifier.evaluate_model(Xvalid).cpu().detach().numpy()
    ground_truth = yvalid[:,-1]#np.array([res.x[-1] for res in ground_truth_full],dtype="float32")
    ground_truth_binary = np.where(ground_truth<0,1,0) # 1 where the validation slack is negative (failure), 0 otherwise.
    
    # Optimize thresholds
    optimization_results_fn_tn = otaf.surrogate.optimize_thresholds_with_alpha(pred_class, ground_truth_binary, bounds=[-5.0, 5.0], optimize_for="minimize_fn_maximize_tn", optimal_ratio=1e-3)
    optimization_results_tp_fp = otaf.surrogate.optimize_thresholds_with_alpha(pred_class, ground_truth_binary, bounds=[-5.0, 5.0], optimize_for="minimize_fp_maximize_tp", equality_decision="success", optimal_ratio=4*1e-2)
    
    print(f"\nBest Failure Threshold (minimize_fn_maximize_tn): {optimization_results_fn_tn['best_failure_threshold']}")
    print(f"Best Success Threshold (minimize_fn_maximize_tn): {optimization_results_fn_tn['best_success_threshold']}")
    print("Evaluation Metrics (minimize_fn_maximize_tn):", optimization_results_fn_tn['evaluation'])
    print('\n')
    print(f"Best Failure Threshold (maximize_tp_minimize_fp): {optimization_results_tp_fp['best_failure_threshold']}")
    print(f"Best Success Threshold (maximize_tp_minimize_fp): {optimization_results_tp_fp['best_success_threshold']}")
    print("Evaluation Metrics (maximize_tp_minimize_fp):", optimization_results_tp_fp['evaluation'])

In [22]:
# Combine the classifier and the constraint model into the surrogate-assisted
# analysis object, then tune its decision thresholds on the validation data.
optimizer_milp_ai  = otaf.uncertainty.SocAssemblyAnalysisOptimized(binary_slack_classifier, SOCAM, Xvalid, yvalid)
optimizer_milp_ai.optimize_thresholds(bounds=[-5.0, 5.0]) 
# Last expression of the cell: displays the object's summary.
optimizer_milp_ai

Using basin hopping with cobyla to optimize thresholds for minimizing classification errors.
Using basin hopping with cobyla to optimize thresholds for minimizing classification errors.


SocAssemblyAnalysisOptimized(
  Binary Classifier: BinaryClassificationModel
  Constraint Matrix Generator: SystemOfConstraintsAssemblyModel
  X Optimization Shape: (250000, 32)
  Y Optimization Shape: (250000, 36)
  Optimize Results (FN/TN):
    Best Failure Threshold: -2.1199193798865923
    Best Success Threshold: 1.3054405048795277
    Confusion Matrix: 
      TN: 11090
      FP: 115693
      FN: 2
      TP: 123215
  Optimize Results (FP/TP):
    Best Failure Threshold: 1.7720199692532086
    Best Success Threshold: -2.129186472274088
    Confusion Matrix: 
      TN: 126742
      FP: 41
      FN: 100898
      TP: 22319
)

# Attempt to enrich the training points to improve precision (does not work yet).

In [23]:
# When True, the data-augmentation cells guarded by `if not pass_data_aug:` are skipped.
pass_data_aug = True

In [24]:
# Surrogate-assisted estimate of the failure probability for every conditioned
# lambda point; the per-point results are kept as candidate new training samples.
if not pass_data_aug:
    bounds = None
    SEED_MC_PF = 6436431
    SIZE_MC_PF = int(2*1e5) #1e4
    failure_probabilities, new_training_samples = [], []
    
    # One base sample is drawn once and rescaled for every lambda point.
    ot.RandomGenerator.SetSeed(SEED_MC_PF)
    sample = np.array(RandDeviationVect.getSample(SIZE_MC_PF))
    print('Generated Sample.')
    
    start_time = time()  # Record the start time
    for i in range(N_lambda):        
    
        sample_lambd = sample * np.array(lambda_sample_conditioned[i])
        
        failures, smp_res = optimizer_milp_ai.soc_optimization_sample(sample_lambd, n_cpu=-2, batch_size=500, progress_bar=False, batch_size_ai=int(1e6))
        failure_prob = failures.mean()
        failure_probabilities.append(failure_prob)
        new_training_samples.append(smp_res)
        print(f"--> Iteration {i + 1:03d}. Failures: {int(failures.sum()):04d}, Probability : {failure_prob:.3E}. Min: {min(failure_probabilities):.3E}, / Max: {max(failure_probabilities):.3E}")
        
    print(f"Done {len (lambda_sample_conditioned)} experiments.")
    print(f"Elapsed time: {time() - start_time:.3f} seconds.")
    
    # Lambda points giving the lowest / highest failure probabilities (quantile 0.1).
    X = otaf.uncertainty.find_best_worst_quantile(np.array(lambda_sample_conditioned), np.array(failure_probabilities), 0.1)
    (best_5p_lambda, best_5p_res), (worst_5p_lambda, worst_5p_res) = X

In [25]:
if not pass_data_aug:
    # Every entry of new_training_samples is a pair of arrays (presumably model
    # inputs and outputs -- verify against soc_optimization_sample). Split the
    # pairs into two parallel lists.
    a_list = [first for first, _ in new_training_samples]
    b_list = [second for _, second in new_training_samples]

    # Stack the per-iteration arrays along the first axis.
    a_array = np.concatenate(a_list)
    b_array = np.concatenate(b_list)

    # Report the resulting shapes as a sanity check.
    print(f"Shape of a_array: {a_array.shape}")
    print(f"Shape of b_array: {b_array.shape}")

In [26]:
# Add the collected points to the classifier's data, retrain it and plot the results,
# then recompute the best/worst lambda groups on the squared lambda values (quantile 0.2).
if not pass_data_aug:
    binary_slack_classifier.add_new_data_points(a_array, b_array)
    binary_slack_classifier.train_model()
    binary_slack_classifier.plot_results()
    best_worst_quants = otaf.uncertainty.find_best_worst_quantile(np.array(lambda_sample_conditioned)**2, np.array(failure_probabilities), 0.2)
    (best_5p_lambda, best_5p_res), (worst_5p_lambda, worst_5p_res) = best_worst_quants

In [27]:
# Report and plot the surrogate-assisted results (skipped when pass_data_aug is True).
if not pass_data_aug:
    print("Lower probability of failure:", "{:.5e}".format(min(failure_probabilities)))
    print("Upper probability of failure:", "{:.5e}".format(max(failure_probabilities)))
    plt.hist(failure_probabilities)
    plt.show()
    otaf.plotting.plot_best_worst_results(best_5p_res, worst_5p_res, figsize=(10,5))
    
    variable_labels = [var for var in lambda_sample_conditioned.getDescription()]
    # Only the first three best / worst lambda vectors are plotted.
    otaf.plotting.plot_best_worst_input_data(best_5p_lambda[:3], worst_5p_lambda[:3], variable_labels, figsize=(20,5), labels=False)
    #Upper probability of failure: 5.64000e-05

# Using the score function approach on the neural surrogate.

In [28]:
# Threshold and scaling factors
scale_factor = 1.0  # Adjust this scaling factor for your specific range
# Note: this rebinds SEED, which an earlier cell used to name the training-data cache file.
SEED = 38421668465243

N_SAMPLE_MINI = int(2*1e5)
N_SAMPLE_GLD = 2*int(1e4)
# Split the flat parameter vector of the defect distribution into standard
# deviations and means, based on the parameter descriptions ("sigma" / "mu").
standards = np.array([RandDeviationVect.getParameter()[i] for i , param in enumerate(RandDeviationVect.getParameterDescription()) if "sigma" in param]) 
means = np.array([RandDeviationVect.getParameter()[i] for i , param in enumerate(RandDeviationVect.getParameterDescription()) if "mu" in param])
ot.RandomGenerator.SetSeed(SEED)
# Halton-based samples of the defect distribution. `sample` is additionally
# multiplied by 1.5; `sample_gld` is left unscaled.
sample = otaf.sampling.generate_and_transform_sequence(RandDeviationVect.getDimension(), N_SAMPLE_MINI, RandDeviationVect, sequence_type='halton')*1.5
sample_gld = otaf.sampling.generate_and_transform_sequence(RandDeviationVect.getDimension(), N_SAMPLE_GLD, RandDeviationVect, sequence_type='halton')
#sample = np.array(RandDeviationVect.getSample(N_SAMPLE_MINI))*1.5
threshold = 0 

In [29]:
def model(x):
    """Evaluate the surrogate-assisted analysis on the deviation sample ``x``.

    Returns the first element of the result of
    ``optimizer_milp_ai.soc_optimization_sample`` (the failure indicators).
    """
    return optimizer_milp_ai.soc_optimization_sample(
        x,
        n_cpu=-2,
        batch_size=1000,
        progress_bar=False,
        batch_size_ai=int(1e6),
    )[0]

def model_base(x, sample=sample_gld):
    """Return the slack of every sample point, computed without the surrogate.

    The base ``sample`` is scaled column-wise by ``sqrt(x)`` and each scaled
    point is solved with the gap optimization; the slack is the last entry of
    each optimization result.
    """
    scaled_sample = sample * np.sqrt(x[np.newaxis, :])
    lp_optimization_results = otaf.uncertainty.compute_gap_optimizations_on_sample_batch(
        constraint_matrix_generator=SOCAM,
        deviation_array=scaled_sample,
        batch_size=5000,
        n_cpu=4,
        progress_bar=True,
        verbose=0,
        dtype="float32",
    )
    return np.array([solution[-1] for solution in lp_optimization_results])

@otaf.optimization.scaling(scale_factor)
def optimization_function_mini(x, getJac=True, model=model): 
    """Objective for minimizing the non-compliancy rate with respect to ``x``.

    With ``getJac`` the value and its gradient are returned (score-function
    Monte Carlo estimator); otherwise only the mean of ``model`` evaluated on
    the sample scaled by ``sqrt(x)`` is returned.
    """
    if not getJac:
        scaled_sample = sample * np.sqrt(x[np.newaxis, :])
        return model(scaled_sample).mean()

    estimator = otaf.uncertainty.monte_carlo_non_compliancy_rate_w_gradient(
        threshold, sample, means, standards, model, model_is_bool=True)
    outcome = estimator(x)
    return outcome[0], outcome[1]

@otaf.optimization.scaling(scale_factor)
def optimization_function_maxi(x, getJac=True, model=model): 
    """Objective for maximizing the non-compliancy rate with respect to ``x``.

    Same quantities as the minimization objective with the sign flipped, so
    that a minimizer can be used: with ``getJac`` the negated value and negated
    gradient are returned, otherwise only the negated mean of ``model`` on the
    sample scaled by ``sqrt(x)``.
    """
    if not getJac:
        scaled_sample = sample * np.sqrt(x[np.newaxis, :])
        return -1 * model(scaled_sample).mean()

    estimator = otaf.uncertainty.monte_carlo_non_compliancy_rate_w_gradient(
        threshold, sample, means, standards, model, model_is_bool=True)
    outcome = estimator(x)
    return -1 * outcome[0], -1 * outcome[1]


# Define the callback function
def print_callback(xk):
    """Print the current parameter vector ``xk`` (intended as an optimizer callback)."""
    print(f"Current parameter values: {xk}")

In [30]:
# Slack values for a scaling vector that repeats the pattern (0, 0, 0, 1) eight times (32 entries).
slack = model_base(np.array([0.0, 0.0, 0.0, 1.0]*8))

  0%|          | 0/20000 [00:00<?, ?it/s]

In [38]:
#_ = plt.hist(slack, bins=100)
gld = GLD('VSL')
param_LMM = gld.fit_LMM(slack, disp_fit=False, disp_optimizer=False)
# Assuming 'param' contains the fitted parameters and gld is an instance of the GLD class
probability = gld.CDF_num(0, param_LMM)
print(f"Probability of being less than 0: {probability}")

Probability of being less than 0: [0.0098031]


In [32]:
# NOTE(review): undefined name -- presumably left here on purpose to raise a
# NameError and halt "Run All" before the exploratory cells below. Confirm, and
# consider removing this cell.
qergqrg

NameError: name 'qergqrg' is not defined

In [None]:
# Empirical fraction of negative slack values. The denominator is the actual
# sample length rather than a hard-coded 20000, so the ratio stays correct when
# N_SAMPLE_GLD changes.
np.where(slack<0,1,0).sum()/len(slack)

In [None]:
from scipy.optimize import fsolve
def solve_for_lambda_beta(lambda_u, lambda_v, lambda_alpha):
    """Return a ``lambda_beta`` that puts the four lambdas on the constraint surface.

    The surface is defined by
    ``u**2 + v**2 + alpha**2 + beta**2 + 2 * (u * alpha + v * beta) = 1``.
    The root is searched numerically with ``fsolve``, starting from 0.5.
    """
    initial_guess = 0.5

    def residual(lambda_beta):
        # Left-hand side minus right-hand side of the surface equation.
        return (lambda_u**2 + lambda_v**2 + lambda_alpha**2 + lambda_beta**2 +
                2 * (lambda_u * lambda_alpha + lambda_v * lambda_beta) - 1)

    roots = fsolve(residual, initial_guess)
    return roots[0]

def generate_points_on_surface(num_points=1000):
    """
    Sample points of the unit hypercube [0, 1]^4 that lie on the surface

        lambda_u**2 + lambda_v**2 + lambda_alpha**2 + lambda_beta**2
        + 2 * (lambda_u * lambda_alpha + lambda_v * lambda_beta) = 1

    lambda_u, lambda_v and lambda_alpha are drawn uniformly in [0, 1] (global
    numpy random state) and lambda_beta is then solved for.

    The constraint factorises as

        (lambda_u + lambda_alpha)**2 + (lambda_v + lambda_beta)**2 = 1

    so lambda_beta = -lambda_v +/- sqrt(1 - (lambda_u + lambda_alpha)**2).
    With lambda_v >= 0 only the '+' root can be non-negative, so it is
    computed directly for all samples at once instead of running one
    numerical root search per sample.

    Parameters:
    - num_points: number of random candidates drawn; candidates for which no
      lambda_beta in [0, 1] exists are discarded, so fewer rows are returned.

    Returns:
    - A numpy array of shape (num_valid_points, 4) where each row is
      (lambda_u, lambda_v, lambda_alpha, lambda_beta).
    """
    # Generate random values for lambda_u, lambda_v, lambda_alpha in [0, 1]
    lambda_u = np.random.rand(num_points)
    lambda_v = np.random.rand(num_points)
    lambda_alpha = np.random.rand(num_points)

    # Setting lambda_beta = 0: a non-negative root exists only if the
    # remaining terms do not already exceed 1.
    inequality_values = lambda_u**2 + lambda_v**2 + lambda_alpha**2 + 2 * (lambda_u * lambda_alpha)
    valid_mask = inequality_values <= 1
    lambda_u_valid = lambda_u[valid_mask]
    lambda_v_valid = lambda_v[valid_mask]
    lambda_alpha_valid = lambda_alpha[valid_mask]

    # Closed-form '+' root. The radicand is clipped at 0 to absorb rounding
    # noise for candidates sitting exactly on the validity boundary.
    radicand = np.clip(1.0 - (lambda_u_valid + lambda_alpha_valid) ** 2, 0.0, None)
    lambda_beta_valid = np.sqrt(radicand) - lambda_v_valid

    # Only keep points where the solved lambda_beta is between 0 and 1
    final_mask = (lambda_beta_valid >= 0) & (lambda_beta_valid <= 1)
    final_points = np.column_stack((lambda_u_valid[final_mask],
                                    lambda_v_valid[final_mask],
                                    lambda_alpha_valid[final_mask],
                                    lambda_beta_valid[final_mask]))

    return final_points

# Draw 100000 random candidates and keep those that land on the surface
points = generate_points_on_surface(num_points=100000)

# Pair plot of the four coordinates; column order matches the rows returned
# by generate_points_on_surface.
labels = ['lambda_u', 'lambda_v', 'lambda_alpha', 'lambda_beta']
otaf.plotting.pair_plot(points, labels)

In [None]:
# NOTE(review): this cell used to hold a bare undefined name, which looks like
# a deliberate way of aborting "Run All" before the optimisation cells. The
# stop is kept but made explicit; delete this cell if it is no longer wanted.
raise RuntimeError("Intentional stop: run the optimisation cells below manually.")

In [None]:
# Constraints and bounds on the lambda parameters, both derived by otaf from
# the composed distribution RandDeviationVect. Two constraint objects are
# returned: `cons` and `linearConstraint`; both are later passed as the
# `constraints` argument of scipy optimizers.
# NOTE(review): 0.01 is presumably the lower bound / margin applied to each
# lambda -- confirm against otaf.optimization.bounds_from_composed_distribution.
cons, linearConstraint = otaf.optimization.lambda_constraint_dict_from_composed_distribution(RandDeviationVect, tol=0, keep_feasible=False)
bounds_lambda = otaf.optimization.bounds_from_composed_distribution(RandDeviationVect, 0.01)

### Some checks to compare the finite-difference and score-based gradients

# Reference point: every parameter set to 0.25.
x0_maxi = np.array([0.25]*RandDeviationVect.getDimension()) # [0.1,0.1,0.4,0.4]*8  # Initial guess
# Sensitivity printout: bump one parameter at a time by 0.05 and print the
# value-only objective (getJac=False). The values are only printed, not stored.
for i in range(len(x0_maxi)):
    x_perturb = x0_maxi.copy()
    x_perturb[i] += 0.05
    f_val = optimization_function_maxi(x_perturb, getJac=False)
    print(f"Perturbing parameter {i}: Objective value: {f_val}")

# Gradient returned by the objective itself (second element of the pair).
_, jac_score = optimization_function_maxi(x0_maxi, getJac=True)

# Finite-difference gradient with step 0.1; the trailing False is forwarded to
# the objective as getJac so that it returns a scalar.
jac_approx = approx_fprime(x0_maxi, optimization_function_maxi, 0.1, False)
# Visual comparison of the two gradients.
otaf.plotting.compare_jacobians(jac_score, jac_approx, class_labels=otaf.sampling.validate_and_extract_indices(SOCAM.deviation_symbols))

In [None]:
# Single local run of the derivative-free COBYQA method on the negated
# objective, starting from 0.25 for every parameter.
x0_maxi = [0.25]*RandDeviationVect.getDimension() # [0.1,0.1,0.4,0.4]*8  # Initial guess

# args=(False,) sets getJac=False so the objective returns only its value.
# The run stops at the f_target / maxiter / maxfev limits given in `options`;
# print_callback echoes the current iterate.
res = minimize(optimization_function_maxi, x0_maxi, 
               method="COBYQA", jac=False, args=(False,), 
               bounds=bounds_lambda, 
               constraints=linearConstraint, 
               options={"f_target":-0.2, 
                        "maxiter":50,
                        "maxfev":150,
                        "feasibility_tol":1e-4,
                        "initial_tr_radius":1,
                        "final_tr_radius":1e-3,
                       "disp":True}, 
               callback = print_callback)

In [None]:
# Display the parameter vector stored in the `res` returned by minimize.
res.x

# Global optimization basinhopping

In [None]:
# Helper objects from otaf.optimization that are plugged into scipy's
# basinhopping below as take_step / accept_test / callback. They all share the
# same OptimizationStorage instance, built from the bounds and a constraint
# checker derived from `linearConstraint`.
# NOTE(review): the second argument (0) of create_constraint_checker is
# presumably a tolerance -- confirm in otaf.
constraint_checker = otaf.optimization.create_constraint_checker(linearConstraint,0)
opt_storage = otaf.optimization.OptimizationStorage(bounds_lambda, constraint_checker)
step_taking = otaf.optimization.StepTaking(opt_storage, SOCAM.deviation_symbols)
accept_test = otaf.optimization.AcceptTest(opt_storage, SOCAM.deviation_symbols)
callback = otaf.optimization.Callback(opt_storage)

In [None]:
# Basinhopping for the maximization function using COBYQA
x0_maxi = [0.25] * RandDeviationVect.getDimension()  # Initial guess

# Keyword arguments forwarded by basinhopping to scipy.optimize.minimize for
# every local search. Note that this rebinds the global `minimizer_kwargs_maxi`.
minimizer_kwargs_maxi = {
    "method": "COBYQA",   # Derivative-free local optimizer
    "jac": False,         # COBYQA doesn't use Jacobians
    "args": (False,),     # getJac=False: the objective returns only its value
    "constraints": linearConstraint,
    "bounds": bounds_lambda,
    "options": {
        "f_target": -0.2, 
        "maxiter": 50,
        "maxfev": 150,
        "feasibility_tol": 1e-4,
        "initial_tr_radius": 1,
        "final_tr_radius": 1e-3,
        "disp": True
    }
}

# Running basinhopping with COBYQA as the local optimizer; step proposal,
# acceptance test and callback are the otaf helper objects.
res_maxi = basinhopping(
    optimization_function_maxi, x0_maxi,
    niter=80,
    T=1,
    stepsize=3.0,
    niter_success=19,
    interval=5,
    minimizer_kwargs=minimizer_kwargs_maxi,
    disp=True,
    take_step=step_taking,
    accept_test=accept_test,
    callback=callback
)

print("Maximization Result with COBYQA:")
print(res_maxi)


In [None]:
# Display the parameter vector stored in the basinhopping result `res_maxi`.
res_maxi.x

In [None]:
# Evaluate the negated objective (default getJac=True, so a value/gradient
# pair) at the pattern [0, 0, 0.5, 0.5] repeated 8 times.
optimization_function_maxi(np.array([0,0,0.5,0.5]*8))

In [None]:
# Basinhopping for the maximization function
x0_maxi = [0.25]*RandDeviationVect.getDimension() # Initial guess

# Local searches use gradient-based SLSQP: jac=True tells scipy that the
# objective returns (value, gradient), and args=(True, model) selects that
# branch of the objective (getJac=True). Rebinds the global
# `minimizer_kwargs_maxi`.
minimizer_kwargs_maxi = {
    "method": "SLSQP",
    "jac":True,
    "args": (True, model),
    "constraints": linearConstraint,
    "bounds": bounds_lambda,
    "options": {"disp": True, "maxiter": 500, "ftol": 1e-4}
}

# Basinhopping with the custom otaf step / acceptance / callback objects
res_maxi = basinhopping(optimization_function_maxi, x0_maxi, 
                        niter=80, 
                        T=1, 
                        stepsize=3.0, #2.3, 
                        niter_success=19,
                        interval=5,
                        minimizer_kwargs=minimizer_kwargs_maxi, 
                        disp=True,
                        take_step=step_taking,
                        accept_test=accept_test,
                        callback=callback)

print("Maximization Result:")
print(res_maxi)

In [None]:
# Basinhopping for the minimization function
x0_mini = [0.25]*RandDeviationVect.getDimension()   # Initial guess

# Local searches use gradient-based SLSQP: jac=True tells scipy that the
# objective returns (value, gradient); args=(True,) is forwarded to the
# objective as getJac=True (note the trailing comma: a one-element tuple).
minimizer_kwargs_mini = {
    "method": "SLSQP",
    "jac": True,
    "args": (True,),
    "constraints": cons,
    "bounds": bounds_lambda,
    "options": {"disp": True, "maxiter": 100, "ftol": 1e-6, "eps":0.1},
}

# The local-minimizer settings defined just above are the ones passed here
# (previously the maximization settings were passed by mistake).
res_mini = basinhopping(optimization_function_mini, x0_mini,
                        niter=80,
                        T=0.1,
                        stepsize=1.3,
                        niter_success=19,
                        interval=5,
                        target_accept_rate=0.69,
                        stepwise_factor=0.69,
                        minimizer_kwargs=minimizer_kwargs_mini, disp=True)

print("Minimization Result:")
print(res_mini)

In [None]:
# Display the best local-minimization result recorded by basinhopping.
res_maxi.lowest_optimization_result

In [None]:
# NOTE(review): `algo` is not defined in the visible part of this notebook;
# this cell probably relies on leftover kernel state -- confirm or remove.
algo.x

In [None]:
def custom_format(value):
    """
    Render ``value`` in one-decimal scientific notation with a compact exponent.

    A zero exponent is dropped altogether ("1.5E+00" -> "1.5"); otherwise the
    leading zeros of the exponent are stripped ("1.5E-03" -> "1.5E-3").
    """
    scientific = f"{value:.1E}"

    # Exponent is zero: keep only the mantissa.
    if "E+00" in scientific:
        return scientific.replace("E+00", "")

    # Otherwise keep the sign and drop the zero padding of the exponent.
    return re.sub(r"E([+-])0*(\d+)", r"E\1\2", scientific)

def plot_bar(values, labels, ylim=(0, 1),
            title = 'Relative contribution of each DOF to the total variability of the feature',
            ylabel = 'Contribution'):
    """
    Plots a bar plot with given values and LaTeX formatted labels.

    The background behind each bar is shaded with a colour shared by all
    labels that end with the same numeric index. Bars whose height falls
    outside ``ylim`` get a small text box showing their value (formatted by
    ``custom_format``) next to the axis limit they exceed.

    Parameters:
    - values: List or array of floats, values to plot.
    - labels: List of variable names, one per value. Each label must end with
      digits; that trailing index defines the colour group.
    - ylim: (lower, upper) limits of the y axis.
    - title: Title of the figure.
    - ylabel: Label of the y axis.

    Raises:
    - ValueError: if a label does not end with a numeric index.
    """
    # Generate the LaTeX formatted labels
    variable_labels_tex = [f"${sp.printing.latex(sp.Symbol(var))}$" for var in labels]

    # Extract the trailing index of every label once, as a string, so that the
    # very same key is used to fill and to read the colour mapping.
    index_pattern = re.compile(".*?([0-9]+)$")
    label_indices = []
    for var_label in labels:
        found = index_pattern.match(str(var_label))
        if found is None:
            raise ValueError(f"Label {var_label!r} does not end with a numeric index")
        label_indices.append(found.group(1))

    # One colour per distinct index. The counter is incremented before use, so
    # palette entry 0 is never picked.
    color_mapping = {}
    color_idx = 0
    for index in label_indices:
        if index not in color_mapping:
            color_idx += 1
            color_mapping[index] = otaf.plotting.hex_to_rgba(otaf.plotting.color_palette_3[color_idx], as_float=True)

    # Create the bar plot
    fig, ax = plt.subplots(figsize=(10, 6))

    # Shade the slot of each bar with the colour of its index group
    for position, index in enumerate(label_indices):
        ax.axvspan(position - 0.5, position + 0.5, facecolor=color_mapping[index], alpha=0.3)

    bars = ax.bar(range(len(values)), values, tick_label=variable_labels_tex, align='center')

    # Set the labels and title
    ax.set_xlabel('Degrees of freedom', fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.set_title(title, fontsize=14)
    ax.set_ylim(ylim)

    di = ylim[1] - ylim[0]
    # Number of consecutive bars overflowing at the top / bottom; used to
    # stagger neighbouring text boxes so they do not overlap.
    upper_count = 0
    lower_count = 0

    # Add text box for values out of y-axis scope
    for bar in bars:
        height = bar.get_height()
        x_center = bar.get_x() + bar.get_width() / 2

        if height > ylim[1]:
            upper_count += 1
            ax.text(x_center, ylim[1] - (0.025 * di) - (0.05 * di * upper_count), custom_format(height), ha='center', va='bottom', fontsize=10, bbox=dict(facecolor='white', alpha=0.6))
        else:
            upper_count = 0

        if height < ylim[0]:
            lower_count += 1
            ax.text(x_center, ylim[0] + (0.025 * di) + (0.05 * di * lower_count), custom_format(height), ha='center', va='top', fontsize=10, bbox=dict(facecolor='white', alpha=0.6))
        else:
            lower_count = 0

    # Rotate the x labels for better readability
    plt.xticks(rotation=45, ha='right')

    # Show the plot
    plt.tight_layout()
    plt.show()


In [None]:
# Hand-picked parameter vector: the pattern [0.01, 0.01, 0.97, 0.01] repeated
# 8 times. With the default getJac=True the objective returns a pair, so
# res2[0] is the negated value and res2[1] the negated gradient term.
# NOTE(review): a plain list is passed here whereas other cells pass a numpy
# array -- confirm the otaf estimator accepts lists.
best_smp = [0.01,0.01,0.97,0.01]*8#best_5p_lambda[2]
res2 = optimization_function_maxi(best_smp)
print(res2)

In [None]:
# Switch matplotlib to the Qt backend (figures open in a separate window; this
# needs a Qt binding and a display), then plot the parameter values themselves
# with y-limits [0, 1.01].
%matplotlib qt
plot_bar(best_smp, RandDeviationVect.getDescription(), [0,1.01])

In [None]:
# Bar plot of res2[1], the second element returned by
# optimization_function_maxi (the negated gradient term), with y-limits [-1, 1].
# NOTE(review): the y label says "x 100"; presumably this matches the
# scale_factor applied by the scaling decorator -- verify.
plot_bar(res2[1], RandDeviationVect.getDescription(), [-1, 1], 'Score function for each degree of freedom', "Score function value x 100")