In [1]:
# imports
from scisample.samplers import new_sampler
import plotly.graph_objects as go
from scipy.optimize import minimize
import numpy as np
import yaml
import pandas as pd
import tempfile
import shutil
import os

In [5]:
# functions
def rosenbrock_vector(x):
    """Evaluate the 2D Rosenbrock function at a point given as a sequence.

    :param x: indexable with at least two entries; ``x[0]`` and ``x[1]``
              are the two coordinates.
    :return: ``(1 - x0)**2 + 100*(x1 - x0**2)**2``, which is 0 at (1, 1).
    """
    first, second = x[0], x[1]
    offset = 1 - first
    valley = second - first**2
    return offset**2 + 100*valley**2

def rosenbrock_xy(x, y):
    """Evaluate the 2D Rosenbrock function from separate coordinates.

    :param x: first coordinate (scalar or array-like supporting arithmetic).
    :param y: second coordinate, same kind as ``x``.
    :return: ``(1 - x)**2 + 100*(y - x**2)**2``, which is 0 at (1, 1).
    """
    offset = 1 - x
    valley = y - x**2
    return offset**2 + 100*valley**2

# def rastrigin(x):
#     """ Rastrigin function """
#     n = len(x)
#     return 10 * n + sum(x**2 - 10 * np.cos(2 * np.pi * x))

# def ackley(x):
#     """ Ackley function """
#     n = len(x)
#     return -20 * np.exp(-0.2 * np.sqrt((1/n) * np.sum(x**2))) - \
#            np.exp((1/n) * np.sum(np.cos(2 * np.pi * x))) + 20 + np.exp(1)

def booth(x):
    """Evaluate Booth's function at a point given as a sequence.

    :param x: indexable with at least two entries; ``x[0]`` and ``x[1]``
              are the two coordinates.
    :return: ``(x0 + 2*x1 - 7)**2 + (2*x0 + x1 - 5)**2``, which is 0 at (1, 3).
    """
    first, second = x[0], x[1]
    residual_a = first + 2*second - 7
    residual_b = 2*first + second - 5
    return residual_a**2 + residual_b**2

def beale(x):
    """Evaluate Beale's function at a point given as a sequence.

    :param x: indexable with at least two entries; ``x[0]`` and ``x[1]``
              are the two coordinates.
    :return: sum of the three squared residuals
             ``c - x0 + x0*x1**k`` for ``(c, k)`` in
             ``(1.5, 1), (2.25, 2), (2.625, 3)``; 0 at (3, 0.5).
    """
    first, second = x[0], x[1]
    term_linear = 1.5 - first + first*second
    term_square = 2.25 - first + first*second**2
    term_cubic = 2.625 - first + first*second**3
    return term_linear**2 + term_square**2 + term_cubic**2

def booth_xy(x, y):
    """Evaluate Booth's function from separate coordinates.

    :param x: first coordinate (scalar or array-like supporting arithmetic).
    :param y: second coordinate, same kind as ``x``.
    :return: ``(x + 2*y - 7)**2 + (2*x + y - 5)**2``, which is 0 at (1, 3).
    """
    residual_a = x + 2*y - 7
    residual_b = 2*x + y - 5
    return residual_a**2 + residual_b**2

# def beale_xy(x, y):
#     """ Beale's function """
#     return (1.5 - x + x*y)**2 + (2.25 - x + x*y**2)**2 + \
#            (2.625 - x + x*y**3)**2


def yaml_string_to_dict(yaml_string):
    """Parse YAML text with ``yaml.safe_load`` and return the parsed object.

    :param yaml_string: YAML document as a string.
    :return: whatever ``yaml.safe_load`` produces for the document; the
             callers in this notebook pass a mapping and use the result
             as a dictionary.
    """
    return yaml.safe_load(yaml_string)

In [6]:
# Common starting point for every optimisation run
x0 = np.array([0.0, 0.0])

# Minimise each test function with Nelder-Mead, in the same order as before,
# and report the minimum value together with the point where it was found.
for objective in (rosenbrock_vector, booth, beale):
    result = minimize(objective, x0, method='Nelder-Mead')
    print("Minimum value:", result.fun)
    print("Minimum point:", result.x)

Minimum value: 3.6861769151759075e-10
Minimum point: [1.00000439 1.00001064]
Minimum value: 2.5039618826825238e-09
Minimum point: [1.00001846 2.99996579]
Minimum value: 5.525325548786374e-10
Minimum point: [2.99994196 0.49998485]


In [27]:
# Adaptive best-candidate sampling of the Rosenbrock function.
#
# 1. Draw an initial batch of samples over the bounding box.
# 2. Repeatedly feed all samples gathered so far back to the sampler
#    (through a temporary CSV file) so it can propose new points, until
#    the sampler returns nothing or the iteration budget is used up.
# 3. Plot every sample, and the ones at or below `cost_target`, on top of
#    a log-scaled contour plot of the function.
cost_target = 1.0

downselect_ratio = 0.3
function = rosenbrock_xy
(x_min, x_max) = (-1, 2)
(y_min, y_max) = (-1, 2)

# To study Booth's function instead, use:
# function = booth_xy
# (x_min, x_max) = (-1, 4)
# (y_min, y_max) = (-1, 4)
yaml_string = f"""
type: best_candidate
num_samples: 30
# previous_samples: samples.csv # optional
# cost_variable: Z   # required if previous_samples is provided
parameters:
    X:
        min: {x_min}
        max: {x_max}
    Y:
        min: {y_min}
        max: {y_max}
"""
sample_dictionary = yaml_string_to_dict(yaml_string)

sampler = new_sampler(sample_dictionary)
samples = sampler.get_samples()

for sample in samples:
    sample['Z'] = function(sample['X'], sample['Y'])

df = pd.DataFrame(samples)

iterations = 20
for _ in range(iterations):
    # The context manager removes the temporary directory on every exit
    # path, including the early `break` below and any exception raised by
    # the sampler (the previous explicit rmtree was skipped on `break`).
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_file = os.path.join(temp_dir, "temp.csv")
        df.to_csv(temp_file)

        # next set of samples
        # (the YAML text keeps its own indentation; it is string content)
        yaml_string = f"""
    type: best_candidate
    num_samples: 30
    previous_samples: {temp_file} # optional
    cost_variable: Z   # required if previous_samples is provided
    cost_target_oversample_ratio: 2.0 # default = BestCandidateSampler.DEFAULT_COST_TARGET_OVERSAMPLE_RATIO
    downselect_ratio: {downselect_ratio}  # default = BestCandidateSampler.DEFAULT_DOWNSELECT_RATIO
    voxel_overlap: 1.0 # default = BestCandidateSampler.DEFAULT_VOXEL_OVERLAP
    cost_target: {cost_target}   # optional
    parameters:
        X:
            min: {x_min}
            max: {x_max}
        Y:
            min: {y_min}
            max: {y_max}
    """
        sample_dictionary = yaml_string_to_dict(yaml_string)

        sampler = new_sampler(sample_dictionary)
        samples = sampler.get_samples()

    if len(samples) == 0:
        print("no more samples")
        break

    for sample in samples:
        sample['Z'] = function(sample['X'], sample['Y'])

    df = pd.concat([df, pd.DataFrame(samples)], ignore_index=True)

# Five lowest costs found overall (computed once; nothing in the loop reads it)
smallest_values = df['Z'].nsmallest(5).to_list()

selected_rows = df[df['Z'] <= cost_target]

# Contour plot of log(function) over the sampled bounding box
x_values = np.linspace(x_min, x_max, 100)  # x grid spanning [x_min, x_max]
y_values = np.linspace(y_min, y_max, 100)  # y grid spanning [y_min, y_max]
X, Y = np.meshgrid(x_values, y_values)
# The function can be exactly 0 on a grid node (e.g. Rosenbrock at (1, 1)),
# which makes log() emit a divide-by-zero warning and return -inf. Flooring
# at the smallest positive normal float keeps every value finite and leaves
# all other grid values untouched.
Z = np.log(np.maximum(function(X, Y), np.finfo(float).tiny))

fig = go.Figure(data=[
    go.Contour(
        z=Z,
        x=x_values,
        y=y_values,
        zmin=-2,  # Set z minimum value
        zmax=6,   # Set z maximum value
    ),
    go.Scatter(
        x=df["X"],
        y=df["Y"],
        mode="markers",
        marker=dict(
            color="green",
            size=4,
            symbol="circle"
        ),
        name="Samples"
    ),
    go.Scatter(
        x=selected_rows["X"],
        y=selected_rows["Y"],
        mode="markers",
        marker=dict(
            color="red",
            size=4,
            symbol="circle"
        ),
        # Distinct legend entry; both traces used to be called "Samples"
        name=f"Samples with Z <= {cost_target}"
    ),
])
fig.show()


previous_samples: [2.4482540273431423, 3.493152886215404, 5.178256359881593, 6.02818222160903, 20.78035037448795]


extrema for new input_labels:  [-0.98701889 -0.98748883] [1.99977043 1.96635   ]
down sampling to 30 best candidates from 300 total points.
extrema for new input_labels:  [-0.97697003 -0.19247204] [1.4880055  1.97151213]
down sampling to 30 best candidates from 297 total points.


previous_samples: [0.0338157472192878, 0.2606236474598647, 0.2654447081101165, 0.5242710221226229, 1.5367765155164128]


extrema for new input_labels:  [-0.98375579 -0.23332071] [1.25845273 1.51427613]
down sampling to 30 best candidates from 429 total points.


previous_samples: [0.0169129607568715, 0.0338157472192878, 0.2606236474598647, 0.2654447081101165, 0.5242710221226229]


extrema for new input_labels:  [-0.40669638 -0.12178238] [1.26535187 1.52554673]
down sampling to 30 best candidates from 363 total points.


previous_samples: [0.0169129607568715, 0.0338157472192878, 0.2606236474598647, 0.2654447081101165, 0.2845496470485285]
previous_samples: [0.0169129607568715, 0.0338157472192878, 0.0914922198878656, 0.1092503034454281, 0.2401337837684824]
The number of samples to keep is greater than the number of samples to generate. The number of samples to generate will be increased to the number of samples to keep.


extrema for new input_labels:  [-0.36973997 -0.13241255] [1.30460735 1.60188089]
down sampling to 30 best candidates from 315 total points.
extrema for new input_labels:  [-0.32187164 -0.10181175] [1.33033696 1.6752157 ]
down sampling to 34 best candidates from 350 total points.


previous_samples: [0.0169129607568715, 0.0338157472192878, 0.0914922198878656, 0.1092503034454281, 0.235247407130686]
The number of samples to keep is greater than the number of samples to generate. The number of samples to generate will be increased to the number of samples to keep.


extrema for new input_labels:  [-0.27795528 -0.12487503] [1.34568763 1.70736418]
down sampling to 53 best candidates from 580 total points.
no more samples



divide by zero encountered in log



In [31]:
import pandas as pd
import plotly.graph_objects as go
from scipy.spatial import distance

# Pairwise Euclidean distances between all selected (X, Y) points.
# pdist returns the condensed distance vector in (i, j) order with i < j,
# i.e. the same pairs, in the same order, as a nested loop over row pairs,
# without a pandas .iloc lookup for every pair.
points = selected_rows[["X", "Y"]].to_numpy(dtype=float)
# Fewer than two points means there are no pairs; keep `distances` a list.
distances = distance.pdist(points).tolist() if len(points) > 1 else []

# Create histogram trace (bin count left to plotly's automatic choice)
histogram = go.Histogram(x=distances)

# Create figure layout
layout = go.Layout(
    title='Histogram of Distances',
    xaxis=dict(title='Distance'),
    yaxis=dict(title='Frequency'),
)

# Create figure object
fig = go.Figure(data=[histogram], layout=layout)

# Plot the histogram
fig.show()


In [15]:
# Original run note: first random seed (downselect_number = 20,
# downselect_ratio = 0.3, voxel_overlap 0.6)
#
# One scatter plot per column against the row index (rows are appended in
# sampling order by the cells that build `df`). Only Z is drawn on a
# logarithmic y-axis; X and Y keep plotly's default axis type.
for column, use_log_axis in (("Z", True), ("X", False), ("Y", False)):
    fig = go.Figure(data=go.Scatter(x=df.index, y=df[column], mode='markers'))

    if use_log_axis:
        # Set y-axis scale to logarithmic
        fig.update_layout(yaxis_type='log')

    # Add labels and title to the plot. The y-axis label now names the
    # column actually plotted (the X and Y plots used to say "Column Z").
    fig.update_layout(title=f'Scatter Plot of Column {column} vs. Row Index (downselect_ratio = {downselect_ratio})',
                      xaxis_title='Row Index',
                      yaxis_title=f'Column {column}')

    # Display the plot
    fig.show()

In [17]:
# Adaptive best-candidate sampling of Booth's function.
#
# 1. Draw an initial batch of samples over the bounding box.
# 2. Repeatedly feed all samples gathered so far back to the sampler
#    (through a temporary CSV file) so it can propose new points, until
#    the sampler returns nothing or the iteration budget is used up.
# 3. Plot every sample, and the ones at or below `cost_target`, on top of
#    a log-scaled contour plot of the function.
cost_target = 0.1

downselect_ratio = 0.3

# The Rosenbrock settings that used to be assigned here were immediately
# overwritten by the Booth settings below, so only the latter are kept.
function = booth_xy
(x_min, x_max) = (-1, 4)
(y_min, y_max) = (-1, 4)
yaml_string = f"""
type: best_candidate
num_samples: 30
# previous_samples: samples.csv # optional
# cost_variable: Z   # required if previous_samples is provided
parameters:
    X:
        min: {x_min}
        max: {x_max}
    Y:
        min: {y_min}
        max: {y_max}
"""
sample_dictionary = yaml_string_to_dict(yaml_string)

sampler = new_sampler(sample_dictionary)
samples = sampler.get_samples()

for sample in samples:
    sample['Z'] = function(sample['X'], sample['Y'])

df = pd.DataFrame(samples)

iterations = 20
for _ in range(iterations):
    # The context manager removes the temporary directory on every exit
    # path, including the early `break` below and any exception raised by
    # the sampler (the previous explicit rmtree was skipped on `break`).
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_file = os.path.join(temp_dir, "temp.csv")
        df.to_csv(temp_file)

        # next set of samples
        # (the YAML text keeps its own indentation; it is string content)
        yaml_string = f"""
    type: best_candidate
    num_samples: 30
    previous_samples: {temp_file} # optional
    cost_variable: Z   # required if previous_samples is provided
    cost_target_oversample_ratio: 2.0 # default = BestCandidateSampler.DEFAULT_COST_TARGET_OVERSAMPLE_RATIO
    downselect_ratio: {downselect_ratio}  # default = BestCandidateSampler.DEFAULT_DOWNSELECT_RATIO
    voxel_overlap: 1.0 # default = BestCandidateSampler.DEFAULT_VOXEL_OVERLAP
    cost_target: {cost_target}   # optional
    parameters:
        X:
            min: {x_min}
            max: {x_max}
        Y:
            min: {y_min}
            max: {y_max}
    """
        sample_dictionary = yaml_string_to_dict(yaml_string)

        sampler = new_sampler(sample_dictionary)
        samples = sampler.get_samples()

    if len(samples) == 0:
        print("no more samples")
        break

    for sample in samples:
        sample['Z'] = function(sample['X'], sample['Y'])

    df = pd.concat([df, pd.DataFrame(samples)], ignore_index=True)

# Five lowest costs found overall (computed once; nothing in the loop reads it)
smallest_values = df['Z'].nsmallest(5).to_list()

selected_rows = df[df['Z'] <= cost_target]

# Contour plot of log(function) over the sampled bounding box
x_values = np.linspace(x_min, x_max, 100)  # x grid spanning [x_min, x_max]
y_values = np.linspace(y_min, y_max, 100)  # y grid spanning [y_min, y_max]
X, Y = np.meshgrid(x_values, y_values)
# Guard against log(0): if the function is exactly 0 on a grid node, log()
# would warn and return -inf. Flooring at the smallest positive normal
# float keeps every value finite and leaves all other values untouched.
Z = np.log(np.maximum(function(X, Y), np.finfo(float).tiny))

fig = go.Figure(data=[
    go.Contour(
        z=Z,
        x=x_values,
        y=y_values,
        zmin=-2,  # Set z minimum value
        zmax=6,   # Set z maximum value
    ),
    go.Scatter(
        x=df["X"],
        y=df["Y"],
        mode="markers",
        marker=dict(
            color="green",
            size=4,
            symbol="circle"
        ),
        name="Samples"
    ),
    go.Scatter(
        x=selected_rows["X"],
        y=selected_rows["Y"],
        mode="markers",
        marker=dict(
            color="red",
            size=4,
            symbol="circle"
        ),
        # Distinct legend entry; both traces used to be called "Samples"
        name=f"Samples with Z <= {cost_target}"
    ),
])
fig.show()


previous_samples: [0.1963402310946984, 1.850611385863968, 1.919412723647402, 2.033533924232648, 3.935510864500608]


extrema for new input_labels:  [-0.99618858 -0.9869792 ] [3.98381373 3.98123202]
down sampling to 30 best candidates from 300 total points.
extrema for new input_labels:  [-0.87955274  0.        ] [3.96729617 3.98388411]
down sampling to 30 best candidates from 297 total points.


previous_samples: [0.1963402310946984, 1.0624276402716455, 1.1303933240415525, 1.850611385863968, 1.919412723647402]
previous_samples: [0.1963402310946984, 0.5649387345638603, 0.5713656664571348, 0.6493905853038583, 0.8343401371755892]


extrema for new input_labels:  [-0.57024794  0.        ] [2.03784897 3.97297842]
down sampling to 30 best candidates from 297 total points.
extrema for new input_labels:  [0.16736751 0.        ] [1.6550641  3.82901416]
down sampling to 30 best candidates from 297 total points.


previous_samples: [0.0605292351783712, 0.10079214354125, 0.1963402310946984, 0.1963991781369117, 0.3184634446466901]
previous_samples: [0.0160187577794619, 0.0304983918010942, 0.0605292351783712, 0.10079214354125, 0.109842151767434]


extrema for new input_labels:  [0.57167173 0.        ] [1.404777   3.66201871]
down sampling to 30 best candidates from 297 total points.
extrema for new input_labels:  [0.96388753 0.        ] [1.36414819 3.18651355]
down sampling to 30 best candidates from 297 total points.


previous_samples: [0.0003499167881862, 0.0027656639376299, 0.0154570984068513, 0.0160187577794619, 0.0249701732738036]
The number of samples to keep is greater than the number of samples to generate. The number of samples to generate will be increased to the number of samples to keep.


extrema for new input_labels:  [0.9413806 0.       ] [1.32703111 3.22149685]
down sampling to 31 best candidates from 330 total points.


previous_samples: [0.0003499167881862, 0.0024856863512164, 0.0027656639376299, 0.0034790882356162, 0.0074768293763636]
The number of samples to keep is greater than the number of samples to generate. The number of samples to generate will be increased to the number of samples to keep.


extrema for new input_labels:  [0.91515808 0.        ] [1.3270825  3.21597982]
down sampling to 55 best candidates from 580 total points.
no more samples


In [None]:
import random
import numpy as np


def latin_hypercube_sampler(num_samples, num_variables, variable_ranges):
    """
    Generate a Latin Hypercube Sample with different min/max values for each variable.

    Each variable's range is split into ``num_samples`` equal-width strata and
    every stratum receives exactly one sample, placed uniformly at random
    inside it. Strata are assigned to rows by an independent random
    permutation per variable.

    :param num_samples: Number of samples to generate.
    :param num_variables: Number of variables; must equal ``len(variable_ranges)``.
    :param variable_ranges: Dictionary specifying the min/max ranges for each variable.
                            Format: {'variable_name': (min_value, max_value), ...}
                            Columns of the result follow the dictionary's iteration order.
    :return: Latin Hypercube Sample as a 2D numpy array of shape
             ``(num_samples, num_variables)``.
    :raises ValueError: if ``num_variables`` does not match the number of
                        entries in ``variable_ranges``.
    """
    # A mismatch used to either fail with an IndexError (too few variables)
    # or silently return uninitialised columns from np.empty (too many).
    if num_variables != len(variable_ranges):
        raise ValueError(
            f"num_variables ({num_variables}) does not match the number of "
            f"entries in variable_ranges ({len(variable_ranges)})"
        )

    # Generate a random permutation of the strata for each variable.
    # All permutations are drawn before any uniform offset so the sequence
    # of calls to `random` is unchanged for seeded runs.
    permutation = [random.sample(range(num_samples), num_samples) for _ in range(num_variables)]

    # Every entry is overwritten below, so an uninitialised array is safe
    lhs = np.empty((num_samples, num_variables))

    # Generate the Latin Hypercube Sample
    for i, (min_value, max_value) in enumerate(variable_ranges.values()):
        span = max_value - min_value
        for j, stratum in enumerate(permutation[i]):
            # Position in [0, 1): stratum index plus a random offset within it
            unit_position = (stratum + random.uniform(0, 1)) / num_samples
            lhs[j, i] = unit_position * span + min_value

    return lhs

import plotly.graph_objects as go

# Generate a single Latin Hypercube Sample and use it for both the printout
# and the plot. The sample used to be regenerated between the two, so the
# printed numbers were not the plotted points.
num_samples = 10
variable_ranges = {'X': (0, 1), 'Y': (5, 10)}
num_variables = len(variable_ranges)

sample = latin_hypercube_sampler(num_samples, num_variables, variable_ranges)
print(sample)

# Extract X and Y values from the sample (columns follow variable_ranges order)
X = sample[:, 0]
Y = sample[:, 1]

# Create a scatter plot using Plotly
fig = go.Figure(data=go.Scatter(
    x=X,
    y=Y,
    mode='markers',
    marker=dict(
        color='red',
        size=8,
        symbol='circle'
    )
))

# Add axis labels and title
fig.update_layout(
    xaxis=dict(title='X'),
    yaxis=dict(title='Y'),
    title='Latin Hypercube Sample'
)

# Display the plot
fig.show()


In [28]:
import numpy as np
import plotly.graph_objects as go
import scipy.signal

# Generate sample data: a mixture of three unit-variance normal
# distributions centred at 1, 5 and 10. A seeded generator makes the cell
# give the same data on every run.
rng = np.random.default_rng(seed=0)
data = np.concatenate((
    rng.normal(loc=1, scale=1, size=(300,)),
    rng.normal(loc=5, scale=1, size=(300,)),
    rng.normal(loc=10, scale=1, size=(400,))
))

# Calculate the number of bins using the Freedman-Diaconis rule, then make
# the bins 10x finer. `num_bins` is needed by np.histogram below; its
# definition had been commented out, so the cell raised NameError on a
# fresh kernel.
q75, q25 = np.percentile(data, [75, 25])
iqr = q75 - q25
bin_width = 2 * iqr * (len(data) ** (-1/3))
num_bins = int((np.max(data) - np.min(data)) / bin_width) * 10

# Create histogram trace (bin count left to plotly's automatic choice)
histogram = go.Histogram(x=data)

# Create figure layout
layout = go.Layout(
    title='Histogram',
    xaxis=dict(title='Value'),
    yaxis=dict(title='Frequency'),
)

# Create figure object
fig = go.Figure(data=[histogram], layout=layout)

# Plot the histogram
fig.show()

# Calculate the histogram
hist, bin_edges = np.histogram(data, bins=num_bins)

# Find peaks in the histogram
# NOTE(review): find_peaks is called without height/prominence conditions,
# so every local maximum of the bin counts is reported. With bins this fine,
# sampling noise alone is likely to produce several peaks; consider a
# prominence threshold if the verdict should reflect the underlying modes.
peaks, _ = scipy.signal.find_peaks(hist)

# Determine if histogram is multimodal
is_multimodal = len(peaks) > 1

# Print the result
if is_multimodal:
    print("The histogram is multimodal.")
else:
    print("The histogram is unimodal.")


The histogram is multimodal.
