### Objective

In this notebook, we generate candidate samples for the design variables. The trained surrogate model will later screen these candidates and propose promising, feasible solutions, which facilitates the downstream optimization task.

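Here, the key difference is that the candidate designs do not include the Q1 and Q2 variables. A separate, small set of Q1/Q2 test locations is generated in the last section of this notebook.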

In [1]:
from collections import defaultdict
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import qmc
from sklearn.cluster import KMeans

In [None]:
# Define parameters
# NOTE(review): units are not stated in this notebook - presumably SI lengths
# (metres) and degrees Celsius for Ta; confirm before reuse.
Ta = 25
c_source = 50e-3
d_source = 65e-3
c_module = 61.4e-3   # module extent along x; used for position bounds and the overlap check
d_module = 106e-3    # module extent along y; used for position bounds and the overlap check
fan = 3

# Sampling configuration
data_length = 100000         # number of Latin hypercube rows to draw
data_number = 6              # number of design variables sampled per row
append_position_number = 1   # number of heat-source positions attached to each design
lhs_seed = 42                # fixed seed so Restart & Run All reproduces the same samples

# Generate samples: X has shape (data_length, data_number) with unit-interval entries
sampler = qmc.LatinHypercube(d=data_number, seed=lhs_seed)
X = sampler.random(n=data_length)

In [None]:
# Scale X to the required ranges
def _to_range(unit_column, low, high):
    """Linearly map values in the unit interval onto [low, high]."""
    return low + unit_column * (high - low)


d_min, d_max = 5e-3, 30e-3
b_min, b_max = 73.7e-3, 307e-3
L_min, L_max = 127.2e-3, 530e-3
c_min, c_max = 10e-3, 39e-3
L_duct_min, L_duct_max = 20e-3, 50e-3
n_min, n_max = 10, 50

# One column of X per design variable, in the order d, b, L, c, L_duct, n
d_sum = _to_range(X[:, 0], d_min, d_max)
b_sum = _to_range(X[:, 1], b_min, b_max)
L_sum = _to_range(X[:, 2], L_min, L_max)
c_sum = _to_range(X[:, 3], c_min, c_max)
L_duct_sum = _to_range(X[:, 4], L_duct_min, L_duct_max)

# n is a count, so it is rounded to the nearest integer after scaling
n_sum = np.rint(_to_range(X[:, 5], n_min, n_max)).astype(int)

# Assemble one row per sample; mixing in the integer column still yields a float array
data_sum = np.stack([d_sum, b_sum, L_sum, c_sum, L_duct_sum, n_sum], axis=1)

In [None]:
def _pick_positions(candidates, k, rng):
    """Choose the heat-source positions to attach to one design.

    Parameters
    ----------
    candidates : ndarray of shape (m, 4)
        Non-overlapping position samples, one (xc1, yc1, xc2, yc2) row each.
    k : int
        Number of positions requested per design.
    rng : numpy.random.Generator
        Random generator used for the single-pick case.

    Returns
    -------
    list of ndarray
        Empty when there are no candidates; one uniformly drawn row when
        ``k == 1``; every candidate when there are at most ``k`` of them;
        otherwise, for each of ``k`` k-means centres, the candidate closest
        to that centre.
    """
    if len(candidates) == 0:
        return []

    if k == 1:
        # Randomly pick 1 sample from the valid positions
        return [candidates[rng.integers(0, len(candidates))]]

    if len(candidates) <= k:
        return list(candidates)

    # Perform clustering and keep the real sample closest to each centroid
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(candidates)
    picks = []
    for center in kmeans.cluster_centers_:
        distances = np.sqrt(((candidates - center) ** 2).sum(axis=1))
        picks.append(candidates[np.argmin(distances)])
    return picks


result_design = []
t_invalid = 0
valid_position = defaultdict(list)

# One seeded generator drives every random draw in this cell, so a fresh
# Restart & Run All reproduces the same designs.
rng = np.random.default_rng(0)
position_sample_count = 5000
position_sampler = qmc.LatinHypercube(d=4, seed=rng)

# Loop-invariant bounds
t_min = 1e-3
Xc_min = c_module / 2
Yc_min = d_module / 2

# Derived parameters calculation
for i in range(data_length):
    if (i+1)%500 == 0:
        print(f"Processing {i+1} samples")

    d, b, L, c, L_duct, n = data_sum[i]

    # t must fit between t_min and (b / n - 1e-3); otherwise the sample is rejected
    t_max = b / n - 1e-3
    if t_min > t_max:
        t_invalid += 1
        continue

    t = rng.random() * (t_max - t_min) + t_min

    # Centres stay at least half a module extent away from the edges of the b x L area
    Xc_max = b - c_module / 2
    Yc_max = L - d_module / 2

    # Generate position samples (columns: xc1, yc1, xc2, yc2) and scale them
    positions = position_sampler.random(n=position_sample_count)
    positions[:, 0::2] = positions[:, 0::2] * (Xc_max - Xc_min) + Xc_min
    positions[:, 1::2] = positions[:, 1::2] * (Yc_max - Yc_min) + Yc_min

    # Check non-overlapping: centres must be more than one module extent apart in x or in y
    xc1, yc1, xc2, yc2 = positions.T
    non_overlapping = (np.abs(xc1 - xc2) > c_module) | (np.abs(yc1 - yc2) > d_module)

    # Retain valid positions and record how many survived for this (b, L)
    valid_positions = positions[non_overlapping]
    valid_position['b'].append(b)
    valid_position['L'].append(L)
    valid_position['valid_pos'].append(len(valid_positions))

    # Compose the design variables: sampled row, then t, then the chosen position
    for pos in _pick_positions(valid_positions, append_position_number, rng):
        result_design.append(np.concatenate((data_sum[i], [t], pos)))

result_design = np.array(result_design)
if result_design.size == 0:
    # Keep a 2-D shape (sampled columns + t + 4 position columns) even when
    # nothing is feasible, so naming the columns later does not fail.
    result_design = result_design.reshape(0, data_sum.shape[1] + 5)

In [None]:
# Per-sample bookkeeping as a table: columns 'b', 'L' and 'valid_pos'
# (the number of non-overlapping positions found for that sample).
df = pd.DataFrame(dict(valid_position))

In [None]:
# Summarise how many samples were rejected at each feasibility check.
# df only holds samples that passed the t check, so its length is the denominator.
no_position_count = np.sum(df['valid_pos'] == 0)
position_checked_count = len(df)
no_position_ratio = no_position_count / position_checked_count * 100

print(f"Invalid t design: {t_invalid}/{data_length}")
print(f"Invalid heat source position: {no_position_count}/{position_checked_count}")
print(f"Invalid heat source position ratio (%): {no_position_ratio:.3f}%")
print(f"Remaining valid samples: {result_design.shape[0]}/{data_length}")

In [None]:
# Configure dataframe: the six sampled variables, then t, then the two centre positions
column_names = ['d', 'b', 'L', 'c', 'L_duct', 'n', 't', 'xc1', 'yc1', 'xc2', 'yc2']
design_df = pd.DataFrame(result_design, columns=column_names)
design_df

In [None]:
# Save the design samples
# NOTE(review): this cell writes under "./Dataset/" while the Q1/Q2 cell writes
# under "./dataset/"; on a case-sensitive filesystem those are different
# folders - confirm which one is intended.
candidate_path = Path("./Dataset/candidate_100000.csv")
# to_csv does not create missing folders, so make sure the target exists first
candidate_path.parent.mkdir(parents=True, exist_ok=True)
design_df.to_csv(candidate_path, index=False)

#### Generate Q1 and Q2 samples

In [2]:
# Range, count and seed for the Q1/Q2 test locations
Q_min, Q_max = 50, 400
Q_count = 50
Q_seed = 42   # fixed seed so the saved test locations are reproducible

# Draw Latin hypercube points in the unit square and scale them to [Q_min, Q_max]
Q_sampler = qmc.LatinHypercube(d=2, seed=Q_seed)
Q = Q_sampler.random(n=Q_count)
Q = (Q_max-Q_min) * Q + Q_min

# Order every pair so that Q1 >= Q2. Boolean indexing returns a copy, so
# reversing the selected rows and writing them back is safe.
swap = Q[:, 0] < Q[:, 1]
Q[swap] = Q[swap][:, ::-1]

Q_df = pd.DataFrame(Q)
column_names = ['Q1', 'Q2']
Q_df.columns = column_names

# NOTE(review): this cell writes under "./dataset/" while the candidate-design
# cell writes under "./Dataset/"; on a case-sensitive filesystem those are
# different folders - confirm which one is intended.
Q_path = Path("./dataset/Q_test_locations.csv")
# to_csv does not create missing folders, so make sure the target exists first
Q_path.parent.mkdir(parents=True, exist_ok=True)
Q_df.to_csv(Q_path, index=False)