In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

In [20]:
# Step 1: Read the material table from "df_ga.csv" (path is relative to the
# notebook's working directory) into the working DataFrame `df`.
df = pd.read_csv(filepath_or_buffer="df_ga.csv")

In [21]:
# Step 2: Name the categorical (non-numeric) columns that must be encoded
# before modelling. Only the listed columns are treated as categorical.
categorical_cols = ["Material"]

In [22]:
# Step 3: Encode categorical features using label encoding.
# Each column gets its OWN fitted LabelEncoder, stored in `label_encoders`
# keyed by column name. Re-fitting a single shared encoder (as was done
# before) keeps only the classes of the last column, which makes it
# impossible to map earlier columns' codes back to their labels.
label_encoders = {col: LabelEncoder() for col in categorical_cols}

for col, encoder in label_encoders.items():
    # The integer codes are written next to the original column as "<col>_encoded".
    df[col + '_encoded'] = encoder.fit_transform(df[col])

# Backward compatibility: `label_encoder` still exists and, as before, holds
# the encoder fitted on the last categorical column (or an unfitted encoder
# when there are no categorical columns).
label_encoder = label_encoders[categorical_cols[-1]] if categorical_cols else LabelEncoder()

In [23]:
# Surface area in m^2 used when deriving Weight and Cost from thickness.
# Fixed at 1 m^2 for simplicity.
area = 1.0

In [24]:
# Step 1: Define the range-based approach for thickness

def generate_range_based_thickness(min_thickness=0.01, max_thickness=0.1, num_samples=5):
    """
    Build an evenly spaced sweep of candidate thickness values.

    Args:
        min_thickness (float): First value of the sweep.
        max_thickness (float): Last value of the sweep (included).
        num_samples (int): Number of values to generate, endpoints included.

    Returns:
        numpy.ndarray: ``num_samples`` evenly spaced values running from
        ``min_thickness`` to ``max_thickness``.
    """
    thickness_grid = np.linspace(start=min_thickness, stop=max_thickness, num=num_samples)
    return thickness_grid


# Step 2: Define the physics-based approach for thickness

def calculate_physics_based_thickness(thermal_conductivity, temp_difference=1200, heat_flux=50):
    """
    Derive a thickness from steady-state conduction: t = (k * ∆T) / q.

    Args:
        thermal_conductivity (float): Thermal conductivity k of the material (W/m*K).
        temp_difference (float): Temperature difference ∆T across the layer
            (K, or °C since only a difference is used). Defaults to 1200.
        heat_flux (float): Heat flux q in W/m^2. Defaults to 50.

    Returns:
        float: Thickness in meters, or ``np.nan`` when ``heat_flux`` is zero
        (the quotient is undefined in that case).
    """
    # Only divide when the flux is non-zero; a zero flux is reported as NaN
    # rather than raising, so callers can filter those rows out afterwards.
    if heat_flux != 0:
        conducted = thermal_conductivity * temp_difference
        return conducted / heat_flux
    return np.nan


# Step 3: Apply both thickness generation methods to each material

# Number of range-based samples per material
num_samples = 5

# Physics-based design scenarios as (temperature difference, heat flux in W/m^2)
# pairs. Every material gets one additional thickness per scenario; add or
# remove tuples here instead of copy-pasting calls inside the loop.
physics_scenarios = [(1200, 50), (1000, 60)]

# The range-based sweep is the same for every material, so it is computed once
# here rather than once per row.
range_based_thicknesses = generate_range_based_thickness(num_samples=num_samples)

# Accumulates one pandas Series per (material, thickness) combination; the
# list is turned into a DataFrame in a later cell.
new_rows = []

for _, row in df.iterrows():
    thermal_conductivity = row['Thermal Conductivity (W/m*K)']

    # One physics-based thickness per scenario (NaN if a scenario has zero heat flux)
    physics_based_thicknesses = [
        calculate_physics_based_thickness(thermal_conductivity, temp_difference=delta_t, heat_flux=flux)
        for delta_t, flux in physics_scenarios
    ]

    # Combine all thickness values for this material: range-based first, then physics-based
    thickness_values = np.append(range_based_thicknesses, physics_based_thicknesses)

    # For each thickness value, create a new row with the material properties
    for thickness in thickness_values:
        new_row = row.copy()  # copy so the source row of `df` is never modified
        new_row['Thickness'] = round(thickness, 4)  # Round thickness to 4 decimal places
        # Thickness-dependent features. They are computed from the unrounded
        # thickness; only the stored results are rounded to 4 decimal places.
        new_row['Weight'] = round(new_row['Material Density (kg/m3)'] * thickness * area, 4)  # Area is assumed to be 1 m^2
        new_row['Cost'] = round(new_row['Price (c) ($/m^3)'] * thickness * area, 4)  # Area is assumed to be 1 m^2
        new_rows.append(new_row)

In [25]:
# Assemble the accumulated per-thickness rows (a list of Series) into one DataFrame.
expanded_df = pd.DataFrame(data=new_rows)

In [26]:
# Optional clean-up: discard every row whose 'Thickness' is NaN, which the
# physics-based calculation can produce (e.g. for a zero heat flux).
expanded_df = expanded_df.dropna(axis='index', how='any', subset=['Thickness'])

In [27]:
# Step 4: Persist the expanded dataset as CSV.
# The DataFrame index is not written, and every float column is formatted
# with four decimal places.
output_path = "df_expanded_ga.csv"
expanded_df.to_csv(path_or_buf=output_path, float_format='%.4f', index=False)

# Report where the file went and how many rows it contains.
print(f"New dataset saved to {output_path} with {len(expanded_df)} rows.")

New dataset saved to df_expanded_ga.csv with 14000 rows.
