In [None]:
import numpy as np
import pandas as pd

# Synthetic thermoelectric-generator (TEG) dataset.
#
# Draws random operating points (temperature difference, load resistance,
# material), derives the Seebeck voltage / power / efficiency from them, and
# writes the feature columns to "TEG_data.csv".

# Number of samples
n = 5000

# Fixed seed: re-running this cell regenerates exactly the same CSV instead of
# silently overwriting it with a different random dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Features
delta_T = rng.uniform(50, 300, n)            # Temperature difference in °C
Rload = rng.uniform(1, 20, n)                # Load resistance in ohms
materials = ['Bi2Te3', 'PbTe', 'Skutterudite']
S_dict = {'Bi2Te3': 220e-6, 'PbTe': 180e-6, 'Skutterudite': 200e-6}  # Seebeck in V/K

# One material per sample, then look up its Seebeck coefficient.
material = rng.choice(materials, n)
S = np.array([S_dict[m] for m in material])

# Calculate Seebeck voltage (physics consistent)
Voltage_V = S * delta_T  # V = S * ΔT

# Power and Efficiency (targets, not features)
Power_W = Voltage_V**2 / Rload
Qin = delta_T * 0.5  # assumed heat input, arbitrary units for simulation
# NOTE(review): with the ranges above, (Power_W / Qin) * 100 equals
# 2 * S**2 * delta_T / Rload * 100, which peaks at roughly 0.003. That is far
# below the 0.5 floor, so the clip makes Efficiency exactly 0.5 for every
# sample. The Qin model (or the clip bounds) needs revisiting before this is
# used as a regression target; the formula is left unchanged here.
Efficiency = np.clip((Power_W / Qin) * 100, 0.5, 8)  # realistic efficiency %

# Build DataFrame
df = pd.DataFrame({
    'Delta_T_C': delta_T,
    'Rload_ohm': Rload,
    'Material': material,
    'Seebeck_V_per_K': S,
    # 'Voltage_V', 'Power_W', 'Efficiency_%' are targets, NOT features
})

# Round numeric values for cleanliness
df = df.round({'Delta_T_C': 2, 'Rload_ohm': 2, 'Seebeck_V_per_K': 6})

# Save CSV (index=False keeps the row index out of the file)
df.to_csv("TEG_data.csv", index=False)
df.head()


Unnamed: 0,Delta_T_C,Rload_ohm,Material,Seebeck_V_per_K
0,238.3,19.64,Skutterudite,0.0002
1,265.62,15.27,Skutterudite,0.0002
2,51.11,11.04,Skutterudite,0.0002
3,233.68,5.31,Skutterudite,0.0002
4,170.75,3.08,PbTe,0.00018
