In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

class OptoelectronicDataGenerator:
    """Generate a synthetic optoelectronic dataset with a linear current model.

    Twelve features are drawn uniformly at random per sample and combined
    into a target "current" as a weighted sum plus a constant offset
    (``I_s``) and Gaussian noise (mean 0, standard deviation 0.1).

    Args:
        n_samples: Number of rows produced by :meth:`generate_data`.
        I_s: Constant offset added to every computed current value.
        seed: Optional seed for a private random generator. When ``None``
            (the default) the global ``np.random`` state is used, so
            ``np.random.seed(...)`` set by the caller keeps working.
    """

    # (column index in X, weight) for every feature that enters the current
    # as a plain linear term.  Column 0 (light intensity) is scaled by the
    # per-material coefficient instead, and column 2 (material type) only
    # selects that coefficient, so neither appears here.
    _LINEAR_TERMS = (
        (1, 0.5),    # temperature
        (3, 0.05),   # humidity
        (4, 0.002),  # wavelength
        (5, 0.001),  # pressure
        (6, 0.03),   # thickness
        (7, 0.1),    # impurity
        (8, 0.2),    # bias
        (9, 0.3),    # surface treatment
        (10, 0.01),  # electric field
        (11, 0.1),   # density
    )

    def __init__(self, n_samples, I_s=0.5, seed=None):
        self.n_samples = n_samples
        self.I_s = I_s
        # The np.random module and RandomState expose the same rand/randint/
        # normal functions, so either can serve as the sampling source.
        self._rng = np.random if seed is None else np.random.RandomState(seed)
        self.material_properties = {
            0: {'name': 'Silicon (Si)', 'coefficient': 1.0},
            1: {'name': 'Gallium Arsenide (GaAs)', 'coefficient': 0.9},
            2: {'name': 'Indium Phosphide (InP)', 'coefficient': 1.1},
            3: {'name': 'Germanium (Ge)', 'coefficient': 0.95},
            4: {'name': 'Zinc Oxide (ZnO)', 'coefficient': 1.2}
        }

    def generate_data(self):
        """Draw ``n_samples`` random feature rows and compute their current.

        Returns:
            tuple: ``(X, y)`` where ``X`` is a float array of shape
            ``(n_samples, 12)`` with columns in the order light intensity,
            temperature, material type, humidity, wavelength, pressure,
            thickness, impurity, bias, surface treatment, electric field,
            density; and ``y`` is the 1-D array of current values.
        """
        rng = self._rng
        n = self.n_samples

        # NOTE: the draw order below is part of the behaviour -- changing it
        # changes the data produced for a given seed.
        light_intensity = rng.rand(n) * 800_000  # Light intensity: 0 to 800,000 Lux
        temperature = rng.rand(n) * 100 - 20  # Temperature: -20°C to 80°C
        material_types = rng.randint(0, 5, n)  # Five different materials (0-4)
        humidity = rng.rand(n) * 100  # Humidity: 0% to 100%
        wavelength = rng.rand(n) * 300 + 400  # Wavelength: 400 nm to 700 nm
        pressure = rng.rand(n) * 200 + 900  # Pressure: 900 hPa to 1100 hPa
        thickness = rng.rand(n) * 10  # Thickness: 0 to 10 µm
        impurity = rng.rand(n) * 0.02  # Impurity concentration: 0 to 2%
        bias = rng.rand(n) * 10 - 5  # Bias: -5V to 5V
        surface = rng.randint(0, 2, n)  # Surface treatment: 0 (none) or 1 (treated)
        electric_field = rng.rand(n) * 1000  # Electric field strength: 0 to 1000 V/m
        density = rng.rand(n) * 10 + 1  # Material density: 1 to 11 g/cm³

        X = np.column_stack((light_intensity, temperature, material_types, humidity,
                             wavelength, pressure, thickness, impurity, bias,
                             surface, electric_field, density))

        y = self._calculate_current(X, material_types)
        return X, y

    def _calculate_current(self, X, material_types):
        """Compute the noisy current for every row of ``X``.

        Args:
            X: 2-D feature array laid out as produced by
                :meth:`generate_data` (at least 12 columns).
            material_types: One material id per row of ``X``; each id must be
                a key of ``self.material_properties``.

        Returns:
            numpy.ndarray: 1-D float array with one current value per row.

        Raises:
            KeyError: If a material id has no entry in
                ``self.material_properties``.
        """
        X = np.asarray(X, dtype=float)
        material_types = np.asarray(material_types).ravel()
        # Size everything from X itself so the method also works for inputs
        # whose row count differs from self.n_samples.
        n = X.shape[0]
        noise = self._rng.normal(0, 0.1, n)  # Add noise

        # Look each distinct material up once, then broadcast the coefficient
        # back to the rows through the inverse index.
        kinds, inverse = np.unique(material_types, return_inverse=True)
        per_kind = np.array(
            [self.material_properties[int(kind)]['coefficient'] for kind in kinds],
            dtype=float,
        )
        coefficient = per_kind[inverse.reshape(-1)]

        # Accumulate left to right: light term, linear terms in column order,
        # offset, then noise.
        current = X[:, 0] * coefficient
        for column, weight in self._LINEAR_TERMS:
            current = current + X[:, column] * weight
        current = current + self.I_s
        return current + noise

# Build the generator and draw 1000 synthetic samples (features X, target y).
data_generator = OptoelectronicDataGenerator(n_samples=1000)
X, y = data_generator.generate_data()

# Column labels: the twelve features in X's column order, then the target.
columns = [
    'Light Intensity',
    'Temperature',
    'Material Type',
    'Humidity',
    'Wavelength',
    'Pressure',
    'Thickness',
    'Impurity',
    'Bias',
    'Surface Treatment',
    'Electric Field',
    'Density',
    'Current',
]

# Assemble the table from the feature matrix, then attach the target as the
# last column.
df = pd.DataFrame(X, columns=columns[:-1])
df[columns[-1]] = y

# Write the table to disk without the row index.
csv_file_path = 'generated_data.csv'
df.to_csv(csv_file_path, index=False)

print("Data saved to CSV.")


Data saved to CSV.
