In [1]:
import numpy as np
import pandas as pd

# Set random seed for reproducibility
np.random.seed(42)

# Generate date range for the past 2 years with 2-day intervals
date_range = pd.date_range(end=pd.Timestamp.today(), periods=int(365/2*2), freq='2D')

# Number of samples
n_samples = len(date_range)

# Synthetic dataset generation
data = pd.DataFrame({
    "Date": date_range,
    "CO2_Level_ppm": np.random.normal(loc=410, scale=20, size=n_samples),      # CO2 levels
    "CH4_Level_ppm": np.random.normal(loc=1.9, scale=0.2, size=n_samples),      # CH4 levels
    "Oxygen_Level_percent": np.random.normal(loc=20.9, scale=0.5, size=n_samples), # Oxygen %

    "Feed_Consumption_tpd": np.random.normal(loc=1000, scale=100, size=n_samples), # tons per day
    "Product_Output_tpd": np.random.normal(loc=800, scale=80, size=n_samples),     # tons per day
    "Byproduct_Output_tpd": np.random.normal(loc=100, scale=20, size=n_samples),   # tons per day

    "Steam_Usage_tph": np.random.normal(loc=50, scale=5, size=n_samples),          # tons per hour
    "Fuel_Gas_Usage_MMBtu": np.random.normal(loc=500, scale=50, size=n_samples),   # energy input

    "Flare_Volume_m3": np.random.exponential(scale=10, size=n_samples),            # flare activity
    "Flow_Rate_m3ph": np.random.normal(loc=300, scale=30, size=n_samples),         # m^3 per hour

    "Cooling_Water_Usage_m3": np.random.normal(loc=1500, scale=150, size=n_samples), # cooling water

    "Temperature_C": np.random.normal(loc=120, scale=10, size=n_samples),          # process temperature
    "Pressure_bar": np.random.normal(loc=10, scale=1.5, size=n_samples),           # system pressure

    "Is_Peak_Hour": np.random.choice([0, 1], size=n_samples, p=[0.7, 0.3]),         # binary indicator
})

# Clip values to realistic ranges (optional for realism)
data["CO2_Level_ppm"] = data["CO2_Level_ppm"].clip(350, 500)
data["CH4_Level_ppm"] = data["CH4_Level_ppm"].clip(1.5, 2.5)
data["Oxygen_Level_percent"] = data["Oxygen_Level_percent"].clip(19.5, 21)

data["Pressure_bar"] = data["Pressure_bar"].clip(5, 15)
data["Temperature_C"] = data["Temperature_C"].clip(80, 160)

data.head()

Unnamed: 0,Date,CO2_Level_ppm,CH4_Level_ppm,Oxygen_Level_percent,Feed_Consumption_tpd,Product_Output_tpd,Byproduct_Output_tpd,Steam_Usage_tph,Fuel_Gas_Usage_MMBtu,Flare_Volume_m3,Flow_Rate_m3ph,Cooling_Water_Usage_m3,Temperature_C,Pressure_bar,Is_Peak_Hour
0,2023-07-19 23:23:51.156519,419.934283,1.819756,20.997923,991.928342,788.923521,108.838813,42.009378,526.731454,14.927401,319.903432,1527.721545,124.739341,13.278567,0
1,2023-07-21 23:23:51.156519,407.234714,1.944818,20.410814,1007.863519,702.056141,78.192018,52.310863,368.212613,2.671289,278.272604,1687.963338,112.793739,9.426659,0
2,2023-07-23 23:23:51.156519,422.953771,1.902518,21.0,800.179932,783.278139,128.218648,60.121548,500.168775,4.232479,317.589989,1558.750592,135.187184,8.979256,0
3,2023-07-25 23:23:51.156519,440.460597,1.919535,20.048708,1091.632767,731.958364,98.028237,43.18413,516.391059,0.305663,288.116217,1733.81971,126.583925,8.882015,0
4,2023-07-27 23:23:51.156519,405.316933,1.745398,21.0,1034.648848,753.558124,100.376992,50.948531,546.213507,32.568977,286.406824,1434.188112,101.39552,8.9804,1


In [3]:
data.size

5475