In [1]:
import pandas as pd
import numpy as np
import random

num_rows = 10000

# Categorical value pools that each record samples from.
process_stages = [
    'Raw Material Extraction',
    'Manufacturing',
    'Transport',
    'Use',
    'End-of-Life',
]
technologies = ['Conventional', 'Advanced', 'Emerging']
time_periods = ['2020-2025', '2015-2019', '2010-2014']
locations = ['Europe', 'Asia', 'North America', 'South America']
functional_units = [
    '1 kg Aluminium Sheet',
    '1 kg Copper Wire',
    '1 m2 Aluminium Panel',
]

raw_materials_types = [
    'Aluminium Ore',
    'Copper Ore',
    'Aluminium Scrap',
    'Copper Scrap',
]
energy_types = ['Electricity', 'Natural Gas', 'Coal']
transport_modes = ['Truck', 'Ship', 'Rail']
fuel_types = ['Diesel', 'Heavy Fuel Oil', 'Electric']
emissions_air_types = ['CO2', 'SOx', 'NOx', 'Particulate Matter']
emissions_water_types = ['BOD', 'Heavy Metals']
end_of_life_options = ['Recycling', 'Landfill', 'Incineration', 'Reuse']


def _draw(low, high, ndigits):
    """Return one sample from the global NumPy RNG, uniform on [low, high), rounded to `ndigits`."""
    return round(np.random.uniform(low, high), ndigits)


def _random_record():
    """Build one fully random dataset row.

    Every field is sampled independently of the others. Two seeded global
    generators are consumed (``random`` for categories, ``np.random`` for
    numbers); the order of calls within each generator determines the
    generated values, so entries must not be reordered.
    """
    record = {
        'Process Stage': random.choice(process_stages),
        'Technology': random.choice(technologies),
        'Time Period': random.choice(time_periods),
        'Location': random.choice(locations),
        'Functional Unit': random.choice(functional_units),
        'Raw Material Type': random.choice(raw_materials_types),
        'Raw Material Quantity (kg or unit)': _draw(0.5, 5, 3),
        'Energy Input Type': random.choice(energy_types),
        'Energy Input Quantity (MJ)': _draw(1, 100, 2),
        'Transport Mode': random.choice(transport_modes),
        'Transport Distance (km)': _draw(10, 1000, 1),
        'Fuel Type': random.choice(fuel_types),
    }

    # One draw per pollutant, in list order (air first, then water).
    air = {pollutant: _draw(0, 5, 3) for pollutant in emissions_air_types}
    water = {pollutant: _draw(0, 1, 3) for pollutant in emissions_water_types}

    record.update({
        'Emissions to Air CO2 (kg)': air['CO2'],
        'Emissions to Air SOx (kg)': air['SOx'],
        'Emissions to Air NOx (kg)': air['NOx'],
        'Emissions to Air Particulate Matter (kg)': air['Particulate Matter'],
        'Emissions to Water BOD (kg)': water['BOD'],
        'Emissions to Water Heavy Metals (kg)': water['Heavy Metals'],
        'Greenhouse Gas Emissions (kg CO2-eq)': _draw(0.5, 20, 3),
        'Recycled Content (%)': _draw(0, 100, 2),
        'Reuse Potential (%)': _draw(0, 100, 2),
        'End-of-Life Treatment': random.choice(end_of_life_options),
        'Recovery Rate (%)': _draw(0, 100, 2),
    })
    return record


# Seed both generators so the generated file is reproducible.
random.seed(42)
np.random.seed(42)

data = [_random_record() for _ in range(num_rows)]

df = pd.DataFrame(data)

df.to_csv('detailed_dummy_lca_dataset.csv', index=False)

print("Dummy LCA dataset with detailed columns generated and saved as 'detailed_dummy_lca_dataset.csv'.")


Dummy LCA dataset with detailed columns generated and saved as 'detailed_dummy_lca_dataset.csv'.


In [2]:
import pandas as pd
import numpy as np
import random

num_rows = 10000

# Define possible categorical values
process_stages = ['Raw Material Extraction', 'Manufacturing', 'Transport', 'Use', 'End-of-Life']
technologies = ['Conventional', 'Advanced', 'Emerging']
time_periods = ['2020-2025', '2015-2019', '2010-2014']
locations = ['Europe', 'Asia', 'North America', 'South America']
functional_units = ['1 kg Aluminium Sheet', '1 kg Copper Wire', '1 m2 Aluminium Panel']
raw_materials_types = ['Aluminium Ore', 'Copper Ore', 'Aluminium Scrap', 'Copper Scrap']
energy_types = ['Electricity', 'Natural Gas', 'Coal']
transport_modes = ['Truck', 'Ship', 'Rail']
fuel_types = ['Diesel', 'Heavy Fuel Oil', 'Electric']
emissions_air_types = ['CO2', 'SOx', 'NOx', 'Particulate Matter']
emissions_water_types = ['BOD', 'Heavy Metals']
end_of_life_options = ['Recycling', 'Landfill', 'Incineration', 'Reuse']

data = []

# Seed both generators (stdlib `random` drives the categorical picks,
# `np.random` drives the numeric draws) so the output file is reproducible.
random.seed(42)
np.random.seed(42)

for _ in range(num_rows):
    # Independent categorical / uniform inputs.
    process_stage = random.choice(process_stages)
    technology = random.choice(technologies)
    time_period = random.choice(time_periods)
    location = random.choice(locations)
    functional_unit = random.choice(functional_units)
    raw_material = random.choice(raw_materials_types)
    raw_qty = round(np.random.uniform(0.5, 5), 3)
    energy_type = random.choice(energy_types)
    energy_qty = round(np.random.uniform(1, 100), 2)
    transport_mode = random.choice(transport_modes)
    transport_distance = round(np.random.uniform(10, 1000), 1)
    fuel_type = random.choice(fuel_types)

    # CO2 scales with the energy input quantity; the multiplier depends on the
    # energy type and the fuel type. 'Coal' is an *energy* type and never
    # appears in fuel_types, so the coal multiplier must be selected from
    # energy_type -- comparing fuel_type to 'Coal' can never be true.
    if energy_type == 'Coal':
        co2_factor = 50
    elif fuel_type == 'Diesel':
        co2_factor = 20
    else:
        co2_factor = 10
    base_co2 = energy_qty * co2_factor

    # 10% Gaussian noise around the base value, floored at zero, then scaled down.
    co2_emission = np.clip(np.random.normal(base_co2, base_co2 * 0.1), 0, None) / 1000
    # The other air emissions are fixed fractions of the CO2 value.
    sox_emission = co2_emission * 0.05
    nox_emission = co2_emission * 0.03
    particulate_emission = co2_emission * 0.02

    # Water emissions are fixed fractions of the raw material quantity.
    bod_emission = raw_qty * 0.01
    heavy_metals_emission = raw_qty * 0.005

    # Reported GHG figure is the plain sum of the four air emissions.
    ghg_emissions = co2_emission + sox_emission + nox_emission + particulate_emission

    # Recycled content is high for end-of-life stages and scrap feedstock,
    # low otherwise.
    if process_stage == 'End-of-Life' or raw_material.endswith('Scrap'):
        recycled_content = np.clip(np.random.normal(80, 10), 30, 100)
    else:
        recycled_content = np.clip(np.random.normal(20, 5), 0, 50)

    # Reuse potential depends on technology only: 'Advanced' gets the higher
    # distribution.
    if technology == 'Advanced':
        reuse_potential = np.clip(np.random.normal(70, 15), 30, 100)
    else:
        reuse_potential = np.clip(np.random.normal(40, 20), 0, 70)

    # Recovery rate is a noisy linear blend of recycled content and reuse
    # potential, clipped to [20, 100].
    recovery_rate = np.clip(0.5 * recycled_content + 0.4 * reuse_potential + np.random.normal(0, 5), 20, 100)

    # End-of-life treatment is forced to 'Recycling' when recycled content
    # exceeds 50%; otherwise it is picked at random from all options.
    if recycled_content > 50:
        end_of_life = 'Recycling'
    else:
        end_of_life = random.choice(end_of_life_options)

    row = {
        'Process Stage': process_stage,
        'Technology': technology,
        'Time Period': time_period,
        'Location': location,
        'Functional Unit': functional_unit,
        'Raw Material Type': raw_material,
        'Raw Material Quantity (kg or unit)': raw_qty,
        'Energy Input Type': energy_type,
        'Energy Input Quantity (MJ)': energy_qty,
        'Transport Mode': transport_mode,
        'Transport Distance (km)': transport_distance,
        'Fuel Type': fuel_type,
        'Emissions to Air CO2 (kg)': round(co2_emission, 3),
        'Emissions to Air SOx (kg)': round(sox_emission, 3),
        'Emissions to Air NOx (kg)': round(nox_emission, 3),
        'Emissions to Air Particulate Matter (kg)': round(particulate_emission, 3),
        'Emissions to Water BOD (kg)': round(bod_emission, 3),
        'Emissions to Water Heavy Metals (kg)': round(heavy_metals_emission, 3),
        'Greenhouse Gas Emissions (kg CO2-eq)': round(ghg_emissions, 3),
        'Recycled Content (%)': round(recycled_content, 2),
        'Reuse Potential (%)': round(reuse_potential, 2),
        'End-of-Life Treatment': end_of_life,
        'Recovery Rate (%)': round(recovery_rate, 2)
    }
    data.append(row)

# Build the frame once from the list of row dicts, then persist it.
df = pd.DataFrame(data)
df.to_csv('detailed_dummy_lca_dataset_with_patterns.csv', index=False)
print("Improved Dummy LCA dataset with realistic patterns generated and saved as 'detailed_dummy_lca_dataset_with_patterns.csv'.")


Improved Dummy LCA dataset with realistic patterns generated and saved as 'detailed_dummy_lca_dataset_with_patterns.csv'.
