### Manganese Processing Plant Feature Engineering
#### Advanced feature creation for ML optimization models

#### AUTHOR: DARLENE WENDY NASIMIYU
#### Purpose: Create powerful features for manganese processing optimization

In [14]:
import pandas as pd
import numpy as np
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
import os

In [15]:
# ---- SETUP: resolve the synthetic-data directory ----
# BASE_DIR is the PARENT of the notebook's current working directory
# (presumably the project root when the notebook lives in a sub-folder).
BASE_DIR = os.path.dirname(os.getcwd())
data_dir = os.path.join(BASE_DIR, 'data', 'synthetic')

print(f"Using data directory: {data_dir}")

Using data directory: /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/synthetic


In [16]:
# ----- DATASET FILES: logical name -> CSV file name inside data_dir -----
dataset_files = {
    'ore_feed': 'manganese_ore_feed.csv',
    'blended_ore': 'manganese_blended_ore_feed.csv',
    'crushing': 'manganese_crushing_circuit.csv',
    'separation': 'manganese_separation_circuit.csv',
    'flotation': 'manganese_flotation_circuit.csv',
    'dms': 'manganese_dms_circuit.csv',
    'jigging': 'manganese_jigging_circuit.csv',
    'dewatering': 'manganese_dewatering_circuit.csv',
    'equipment': 'manganese_equipment_health.csv',
    'energy': 'manganese_energy_consumption.csv',
}

# Read each CSV into `datasets`, parsing the 'timestamp' column as datetimes.
# A file that is missing or fails to load is reported and skipped, so the
# later cells will raise KeyError for any dataset that did not load.
datasets = {}
for name, filename in dataset_files.items():
    filepath = os.path.join(data_dir, filename)
    try:
        df = pd.read_csv(filepath, parse_dates=['timestamp'])
    except FileNotFoundError:
        print(f"Could not find {filepath}")
    except Exception as e:
        print(f"Error loading {name}: {e}")
    else:
        datasets[name] = df
        print(f" Loaded {name}: {len(df):,} records, {df.shape[1]} columns")

total_records = sum(map(len, datasets.values()))
print(f"\nTotal datasets loaded: {len(datasets)}")
print(f"Total records: {total_records:,}")

 Loaded ore_feed: 10,000 records, 11 columns
 Loaded blended_ore: 6,522 records, 11 columns
 Loaded crushing: 15,000 records, 9 columns
 Loaded separation: 12,000 records, 13 columns
 Loaded flotation: 12,000 records, 22 columns
 Loaded dms: 8,000 records, 16 columns
 Loaded jigging: 10,000 records, 16 columns
 Loaded dewatering: 8,000 records, 18 columns
 Loaded equipment: 8,000 records, 12 columns
 Loaded energy: 10,000 records, 30 columns

Total datasets loaded: 10
Total records: 99,522


In [17]:
# Registry of engineered DataFrames keyed by output name; each feature-engineering
# cell below adds one entry, and the final cell writes every entry to CSV.
engineered_datasets = {}


In [18]:
# CATEGORY 1: ORE CHARACTERISTICS FEATURES
def engineer_ore_features(ore_df, max_possible_grade=52.0,
                          high_grade_threshold=60, low_grade_threshold=45):
    """Return a copy of ``ore_df`` with engineered ore-characteristic columns.

    The input frame is not modified. With the default arguments the result is
    identical to the earlier hard-coded implementation.

    Parameters
    ----------
    ore_df : pandas.DataFrame
        Must contain ``mn_grade_pct``, ``fe_content_pct``, ``siO2_content_pct``,
        ``al2O3_content_pct``, ``p_content_pct``, ``work_index_kwh_t``,
        ``p80_mm``, ``specific_gravity``, ``moisture_pct`` and ``ore_type``.
    max_possible_grade : float, default 52.0
        Reference grade used by ``enrichment_potential``.
    high_grade_threshold : float, default 60
        ``is_high_grade`` is 1 where ``mn_grade_pct`` is strictly above this.
    low_grade_threshold : float, default 45
        ``is_low_grade`` is 1 where ``mn_grade_pct`` is strictly below this.

    Returns
    -------
    pandas.DataFrame
        The original columns plus 19 named features and one
        ``ore_type_<value>`` indicator column per distinct ``ore_type`` value.

    Notes
    -----
    * ``valuable_mineral_ratio`` and ``enrichment_potential`` divide without the
      ``+ 0.01`` guard the other ratios use, so a zero denominator yields
      inf/NaN. Kept as-is so existing outputs do not change.
    * NOTE(review): the default ``high_grade_threshold`` (60) is above the
      default ``max_possible_grade`` (52), so ``is_high_grade`` is 0 for every
      grade at or below that maximum - confirm the intended cut-off.
    """
    print("\nEngineering Ore Characteristics Features...")

    ore_data = ore_df.copy()
    grade = ore_data['mn_grade_pct']
    work_index = ore_data['work_index_kwh_t']

    # Basic transformations of the Mn grade
    ore_data['mn_grade_squared'] = grade ** 2
    ore_data['mn_grade_log'] = np.log1p(grade)

    # Total of the four impurity assays, then grade-to-impurity ratios.
    # The 0.01 offset keeps the denominators away from zero.
    ore_data['gangue_total'] = (ore_data['fe_content_pct'] + ore_data['siO2_content_pct'] +
                                ore_data['al2O3_content_pct'] + ore_data['p_content_pct'])
    ore_data['ore_quality_index'] = grade / (ore_data['gangue_total'] + 0.01)
    ore_data['mn_to_fe_ratio'] = grade / (ore_data['fe_content_pct'] + 0.01)
    ore_data['mn_to_silica_ratio'] = grade / (ore_data['siO2_content_pct'] + 0.01)
    ore_data['mn_to_al_ratio'] = grade / (ore_data['al2O3_content_pct'] + 0.01)
    # Column name (including its spelling) is kept because it is part of the saved output.
    ore_data['mn_to_phosphorus_ration'] = grade / (ore_data['p_content_pct'] + 0.01)
    ore_data['valuable_mineral_ratio'] = grade / (grade + ore_data['gangue_total'])

    # Derived physical features.
    # pd.cut uses right-closed bins: values <= 0 or > 25 become NaN.
    ore_data['ore_hardness_category'] = pd.cut(work_index,
                                               bins=[0, 12, 15, 18, 25],
                                               labels=['soft', 'medium', 'hard', 'very_hard'])

    ore_data['liberation_difficulty'] = work_index * ore_data['p80_mm']
    ore_data['density_grade_product'] = ore_data['specific_gravity'] * grade
    ore_data['moisture_adjusted_grade'] = grade * (100 - ore_data['moisture_pct']) / 100

    ore_data['enrichment_potential'] = (max_possible_grade - grade) / grade

    # Ore type encoding: one indicator column per distinct ore_type value
    ore_type_dummies = pd.get_dummies(ore_data['ore_type'], prefix='ore_type')
    ore_data = pd.concat([ore_data, ore_type_dummies], axis=1)

    # Ore types missing from the map produce NaN scores.
    processability_map = {'oxide': 0.7, 'carbonate': 0.85, 'silicate': 0.9}
    ore_data['ore_processability_score'] = ore_data['ore_type'].map(processability_map)
    ore_data['ore_processability_score'] *= (ore_data['mn_grade_pct'] / 50) * (1 / (ore_data['work_index_kwh_t'] / 15))

    # Statistical features, computed relative to the rows of this frame
    mean_grade = ore_data['mn_grade_pct'].mean()
    ore_data['grade_deviation_from_mean'] = ore_data['mn_grade_pct'] - mean_grade
    ore_data['grade_percentile_rank'] = ore_data['mn_grade_pct'].rank(pct=True)
    ore_data['is_high_grade'] = (ore_data['mn_grade_pct'] > high_grade_threshold).astype(int)
    ore_data['is_low_grade'] = (ore_data['mn_grade_pct'] < low_grade_threshold).astype(int)

    print(f"  Generated {len([c for c in ore_data.columns if c not in ore_df.columns])} ore features")
    return ore_data


# Build the ore-feed feature table and register it under 'engineered_ore_feed'.
engineered_datasets['engineered_ore_feed'] = engineer_ore_features(datasets['ore_feed'])


Engineering Ore Characteristics Features...
  Generated 22 ore features


#### ORE FEATURE CHARACTERISTICS
- This feature engineering function for manganese ore begins by creating basic transformations of the manganese grade to capture non-linear relationships, namely mn_grade_squared and mn_grade_log, which can enhance predictive models by emphasizing extreme values or reducing skew.
- It then calculates gangue_total as the sum of impurities (Fe, SiO₂, Al₂O₃, P) to measure overall dilution of the ore and derives quality and ratio-based features including ore_quality_index, mn_to_fe_ratio, mn_to_silica_ratio, mn_to_al_ratio, mn_to_phosphorus_ration, and valuable_mineral_ratio; these standardize manganese content relative to impurities, highlighting metallurgical value and potential separation efficiency.
- The function also generates derived physical and operational features: ore_hardness_category categorizes ore hardness based on work index into soft, medium, hard, and very hard classes, liberation_difficulty combines hardness with particle size (p80_mm) to indicate processing effort, density_grade_product multiplies specific gravity by Mn grade to capture separation behavior, moisture_adjusted_grade corrects Mn grade for moisture content, and enrichment_potential estimates the potential to upgrade the ore toward a theoretical maximum grade.
- To account for ore type effects, categorical encoding is applied with dummy variables (ore_type_oxide, ore_type_carbonate, ore_type_silicate) and a combined ore_processability_score integrates ore type, grade, and hardness into a numerical measure of processing ease.
- Finally, statistical features are calculated to capture relative quality across the dataset: grade_deviation_from_mean measures deviation from average Mn grade, grade_percentile_rank ranks samples by percentile, and binary indicators is_high_grade and is_low_grade flag ores with exceptionally high or low Mn content.
- Collectively, these engineered features provide a rich, multi-faceted representation of ore chemistry, physical properties, and processing behavior, making the dataset much more informative for downstream modeling, optimization, and metallurgical decision-making.

In [19]:

# CATEGORY 2: CRUSHING & SIZE REDUCTION FEATURES
def engineer_crushing_features(crushing_df, feed_p80_mm=50, max_crusher_capacity=150,
                               baseline_vibration=2.0, max_gap=25):
    """Return a copy of ``crushing_df`` with 17 engineered crushing-circuit columns.

    The input frame is not modified. With the default arguments the result is
    identical to the earlier implementation, where these reference values were
    hard-coded inside the body.

    Parameters
    ----------
    crushing_df : pandas.DataFrame
        Must contain ``power_draw_kw``, ``feed_rate_tph``, ``ore_hardness_wi``,
        ``product_p80_mm``, ``liner_wear_pct``, ``vibration_rms_mm_s``,
        ``crusher_gap_mm`` and ``feed_moisture_pct``.
    feed_p80_mm : float, default 50
        Reference feed size used by the theoretical-energy term,
        ``reduction_ratio`` and ``crushing_effectiveness``.
    max_crusher_capacity : float, default 150
        Reference capacity used by ``power_utilization`` (multiplied by 5)
        and ``throughput_efficiency``.
    baseline_vibration : float, default 2.0
        Divisor for ``vibration_normalized``.
    max_gap : float, default 25
        Divisor for ``gap_utilization``.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by the 17 new feature columns.
    """
    print("\nEngineering Crushing Circuit Features...")

    crushing_data = crushing_df.copy()

    # Energy features. The 0.01 offset keeps the denominators away from zero.
    crushing_data['energy_per_ton'] = crushing_data['power_draw_kw'] / (crushing_data['feed_rate_tph'] + 0.01)
    crushing_data['specific_energy'] = crushing_data['energy_per_ton'] / (crushing_data['ore_hardness_wi'] + 0.01)

    # Hardness times the difference of inverse square-root product and feed sizes.
    theoretical_energy = crushing_data['ore_hardness_wi'] * (
        1 / np.sqrt(crushing_data['product_p80_mm']) - 1 / np.sqrt(feed_p80_mm)
    )
    crushing_data['energy_efficiency_index'] = theoretical_energy / (crushing_data['energy_per_ton'] + 0.01)

    crushing_data['power_utilization'] = crushing_data['power_draw_kw'] / (max_crusher_capacity * 5)

    # Size reduction features
    crushing_data['reduction_ratio'] = feed_p80_mm / (crushing_data['product_p80_mm'] + 0.01)
    crushing_data['size_reduction_efficiency'] = crushing_data['reduction_ratio'] / (crushing_data['energy_per_ton'] + 0.01)
    crushing_data['crushing_effectiveness'] = (feed_p80_mm - crushing_data['product_p80_mm']) / (crushing_data['power_draw_kw'] + 0.01)

    # Equipment condition features
    crushing_data['liner_wear_impact'] = (100 - crushing_data['liner_wear_pct']) / 100
    crushing_data['vibration_normalized'] = crushing_data['vibration_rms_mm_s'] / baseline_vibration

    # pd.cut uses right-closed bins: values <= 0 or > 20 become NaN.
    crushing_data['vibration_severity_category'] = pd.cut(crushing_data['vibration_rms_mm_s'],
                                                          bins=[0, 3, 5, 8, 20],
                                                          labels=['low', 'medium', 'high', 'critical'])

    # Uses the index label + 1 as the elapsed-time proxy, so the index must be
    # numeric - presumably the default RangeIndex from read_csv; TODO confirm.
    crushing_data['wear_rate_per_hour'] = crushing_data['liner_wear_pct'] / (crushing_data.index + 1)

    # Operational features
    crushing_data['gap_utilization'] = crushing_data['crusher_gap_mm'] / max_gap
    crushing_data['throughput_efficiency'] = crushing_data['feed_rate_tph'] / max_crusher_capacity
    crushing_data['ore_hardness_interaction'] = crushing_data['feed_rate_tph'] * crushing_data['ore_hardness_wi']
    crushing_data['moisture_impact_factor'] = 1 - (crushing_data['feed_moisture_pct'] / 20)

    # Time-based features
    crushing_data['hours_since_maintenance'] = (100 - crushing_data['liner_wear_pct']) * 5
    # NOTE(review): this flags LOW liner_wear_pct (< 30), whereas liner_wear_impact
    # above treats (100 - liner_wear_pct) as the useful remainder. One of the two
    # readings of liner_wear_pct looks inverted - confirm against the data
    # generator before relying on this flag. Behaviour is left unchanged.
    crushing_data['is_end_of_liner_life'] = (crushing_data['liner_wear_pct'] < 30).astype(int)

    print(f"  Generated {len([c for c in crushing_data.columns if c not in crushing_df.columns])} crushing features")
    return crushing_data

# Build the crushing-circuit feature table and register it under 'engineered_crushing'.
engineered_datasets['engineered_crushing'] = engineer_crushing_features(datasets['crushing'])



Engineering Crushing Circuit Features...
  Generated 17 crushing features


##### CRUSHING AND SIZE REDUCTION FEATURES
- The engineer_crushing_features function systematically generates 17 new features (Features 22–38) designed to quantify the performance, efficiency, and operational health of the crushing circuit.
- It starts by calculating energy-related features such as energy_per_ton, which measures the power consumption per ton of ore to highlight energy intensity, and specific_energy, which normalizes this energy by ore hardness to reflect the relative difficulty of crushing harder ores. The energy_efficiency_index compares actual energy usage to the theoretically required energy, providing a measure of operational efficiency, while power_utilization quantifies the fraction of maximum crusher capacity being used. Next, size reduction features like reduction_ratio capture the extent of size reduction, size_reduction_efficiency measures how effectively energy is converted into particle size reduction, and crushing_effectiveness assesses reduction achieved per kilowatt, reflecting performance optimization.
- To monitor equipment health, features such as liner_wear_impact indicate remaining liner effectiveness, vibration_normalized scales observed vibrations relative to a baseline to detect anomalies, vibration_severity_category classifies vibration levels into low, medium, high, and critical bands, and wear_rate_per_hour tracks the rate of liner degradation over time.
- Operational features including gap_utilization and throughput_efficiency evaluate how effectively the crusher gap and feed rate are being used, while ore_hardness_interaction combines ore hardness and feed rate to estimate mechanical stress, and moisture_impact_factor adjusts performance expectations based on feed moisture content.
- Finally, two time-based features are derived from liner wear: hours_since_maintenance is an hours estimate computed as (100 − liner_wear_pct) × 5, and is_end_of_liner_life is a binary flag set when liner_wear_pct is below 30; both are intended to help schedule preventive maintenance and avoid unplanned downtime. Collectively, these features provide a multidimensional view of crushing circuit efficiency, energy consumption, equipment health, and operational performance, enabling better process monitoring, optimization, and predictive modeling.

- List of engineered crushing features: energy_per_ton, specific_energy, energy_efficiency_index, power_utilization, reduction_ratio, size_reduction_efficiency, crushing_effectiveness, liner_wear_impact, vibration_normalized, vibration_severity_category, wear_rate_per_hour, gap_utilization, throughput_efficiency, ore_hardness_interaction, moisture_impact_factor, hours_since_maintenance, is_end_of_liner_life.

In [22]:
# CATEGORY 3: SEPARATION CIRCUIT FEATURES
def engineer_separation_features(separation_df):
    """Derive spiral and magnetic separation features on a copy of the input.

    Adds 17 columns unconditionally and two more
    (``magnetic_efficiency_by_ore``, ``magnetic_susceptibility_proxy``) when the
    frame has an ``ore_type`` column, i.e. 19 in total for such a frame.
    The caller's frame is left untouched.

    Args:
        separation_df: DataFrame containing ``spiral_recovery``,
            ``spiral_concentrate_grade_pct``, ``feed_grade_pct``,
            ``spiral_tailings_grade_pct``, ``spiral_speed_rpm``,
            ``wash_water_m3h``, ``feed_density_pct_solids``,
            ``magnetic_intensity_t``, ``belt_speed_ms``,
            ``final_concentrate_grade_pct`` and ``overall_recovery``;
            ``ore_type`` is optional.

    Returns:
        A new DataFrame: the input columns followed by the engineered ones.
    """
    print("\nEngineering Separation Circuit Features...")

    eps = 0.01  # offset that keeps denominators away from zero
    sep = separation_df.copy()

    # Input columns, aliased in the order the calculations first need them.
    spiral_rec = sep['spiral_recovery']
    conc_grade = sep['spiral_concentrate_grade_pct']
    feed_grade = sep['feed_grade_pct']
    tail_grade = sep['spiral_tailings_grade_pct']
    spiral_rpm = sep['spiral_speed_rpm']
    wash_water = sep['wash_water_m3h']
    solids_pct = sep['feed_density_pct_solids']
    mag_intensity = sep['magnetic_intensity_t']
    belt_speed = sep['belt_speed_ms']

    # --- Recovery: recovery relative to a 0.95 reference, and grade uplift ---
    sep['recovery_efficiency_ratio'] = spiral_rec / 0.95
    sep['grade_recovery_product'] = conc_grade * spiral_rec
    sep['separation_sharpness'] = (conc_grade - feed_grade) / (conc_grade - tail_grade + eps)
    sep['upgrade_ratio'] = conc_grade / (feed_grade + eps)

    # --- Operating point: spiral speed relative to 200 rpm, slurry dilution ---
    sep['spiral_speed_deviation'] = spiral_rpm - 200
    sep['spiral_speed_squared'] = spiral_rpm ** 2
    sep['water_to_solids_ratio'] = wash_water / (solids_pct / 100 + eps)
    sep['dilution_factor'] = 100 / (solids_pct + eps)

    # --- Performance ---
    sep['separation_selectivity'] = (conc_grade / (feed_grade + eps)) / (spiral_rec + eps)
    sep['manganese_loss_to_tailings'] = tail_grade * (1 - spiral_rec)
    sep['enrichment_index'] = (conc_grade - tail_grade) / (feed_grade + eps)

    # --- Magnetic stage: offsets from a 0.8 T intensity and a 1.0 m/s belt ---
    sep['magnetic_intensity_effect'] = mag_intensity - 0.8
    sep['belt_speed_optimal_deviation'] = (belt_speed - 1.0).abs()

    if 'ore_type' in sep.columns:
        # Ore types not listed here map to NaN.
        efficiency_by_ore = {'oxide': 0.75, 'carbonate': 0.85, 'silicate': 0.90}
        sep['magnetic_efficiency_by_ore'] = sep['ore_type'].map(efficiency_by_ore)
        sep['magnetic_susceptibility_proxy'] = sep['magnetic_efficiency_by_ore'] * feed_grade

    # --- Combined two-stage indicators ---
    sep['overall_enrichment'] = sep['final_concentrate_grade_pct'] / (feed_grade + eps)
    overall_rec = sep['overall_recovery']
    sep['two_stage_recovery'] = spiral_rec * overall_rec
    sep['spiral_efficiency'] = spiral_rec
    # Overall recovery divided by spiral recovery, used as the second-stage term.
    magnetic_stage = overall_rec / (spiral_rec + eps)
    sep['combined_efficiency'] = (spiral_rec + magnetic_stage) / 2

    added = [col for col in sep.columns if col not in separation_df.columns]
    print(f"  Generated {len(added)} separation features")
    return sep


# Build the separation-circuit feature table and register it under 'engineered_separation'.
engineered_datasets['engineered_separation'] = engineer_separation_features(datasets['separation'])


Engineering Separation Circuit Features...
  Generated 19 separation features


##### ENGINEERED SEPARATION FEATURES
- The engineer_separation_features function is designed to extract 19 new features (labelled Features 39–56 in the code, although the run above reports 19 generated columns, as listed below) that describe the recovery performance, operational conditions, and magnetic separation efficiency of the spiral and magnetic separation circuits. The goal is to quantify how effectively ore is separated into concentrate and tailings while considering equipment settings, ore characteristics, and process limitations.
- Recovery features include recovery_efficiency_ratio, which compares actual spiral recovery to a theoretical maximum to indicate process efficiency; grade_recovery_product, which combines grade and recovery to measure overall recovery value; separation_sharpness, which quantifies how distinctly valuable minerals are separated from gangue; and upgrade_ratio, representing grade improvement from feed to concentrate.
- Operational features monitor process settings, such as spiral_speed_deviation and spiral_speed_squared, which capture deviations from optimal speed and non-linear effects of speed; water_to_solids_ratio and dilution_factor measure slurry consistency, impacting separation performance. Performance metrics include separation_selectivity, which normalizes enrichment relative to recovery; manganese_loss_to_tailings, indicating ore lost in tailings; and enrichment_index, measuring grade improvement relative to feed.
- Magnetic separation features such as magnetic_intensity_effect, belt_speed_optimal_deviation, magnetic_efficiency_by_ore, and magnetic_susceptibility_proxy account for magnetic field strength, belt speed, and ore-type sensitivity, improving predictions of magnetic separation effectiveness.
- Finally, combined performance metrics like overall_enrichment, two_stage_recovery, spiral_efficiency, and combined_efficiency integrate spiral and magnetic separation results into holistic indicators of circuit efficiency. These engineered features enable deeper analysis of recovery optimization, energy usage, and ore-specific processing behavior, providing a robust dataset for monitoring, modeling, and predictive analytics.

- List of engineered separation features: recovery_efficiency_ratio, grade_recovery_product, separation_sharpness, upgrade_ratio, spiral_speed_deviation, spiral_speed_squared, water_to_solids_ratio, dilution_factor, separation_selectivity, manganese_loss_to_tailings, enrichment_index, magnetic_intensity_effect, belt_speed_optimal_deviation, magnetic_efficiency_by_ore, magnetic_susceptibility_proxy, overall_enrichment, two_stage_recovery, spiral_efficiency, combined_efficiency.

In [24]:

# CATEGORY 4: FLOTATION CIRCUIT FEATURES
def engineer_flotation_features(flotation_df):
    """Derive flotation-circuit features on a copy of the input frame.

    Fourteen columns are always added. Further columns are added only when the
    optional inputs ``actual_collector_consumed_gt``, ``blower_health_score``,
    ``cell_health_score`` and ``ore_type`` are present (23 new columns when all
    four are available). The caller's frame is left untouched.

    Args:
        flotation_df: DataFrame containing ``collector_dosage_gt``,
            ``feed_grade_pct``, ``frother_dosage_gt``,
            ``pulp_density_pct_solids``, ``flotation_recovery``, ``ph_value``,
            ``air_flow_m3_min``, ``residence_time_min``,
            ``concentrate_grade_pct`` and ``froth_stability_index``.

    Returns:
        A new DataFrame: the input columns followed by the engineered ones.
    """
    print("\nEngineering Flotation Circuit Features...")

    eps = 0.01  # offset that keeps denominators away from zero
    flot = flotation_df.copy()
    present = set(flot.columns)

    # Required inputs, aliased in the order the calculations first need them.
    collector = flot['collector_dosage_gt']
    feed_grade = flot['feed_grade_pct']
    frother = flot['frother_dosage_gt']
    pulp_solids = flot['pulp_density_pct_solids']
    recovery = flot['flotation_recovery']

    # --- Reagent dosage ---
    flot['collector_intensity'] = collector / (feed_grade + eps)
    flot['frother_intensity'] = frother / (pulp_solids + eps)
    # Weighted dosage (0.5 for collector, 0.8 for frother) scaled by 1/1000.
    flot['reagent_cost_per_ton'] = (collector * 0.5 + frother * 0.8) / 1000
    flot['collector_to_frother_ratio'] = collector / (frother + eps)
    flot['reagent_efficiency'] = recovery / (collector + frother + eps)

    # --- Dosing accuracy (only when actual consumption is recorded) ---
    if 'actual_collector_consumed_gt' in present:
        flot['dosing_error'] = flot['actual_collector_consumed_gt'] - collector
        flot['reagent_wastage'] = flot['dosing_error'] / (collector + eps)

        if 'blower_health_score' in present:
            # The column is named for the pump but is computed from blower_health_score.
            flot['pump_health_impact_on_dosing'] = flot['dosing_error'] * (1 - flot['blower_health_score'] / 100)

    # --- pH: distance from 9.25 and membership of the closed band [9.0, 9.5] ---
    ph = flot['ph_value']
    flot['ph_deviation_from_optimal'] = (ph - 9.25).abs()
    flot['ph_in_optimal_range'] = ph.between(9.0, 9.5).astype(int)
    flot['ph_recovery_interaction'] = ph * recovery
    flot['ph_squared'] = ph ** 2

    # --- Process conditions ---
    air_flow = flot['air_flow_m3_min']
    residence = flot['residence_time_min']
    flot['air_to_solids_ratio'] = air_flow / (pulp_solids + eps)
    flot['residence_time_per_grade'] = residence / (feed_grade + eps)
    flot['flotation_kinetics_factor'] = residence * air_flow
    flot['froth_loading'] = flot['concentrate_grade_pct'] / (flot['froth_stability_index'] + eps)

    # --- Equipment-linked performance ---
    if 'cell_health_score' in present:
        cell_health = flot['cell_health_score']
        # Falls back to the dosed amount when actual consumption is not recorded.
        consumed = flot.get('actual_collector_consumed_gt', collector)
        flot['cell_health_recovery_product'] = cell_health * recovery
        flot['equipment_degradation_impact'] = (100 - cell_health) * consumed / 100

    if 'blower_health_score' in present:
        flot['blower_efficiency_factor'] = flot['blower_health_score'] / 100

    # A constant 80 stands in for the health score when the column is absent.
    flot['agitator_mixing_efficiency'] = flot.get('cell_health_score', 80) * recovery / 100

    # --- Ore-type interactions ---
    if 'ore_type' in present:
        ore_type = flot['ore_type']
        # Ore types not listed here map to NaN.
        suitability_by_ore = {'oxide': 0.65, 'carbonate': 0.78, 'silicate': 0.85}
        flot['flotation_ore_suitability'] = ore_type.map(suitability_by_ore)
        flot['carbonate_flotation_bonus'] = (ore_type == 'carbonate').astype(int) * recovery * 0.1
        flot['oxide_flotation_penalty'] = (ore_type == 'oxide').astype(int) * recovery * 0.15

    added = [col for col in flot.columns if col not in flotation_df.columns]
    print(f"  Generated {len(added)} flotation features")
    return flot

# Build the flotation-circuit feature table and register it under 'engineered_flotation'.
engineered_datasets['engineered_flotation'] = engineer_flotation_features(datasets['flotation'])



Engineering Flotation Circuit Features...
  Generated 23 flotation features


In [25]:

# CATEGORY 5: DMS FEATURES

def engineer_dms_features(dms_df):
    """Derive dense-media-separation (DMS) features on a copy of the input.

    Eleven columns are always added; two more are added when the optional
    ``cyclone_health_score`` and ``cyclone_wear_rate_pct`` columns are present
    (13 in total). ``separation_efficiency`` is only read when at least one of
    those optional columns exists. The caller's frame is left untouched.

    Args:
        dms_df: DataFrame containing ``ore_density_sg``, ``media_density_sg``,
            ``media_recovery_pct``, ``cyclone_pressure_kpa``, ``dms_recovery``,
            ``media_consumption_kg_t``, ``feed_size_mm``, ``sink_grade_pct``,
            ``float_grade_pct``, ``sink_yield_pct`` and ``feed_grade_pct``.

    Returns:
        A new DataFrame: the input columns followed by the engineered ones.
    """
    print("\nEngineering DMS Circuit Features...")

    eps = 0.01  # offset that keeps denominators away from zero
    dms = dms_df.copy()
    present = set(dms.columns)

    # --- Media properties ---
    density_gap = (dms['ore_density_sg'] - dms['media_density_sg']).abs()
    dms['density_differential'] = density_gap
    dms['separation_sharpness_dms'] = density_gap / 0.5
    dms['media_efficiency'] = dms['media_recovery_pct'] / 100

    # --- Cyclone performance (health and wear terms are optional) ---
    if 'cyclone_health_score' in present:
        dms['cyclone_health_efficiency_product'] = dms['cyclone_health_score'] * dms['separation_efficiency']

    if 'cyclone_wear_rate_pct' in present:
        dms['wear_impact_on_separation'] = dms['separation_efficiency'] * (100 - dms['cyclone_wear_rate_pct']) / 100

    # Pressure is expressed as a fraction of a 120 kPa reference.
    dms['pressure_utilization'] = dms['cyclone_pressure_kpa'] / 120
    recovery = dms['dms_recovery']
    dms['media_consumption_efficiency'] = recovery / (dms['media_consumption_kg_t'] + eps)

    # --- Feed size effects: 10 mm suitability cut-off, 25 mm reference size ---
    feed_size = dms['feed_size_mm']
    dms['size_suitability_for_dms'] = feed_size.ge(10).astype(int)
    dms['coarse_fraction_ratio'] = feed_size / 25
    dms['size_density_interaction'] = feed_size * density_gap

    # --- Product quality ---
    sink_grade = dms['sink_grade_pct']
    dms['sink_float_separation'] = sink_grade / (dms['float_grade_pct'] + eps)
    dms['yield_recovery_product'] = (dms['sink_yield_pct'] / 100) * recovery
    dms['dms_upgrade_factor'] = sink_grade / (dms['feed_grade_pct'] + eps)

    added = [col for col in dms.columns if col not in dms_df.columns]
    print(f"  Generated {len(added)} DMS features")
    return dms


# Build the DMS-circuit feature table and register it under 'engineered_dms'.
engineered_datasets['engineered_dms'] = engineer_dms_features(datasets['dms'])


Engineering DMS Circuit Features...
  Generated 13 DMS features


In [23]:
# SAVE ENGINEERED DATASETS to <parent of cwd>/data/engineered_data (created if missing).
# Only the entries present in engineered_datasets when this cell executes are
# written, so run it after every engineer_* cell above.
base_dir = os.path.dirname(os.getcwd())
engineered_data = os.path.join(base_dir, "data", "engineered_data")
os.makedirs(engineered_data, exist_ok=True)

print(f"Engineered datasets will be saved in: {engineered_data}")

# One CSV per registered dataset, named after its key; the index is not written.
for name, df in engineered_datasets.items():
    save_path = os.path.join(engineered_data, name + ".csv")
    df.to_csv(save_path, index=False)
    print("Saved", name, "to", save_path)


Engineered datasets will be saved in: /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/engineered_data
Saved engineered_ore_feed to /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/engineered_data/engineered_ore_feed.csv
Saved engineered_crushing to /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/engineered_data/engineered_crushing.csv
Saved engineered_separation to /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/engineered_data/engineered_separation.csv
