### Manganese Processing Plant Feature Engineering
#### Advanced feature creation for ML optimization models

#### AUTHOR: DARLENE WENDY NASIMIYU
#### Purpose: Create powerful features for manganese processing optimization

In [2]:
import pandas as pd
import numpy as np
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
import os

In [3]:
# ---- SETUP: locate the synthetic data directory ----
# The project root is taken to be the PARENT of the notebook's current working
# directory (the notebook is expected to be run from a sub-folder of the project).
BASE_DIR = os.path.dirname(os.getcwd())
# Synthetic CSV inputs are read from <project root>/data/synthetic.
data_dir = os.path.join(BASE_DIR, 'data', 'synthetic')

print(f"Using data directory: {data_dir}")

Using data directory: /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/synthetic


In [4]:
# ----- DATASET FILES -----
# Maps the short dataset name used throughout the notebook to its CSV file name
# inside `data_dir`.
dataset_files = {
    'ore_feed': 'manganese_ore_feed.csv',
    'blended_ore': 'manganese_blended_ore_feed.csv',
    'crushing': 'manganese_crushing_circuit.csv',
    'separation': 'manganese_separation_circuit.csv',
    'flotation': 'manganese_flotation_circuit.csv',
    'dms': 'manganese_dms_circuit.csv',
    'jigging': 'manganese_jigging_circuit.csv',
    'dewatering': 'manganese_dewatering_circuit.csv',
    'equipment': 'manganese_equipment_health.csv',
    'energy': 'manganese_energy_consumption.csv',
}

# Read each CSV into `datasets`, parsing the 'timestamp' column as datetimes.
# A file that is missing or cannot be read is reported and skipped, so the
# remaining datasets still load; skipped names are simply absent from `datasets`.
datasets = {}
for name, filename in dataset_files.items():
    filepath = os.path.join(data_dir, filename)
    try:
        df = pd.read_csv(filepath, parse_dates=['timestamp'])
        datasets[name] = df
    except FileNotFoundError:
        print(f"Could not find {filepath}")
    except Exception as e:
        print(f"Error loading {name}: {str(e)}")
    else:
        # Only reached when the read succeeded
        print(f" Loaded {name}: {len(df):,} records, {len(df.columns)} columns")

# Summary across everything that loaded successfully
print(f"\nTotal datasets loaded: {len(datasets)}")
print(f"Total records: {sum(map(len, datasets.values())):,}")

 Loaded ore_feed: 10,000 records, 11 columns
 Loaded blended_ore: 6,522 records, 11 columns
 Loaded crushing: 15,000 records, 9 columns
 Loaded separation: 12,000 records, 13 columns
 Loaded flotation: 12,000 records, 22 columns
 Loaded dms: 8,000 records, 16 columns
 Loaded jigging: 10,000 records, 16 columns
 Loaded dewatering: 8,000 records, 18 columns
 Loaded equipment: 8,000 records, 12 columns
 Loaded energy: 10,000 records, 30 columns

Total datasets loaded: 10
Total records: 99,522


In [6]:
# Registry of engineered datasets: output name -> engineered DataFrame.
# Starts empty; later cells add one entry per engineered dataset.
engineered_datasets = dict()


In [7]:
# CATEGORY 1: ORE CHARACTERISTICS FEATURES
def engineer_ore_features(ore_df, max_possible_grade=52.0, high_grade_threshold=60,
                          low_grade_threshold=45, processability_map=None):
    """Add engineered ore-characteristic features to a copy of ``ore_df``.

    The input frame is not modified. New columns are appended in this order:
    grade transforms, gangue/ratio features, hardness and liberation features,
    one-hot ``ore_type_*`` columns, the processability score, and the
    grade statistics/flags.

    Parameters
    ----------
    ore_df : pandas.DataFrame
        Must contain the columns ``mn_grade_pct``, ``fe_content_pct``,
        ``siO2_content_pct``, ``al2O3_content_pct``, ``p_content_pct``,
        ``work_index_kwh_t``, ``p80_mm``, ``specific_gravity``,
        ``moisture_pct`` and ``ore_type``.
    max_possible_grade : float, default 52.0
        Reference grade used for ``enrichment_potential``.
    high_grade_threshold : float, default 60
        ``is_high_grade`` is 1 where ``mn_grade_pct`` is strictly above this.
    low_grade_threshold : float, default 45
        ``is_low_grade`` is 1 where ``mn_grade_pct`` is strictly below this.
    processability_map : dict or None, default None
        Maps ``ore_type`` values to a base processability factor. ``None``
        uses ``{'oxide': 0.7, 'carbonate': 0.85, 'silicate': 0.9}``. Ore types
        missing from the map get a NaN ``ore_processability_score``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``ore_df`` with the engineered columns added.

    Raises
    ------
    KeyError
        If any required input column is missing.
    """
    print("\nEngineering Ore Characteristics Features...")

    if processability_map is None:
        processability_map = {'oxide': 0.7, 'carbonate': 0.85, 'silicate': 0.9}

    ore_data = ore_df.copy()
    mn_grade = ore_data['mn_grade_pct']
    work_index = ore_data['work_index_kwh_t']

    # Denominators that have no epsilon guard: mask exact zeros to NaN so the
    # affected rows yield NaN instead of +/-inf. Non-zero rows are unchanged.
    mn_grade_nonzero = mn_grade.where(mn_grade != 0)
    work_index_nonzero = work_index.where(work_index != 0)

    # Basic transformations
    ore_data['mn_grade_squared'] = mn_grade ** 2
    ore_data['mn_grade_log'] = np.log1p(mn_grade)

    ore_data['gangue_total'] = (ore_data['fe_content_pct'] + ore_data['siO2_content_pct'] +
                                ore_data['al2O3_content_pct'] + ore_data['p_content_pct'])
    # The +0.01 keeps these ratios finite when the denominator content is zero
    ore_data['ore_quality_index'] = mn_grade / (ore_data['gangue_total'] + 0.01)
    ore_data['mn_to_fe_ratio'] = mn_grade / (ore_data['fe_content_pct'] + 0.01)
    ore_data['mn_to_silica_ratio'] = mn_grade / (ore_data['siO2_content_pct'] + 0.01)
    mineral_total = mn_grade + ore_data['gangue_total']
    ore_data['valuable_mineral_ratio'] = mn_grade / mineral_total.where(mineral_total != 0)

    # Derived features
    # The top bin is open-ended so a work index above 25 is labelled
    # 'very_hard' instead of silently becoming NaN. Values <= 0 still fall
    # outside the first bin (0, 12] and stay NaN.
    ore_data['ore_hardness_category'] = pd.cut(work_index,
                                               bins=[0, 12, 15, 18, np.inf],
                                               labels=['soft', 'medium', 'hard', 'very_hard'])

    ore_data['liberation_difficulty'] = work_index * ore_data['p80_mm']
    ore_data['density_grade_product'] = ore_data['specific_gravity'] * mn_grade
    ore_data['moisture_adjusted_grade'] = mn_grade * (100 - ore_data['moisture_pct']) / 100

    # Relative headroom between the current grade and the reference maximum;
    # NaN where the grade is exactly zero.
    ore_data['enrichment_potential'] = (max_possible_grade - mn_grade) / mn_grade_nonzero

    # Ore type encoding
    ore_type_dummies = pd.get_dummies(ore_data['ore_type'], prefix='ore_type')
    ore_data = pd.concat([ore_data, ore_type_dummies], axis=1)

    # Base factor per ore type, scaled up with grade (relative to 50) and down
    # with work index (relative to 15); NaN where the work index is zero.
    ore_data['ore_processability_score'] = (
        ore_data['ore_type'].map(processability_map)
        * ((mn_grade / 50) * (1 / (work_index_nonzero / 15)))
    )

    # Statistical features
    mean_grade = mn_grade.mean()
    ore_data['grade_deviation_from_mean'] = mn_grade - mean_grade
    ore_data['grade_percentile_rank'] = mn_grade.rank(pct=True)
    # NOTE(review): the default high-grade threshold (60) is above the default
    # max_possible_grade (52) - confirm both values against the data.
    ore_data['is_high_grade'] = (mn_grade > high_grade_threshold).astype(int)
    ore_data['is_low_grade'] = (mn_grade < low_grade_threshold).astype(int)

    print(f"  Generated {len([c for c in ore_data.columns if c not in ore_df.columns])} ore features")
    return ore_data


# Engineer the ore-feed features and register the result; the dictionary key
# is later used as the CSV file name when the engineered datasets are saved.
engineered_datasets['ore_feed_engineered'] = engineer_ore_features(
    ore_df=datasets['ore_feed'],
)


Engineering Ore Characteristics Features...
  Generated 20 ore features


In [8]:
# SAVE ENGINEERED DATASETS as CSV files under <project root>/data/engineered_data.
# The project root is the parent of the current working directory.
base_dir = os.path.dirname(os.getcwd())

# Output folder: 'engineered_data' inside the project's 'data/' directory
engineered_data = os.path.join(base_dir, "data", "engineered_data")

# exist_ok=True makes re-running this cell safe when the folder already exists
os.makedirs(engineered_data, exist_ok=True)

print(f"Engineered datasets will be saved in: {engineered_data}")

# One CSV per engineered dataset, named after its dictionary key; the
# DataFrame index is not written.
for name in engineered_datasets:
    df = engineered_datasets[name]
    save_path = os.path.join(engineered_data, f"{name}.csv")
    df.to_csv(save_path, index=False)
    print(f"Saved {name} to {save_path}")


Engineered datasets will be saved in: /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/engineered_data
Saved ore_feed_engineered to /home/darlenewendie/PycharmProjects/Intelligent-Manganese-Processing-Plant-Optimization/data/engineered_data/ore_feed_engineered.csv
