FULL DATASET GENERATION

In [1]:
import numpy as np
import pandas as pd

np.random.seed(42)

n_samples = 15000

# Operating window (low, high) for every process input. The draws happen in
# exactly this order, so the seeded random stream is consumed identically on
# every run; the same order becomes the column order of the final frame.
input_ranges = [
    ("Fe_percent", 55, 68),
    ("Coke_rate", 300, 600),
    ("PCI_rate", 100, 250),
    ("Hot_blast_temp", 900, 1200),
    ("Blast_pressure", 2, 4),
    ("Moisture_percent", 2, 8),
    ("Basicity", 1.0, 1.4),
    ("Oxygen_percent", 21, 28),
    ("CO_percent", 18, 28),
    ("CO2_percent", 18, 25),
]
inputs = {
    column: np.random.uniform(low, high, n_samples)
    for column, low, high in input_ranges
}

# Gaussian measurement noise (mean 0, standard deviation 10), drawn after all inputs.
noise = np.random.normal(0, 10, n_samples)

# Linear temperature model: a 1450 baseline plus coefficient * (input - reference)
# for each contributing input. Negative coefficients lower the temperature.
# Blast_pressure is generated but does not enter the model.
temperature_terms = [
    ("Fe_percent", 2.5, 60),
    ("Coke_rate", 0.12, 450),
    ("PCI_rate", 0.08, 180),
    ("Hot_blast_temp", 0.15, 1050),
    ("Oxygen_percent", 18, 23),
    ("CO_percent", 4, 22),
    ("Moisture_percent", -6, 4),
    ("CO2_percent", -5, 21),
    ("Basicity", 25, 1.2),
]
hot_metal_temp = 1450
for column, coefficient, reference in temperature_terms:
    # Accumulate left to right so floating-point rounding is reproducible.
    hot_metal_temp = hot_metal_temp + coefficient * (inputs[column] - reference)
hot_metal_temp = hot_metal_temp + noise

# A sample is flagged unstable as soon as any single rule fires.
instability_rules = [
    inputs["Moisture_percent"] > 7,
    inputs["Oxygen_percent"] < 22,
    inputs["Coke_rate"] < 350,
    inputs["CO2_percent"] > 24,
    hot_metal_temp < 1420,
    hot_metal_temp > 1520,
]
instability_flag = np.logical_or.reduce(instability_rules).astype(int)

# Assemble the table: the ten inputs, then the target, then the flag.
df = pd.DataFrame({
    **inputs,
    "Hot_metal_temp": hot_metal_temp,
    "Instability_flag": instability_flag,
})

# Persist without the index so the file holds exactly the 12 data columns.
df.to_csv("blast_furnace_industrial_dataset.csv", index=False)

print("Dataset generated successfully!")
print(df.head())

Dataset generated successfully!
   Fe_percent   Coke_rate    PCI_rate  Hot_blast_temp  Blast_pressure  \
0   59.869022  449.901073  195.721685      998.979722        3.483110   
1   67.359286  524.024032  168.893868     1145.021526        3.762204   
2   64.515921  468.800034  244.674779     1198.313045        2.926360   
3   62.782560  324.990775  132.846768     1152.228461        2.578357   
4   57.028242  355.674071  188.178462     1003.842878        2.637693   

   Moisture_percent  Basicity  Oxygen_percent  CO_percent  CO2_percent  \
0          2.011306  1.350864       27.033034   19.297540    23.535011   
1          7.759906  1.362869       26.525454   23.397259    18.889375   
2          5.211900  1.151349       22.967298   24.153863    20.578568   
3          6.202403  1.092652       23.938024   23.077053    21.109077   
4          7.475583  1.076664       24.553806   23.175972    21.878470   

   Hot_metal_temp  Instability_flag  
0     1509.483536                 0  
1     1544.697202                 1  
2     1498.906033                 0  
3     1465.377855                 1  
4     1399.460359                 1  

Loading the Dataset into a Pandas DataFrame

In [2]:
# Read the generated CSV back from disk so the rest of the notebook works from
# the saved file; `df` is rebound to the reloaded frame.
df = pd.read_csv("blast_furnace_industrial_dataset.csv")
# Show the first rows as a quick sanity check of the load.
df.head()

Unnamed: 0,Fe_percent,Coke_rate,PCI_rate,Hot_blast_temp,Blast_pressure,Moisture_percent,Basicity,Oxygen_percent,CO_percent,CO2_percent,Hot_metal_temp,Instability_flag
0,59.869022,449.901073,195.721685,998.979722,3.48311,2.011306,1.350864,27.033034,19.29754,23.535011,1509.483536,0
1,67.359286,524.024032,168.893868,1145.021526,3.762204,7.759906,1.362869,26.525454,23.397259,18.889375,1544.697202,1
2,64.515921,468.800034,244.674779,1198.313045,2.92636,5.2119,1.151349,22.967298,24.153863,20.578568,1498.906033,0
3,62.78256,324.990775,132.846768,1152.228461,2.578357,6.202403,1.092652,23.938024,23.077053,21.109077,1465.377855,1
4,57.028242,355.674071,188.178462,1003.842878,2.637693,7.475583,1.076664,24.553806,23.175972,21.87847,1399.460359,1


In [3]:
# (rows, columns) of the loaded frame.
df.shape

(15000, 12)

In [4]:
# Column labels of the loaded frame, in file order.
df.columns

Index(['Fe_percent', 'Coke_rate', 'PCI_rate', 'Hot_blast_temp',
       'Blast_pressure', 'Moisture_percent', 'Basicity', 'Oxygen_percent',
       'CO_percent', 'CO2_percent', 'Hot_metal_temp', 'Instability_flag'],
      dtype='object')

In [6]:
# Per-column dtype and non-null counts, to check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15000 entries, 0 to 14999
Data columns (total 12 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Fe_percent        15000 non-null  float64
 1   Coke_rate         15000 non-null  float64
 2   PCI_rate          15000 non-null  float64
 3   Hot_blast_temp    15000 non-null  float64
 4   Blast_pressure    15000 non-null  float64
 5   Moisture_percent  15000 non-null  float64
 6   Basicity          15000 non-null  float64
 7   Oxygen_percent    15000 non-null  float64
 8   CO_percent        15000 non-null  float64
 9   CO2_percent       15000 non-null  float64
 10  Hot_metal_temp    15000 non-null  float64
 11  Instability_flag  15000 non-null  int64  
dtypes: float64(11), int64(1)
memory usage: 1.4 MB


# Model Building

STEP 1 — IMPORT LIBRARIES

In [7]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
import joblib

STEP 2 — DEFINE FEATURES & TARGET

In [8]:
# Regression target: the hot-metal temperature column.
y = df["Hot_metal_temp"]

# Predictors: everything except the target itself and the instability flag.
non_feature_columns = ["Hot_metal_temp", "Instability_flag"]
X = df.drop(columns=non_feature_columns)

STEP 3 — TRAIN TEST SPLIT

In [9]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

STEP 4 — SCALING (Important for Production)

In [10]:
# Learn the scaling statistics from the training rows only, then apply that
# same fitted transform to both partitions so the test set never influences it.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

STEP 5 — MODEL SELECTION (Random Forest + GridSearch)

In [11]:
# Hyperparameter candidates: 2 * 3 * 2 * 2 = 24 combinations.
param_grid = dict(
    n_estimators=[200, 300],
    max_depth=[10, 15, None],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)

# Base estimator; the seed keeps each forest reproducible.
rf = RandomForestRegressor(random_state=42)

# Exhaustive search with 3-fold cross-validation, ranked by R^2,
# using every available core.
grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring="r2",
    cv=3,
    n_jobs=-1,
)
grid.fit(X_train_scaled, y_train)

# Estimator from the search with the winning parameter combination.
best_model = grid.best_estimator_

STEP 6 — MODEL EVALUATION

In [12]:
# Predict on the held-out partition.
y_pred = best_model.predict(X_test_scaled)

# Hold-out metrics; RMSE is the square root of the mean squared error.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the selected hyperparameters followed by the three metrics.
report = (
    ("Best Parameters:", grid.best_params_),
    ("R2 Score:", r2),
    ("MAE:", mae),
    ("RMSE:", rmse),
)
for label, value in report:
    print(label, value)

Best Parameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 300}
R2 Score: 0.9195725959111718
MAE: 10.487395907038161
RMSE: 13.08537617387084


STEP 7 — SAVE MODEL + SCALER (.pkl)

In [13]:
# Persist the fitted model and the scaler it was trained with; inference
# needs both files together.
artifacts = {
    "blast_furnace_model.pkl": best_model,
    "blast_furnace_scaler.pkl": scaler,
}
for filename, fitted_object in artifacts.items():
    joblib.dump(fitted_object, filename)

print("Model and Scaler saved successfully!")

Model and Scaler saved successfully!


STEP 8 — CREATE INDUSTRIAL OUTPUT LOGIC

In [14]:
def calculate_efficiency(temp, target=1475, penalty_per_degree=0.2):
    """Score how close a temperature is to the target on a 0-100 scale.

    The score starts at 100 and loses ``penalty_per_degree`` points for every
    degree of absolute deviation from ``target``. The result is clamped to
    the closed interval [0, 100].

    Parameters
    ----------
    temp : float
        Temperature to score, in the same unit as ``target``.
    target : float, default 1475
        Temperature that earns the full score of 100.
    penalty_per_degree : float, default 0.2
        Points deducted per degree of deviation from ``target``.

    Returns
    -------
    float
        Score in [0.0, 100.0]. NaN is returned when the score cannot be
        computed because an input is NaN.
    """
    score = 100.0 - abs(temp - target) * penalty_per_degree

    # NaN never compares equal to itself. Without this guard, min(100, nan)
    # yields 100 and a missing temperature would be reported as a perfect score.
    if score != score:
        return float("nan")

    # Always return a float; the bare max/min form mixed int and float results.
    return float(max(0.0, min(100.0, score)))

Risk Indicator Function

In [15]:
def risk_indicator(temp):
    """Map a hot-metal temperature to a traffic-light status label.

    Bands (inclusive where shown):
      * 1450 <= temp <= 1500                          -> Stable
      * 1420 <= temp < 1450  or  1500 < temp <= 1520  -> Warning
      * anything else, including NaN                  -> Critical

    Parameters
    ----------
    temp : float
        Temperature to classify.

    Returns
    -------
    str
        A coloured-circle emoji followed by the status word.
    """
    # The emoji are written as ASCII escapes so the labels survive a source
    # file being re-read with the wrong codec.
    if 1450 <= temp <= 1500:
        return "\U0001F7E2 Stable"    # green circle
    # The stable band is already excluded, so this covers only the two
    # warning shoulders on either side of it.
    if 1420 <= temp <= 1520:
        return "\U0001F7E1 Warning"   # yellow circle
    # NaN fails every comparison above and therefore lands here as well.
    return "\U0001F534 Critical"      # red circle

STEP 9 — FULL TESTING SCRIPT (Simulated User Input)

In [16]:
# Load model and scaler
# Restore the persisted estimator and its scaler from disk.
# NOTE(review): joblib.load unpickles the file; only load artifacts you trust.
model = joblib.load("blast_furnace_model.pkl")
scaler = joblib.load("blast_furnace_scaler.pkl")

# One simulated furnace operating point, keyed by feature column name.
furnace_state = {
    "Fe_percent": 62,
    "Coke_rate": 480,
    "PCI_rate": 190,
    "Hot_blast_temp": 1080,
    "Blast_pressure": 3.2,
    "Moisture_percent": 4.5,
    "Basicity": 1.25,
    "Oxygen_percent": 24,
    "CO_percent": 23,
    "CO2_percent": 20,
}
new_input = pd.DataFrame([furnace_state])

# Apply the stored scaling, then take the single prediction from the result.
new_input_scaled = scaler.transform(new_input)
predicted_temp = model.predict(new_input_scaled)[0]

# Derive the operator-facing indicators from the predicted temperature.
efficiency = calculate_efficiency(predicted_temp)
risk = risk_indicator(predicted_temp)

summary = (
    ("Predicted Hot Metal Temperature:", round(predicted_temp, 2)),
    ("Efficiency Score:", round(efficiency, 2)),
    ("Risk Indicator:", risk),
)
for label, value in summary:
    print(label, value)

Predicted Hot Metal Temperature: 1493.8
Efficiency Score: 96.24
Risk Indicator: 🟢 Stable
