In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix, roc_curve
import matplotlib.pyplot as plt

# Load the dataset
df = pd.read_csv('/content/MiningProcess_Flotation_Plant_Database.csv')

# Convert date column to datetime
df['date'] = pd.to_datetime(df['date'])

# Set date as the index
df.set_index('date', inplace=True)

# Convert object columns to numeric
df['% Iron Concentrate'] = pd.to_numeric(df['% Iron Concentrate'], errors='coerce')
df['% Silica Concentrate'] = pd.to_numeric(df['% Silica Concentrate'], errors='coerce')

# Fill missing values
df.fillna(method='ffill', inplace=True)

# Extract datetime features
df['hour'] = df.index.hour
df['day'] = df.index.day
df['dayofweek'] = df.index.dayofweek
df['month'] = df.index.month
df['year'] = df.index.year

# Create lag features
for lag in range(1, 11):
    df[f'% Silica Concentrate_lag_{lag}'] = df['% Silica Concentrate'].shift(lag)

# Drop rows with NaN values after lag feature creation
df.dropna(inplace=True)


In [6]:
# Define the feature columns and target column
feature_cols = ['% Iron Feed', '% Silica Feed', 'Starch Flow', 'Amina Flow',
                'Ore Pulp Flow', 'Ore Pulp pH', 'Ore Pulp Density',
                'Flotation Column 01 Air Flow', 'Flotation Column 02 Air Flow',
                'Flotation Column 03 Air Flow', 'Flotation Column 04 Air Flow',
                'Flotation Column 05 Air Flow', 'Flotation Column 06 Air Flow',
                'Flotation Column 07 Air Flow', 'Flotation Column 01 Level',
                'Flotation Column 02 Level', 'Flotation Column 03 Level',
                'Flotation Column 04 Level', 'Flotation Column 05 Level',
                'Flotation Column 06 Level', 'Flotation Column 07 Level',
                'hour', 'day', 'dayofweek', 'month', 'year'] + \
               [f'% Silica Concentrate_lag_{lag}' for lag in range(1, 11)]

target_col = '% Silica Concentrate'

# Split the data into train and test sets
X = df[feature_cols]
y = df[target_col]

# Remove commas from the affected columns
for col in df.columns:
    if df[col].dtype == 'object':
        if df[col].str.contains(',').any():
            df[col] = df[col].str.replace(',', '')

# Convert the affected columns to float
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = pd.to_numeric(df[col], errors='coerce')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
# Train a Random Forest model
model_rf = RandomForestRegressor(n_estimators=100, random_state=42)
model_rf.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred_rf = model_rf.predict(X_test_scaled)

In [9]:
from sklearn.metrics import precision_score

# Calculate precision score using 'micro' average
precision_rf_micro = precision_score(y_test, np.round(y_pred_rf), average='micro')
print(f'Random Forest Precision (micro): {precision_rf_micro}')

# Calculate precision score using 'macro' average
precision_rf_macro = precision_score(y_test, np.round(y_pred_rf), average='macro')
print(f'Random Forest Precision (macro): {precision_rf_macro}')

# Calculate precision score using 'weighted' average
precision_rf_weighted = precision_score(y_test, np.round(y_pred_rf), average='weighted')
print(f'Random Forest Precision (weighted): {precision_rf_weighted}')

# Calculate precision score without averaging
precision_rf_none = precision_score(y_test, np.round(y_pred_rf), average=None)
print(f'Random Forest Precision (none): {precision_rf_none}')

Random Forest Precision (micro): 0.9999581344720757
Random Forest Precision (macro): 0.9997326173275318
Random Forest Precision (weighted): 0.9999581356842394
Random Forest Precision (none): [1.         0.9999485  0.99996772 0.99874687 1.        ]


In [15]:
from sklearn.metrics import mean_squared_error, r2_score

# Calculate mean squared error
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f'Random Forest Mean Squared Error: {mse_rf}')

# Calculate R-squared score
r2_rf = r2_score(y_test, y_pred_rf)
print(f'Random Forest R-squared Score: {r2_rf}')

Random Forest Mean Squared Error: 3.934871193725753e-05
Random Forest R-squared Score: 0.9999348399147597


In [19]:
from sklearn.ensemble import RandomForestClassifier

# Train the Random Forest Classifier
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)


In [23]:

# Mean Squared Error
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f'Random Forest Mean Squared Error: {mse_rf}')

# Accuracy
accuracy_rf = accuracy_score(y_test, np.round(y_pred_rf))
print(f'Random Forest Accuracy: {accuracy_rf}')


# Confusion Matrix
conf_matrix_rf = confusion_matrix(y_test, np.round(y_pred_rf))
print(f'Random Forest Confusion Matrix:\n{conf_matrix_rf}')

# Convert the target column to binary
df['% Silica Concentrate'] = (df['% Silica Concentrate'] > 50).astype(int)

# Split the data into train and test sets
X = df[feature_cols]
y = df['% Silica Concentrate']


# Predict on the test set
y_pred_rf = model_rf.predict(X_test)


from sklearn.metrics import classification_report

y_pred_rf = model_rf.predict(X_test)
report = classification_report(y_test, y_pred_rf)
print(report)



Random Forest Mean Squared Error: 2.7910351949538085e-05
Random Forest Accuracy: 0.9999720896480505
Random Forest Confusion Matrix:
[[31500     2     0     0     0]
 [    0 77665     0     0     0]
 [    0     2 30978     0     0]
 [    0     0     0   798     0]
 [    0     0     0     0  2371]]
              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00     31502
         2.0       1.00      1.00      1.00     77665
         3.0       1.00      1.00      1.00     30980
         4.0       1.00      1.00      1.00       798
         5.0       1.00      1.00      1.00      2371

    accuracy                           1.00    143316
   macro avg       1.00      1.00      1.00    143316
weighted avg       1.00      1.00      1.00    143316

