## Prerequisites

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
import joblib
import warnings
warnings.filterwarnings('ignore')

# DATA PREPARATION

In [3]:
# Mount Google Drive in Colab
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Upload vegemite.csv to Colab first
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/vegemite.csv')

print(f"Original dataset shape: {df.shape}")
print(f"Original class distribution:\n{df.iloc[:, -1].value_counts()}")

Original dataset shape: (15237, 47)
Original class distribution:
Class
2    7548
1    5047
0    2642
Name: count, dtype: int64


In [5]:
constant_columns = []
for col in df.columns[:-1]:  # Exclude target column
    if df[col].nunique() <= 1:  # Only one unique value (constant)
        constant_columns.append(col)
        print(f"Constant column found: {col} (unique values: {df[col].nunique()})")

if constant_columns:
    df = df.drop(columns=constant_columns)
    print(f"Removed {len(constant_columns)} constant columns")
else:
    print("No constant value columns found - Good!")

print(f"Dataset shape after constant column removal: {df.shape}")

Constant column found: TFE Steam temperature SP (unique values: 1)
Constant column found: TFE Product out temperature (unique values: 1)
Removed 2 constant columns
Dataset shape after constant column removal: (15237, 45)


In [6]:
# Shuffle and split data
df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)
target_col = df_shuffled.columns[-1]
X = df_shuffled.drop(target_col, axis=1)
y = df_shuffled[target_col]

# Separate a test set with at least 333 samples per class
test_indices = []
for class_label in y.unique():
    class_indices = y[y == class_label].index.tolist()
    # Ensure at least 333 samples if available, otherwise take all
    num_samples = min(334, len(class_indices))
    test_indices.extend(np.random.choice(class_indices, num_samples, replace=False))

X_test = X.loc[test_indices]
y_test = y.loc[test_indices]

# Use the remaining data for training
train_indices = list(set(X.index) - set(test_indices))
X_train_temp = X.loc[train_indices]
y_train_temp = y.loc[train_indices]

print(f"Test samples: {len(X_test)}, Training samples: {len(X_train_temp)}")
print(f"Test class distribution:\n{y_test.value_counts().sort_index()}")

Test samples: 1002, Training samples: 14235
Test class distribution:
Class
0    334
1    334
2    334
Name: count, dtype: int64


Does the dataset have any constant value columns?

In [7]:
categorical_threshold = 10
converted_cols = []

for col in X_train_temp.columns:
    unique_count = X_train_temp[col].nunique()
    if unique_count <= categorical_threshold and unique_count > 1:
        # Check if values are integers
        if X_train_temp[col].dtype in ['int64', 'int32'] or all(X_train_temp[col].dropna().apply(lambda x: float(x).is_integer())):
            converted_cols.append(col)
            X_train_temp[col] = X_train_temp[col].astype('category')
            X_test[col] = X_test[col].astype('category')
            print(f"Converted {col} to categorical (unique values: {unique_count})")

print(f"Total converted to categorical: {len(converted_cols)} columns")

Converted FFTE Feed tank level SP to categorical (unique values: 3)
Converted FFTE Pump 1 to categorical (unique values: 5)
Converted FFTE Pump 1 - 2 to categorical (unique values: 4)
Converted FFTE Pump 2 to categorical (unique values: 5)
Converted TFE Motor speed to categorical (unique values: 3)
Total converted to categorical: 5 columns


Check for missing values and handle them

In [8]:
missing_counts = X_train_temp.isnull().sum()
if missing_counts.sum() > 0:
    numeric_cols = X_train_temp.select_dtypes(include=[np.number]).columns
    categorical_cols = X_train_temp.select_dtypes(include=['category']).columns

    if len(numeric_cols) > 0:
        numeric_imputer = SimpleImputer(strategy='median')
        X_train_temp[numeric_cols] = numeric_imputer.fit_transform(X_train_temp[numeric_cols])
        X_test[numeric_cols] = numeric_imputer.transform(X_test[numeric_cols])

    if len(categorical_cols) > 0:
        categorical_imputer = SimpleImputer(strategy='most_frequent')
        X_train_temp[categorical_cols] = categorical_imputer.fit_transform(X_train_temp[categorical_cols])
        X_test[categorical_cols] = categorical_imputer.transform(X_test[categorical_cols])
    print("Missing values imputed")
else:
    print("No missing values found")


No missing values found


Class balance analysis and correction

In [9]:
class_dist = y_train_temp.value_counts().sort_index()
print(f"\nClass distribution: {dict(class_dist)}")
imbalance_ratio = class_dist.max() / class_dist.min()
print(f"Imbalance ratio: {imbalance_ratio:.2f}")

if imbalance_ratio > 1.5:
    X_train_for_smote = X_train_temp.copy()
    categorical_cols = X_train_temp.select_dtypes(include=['category']).columns
    for col in categorical_cols:
        X_train_for_smote[col] = X_train_for_smote[col].cat.codes

    smote = SMOTE(random_state=42)
    X_train_balanced, y_train_balanced = smote.fit_resample(X_train_for_smote, y_train_temp)
    X_train_balanced = pd.DataFrame(X_train_balanced, columns=X_train_temp.columns)
    print(f"After SMOTE: {dict(pd.Series(y_train_balanced).value_counts().sort_index())}")
else:
    X_train_balanced, y_train_balanced = X_train_temp, y_train_temp


Class distribution: {0: np.int64(2308), 1: np.int64(4713), 2: np.int64(7214)}
Imbalance ratio: 3.13
After SMOTE: {0: np.int64(7214), 1: np.int64(7214), 2: np.int64(7214)}


Feature exploration and composite features

In [10]:
# Convert categorical columns back to numeric for mathematical operations
categorical_cols = X_train_balanced.select_dtypes(include=['category']).columns
if len(categorical_cols) > 0:
    for col in categorical_cols:
        X_train_balanced[col] = X_train_balanced[col].cat.codes

# ALSO convert categorical columns in X_test to numeric
categorical_cols_test = X_test.select_dtypes(include=['category']).columns
if len(categorical_cols_test) > 0:
    for col in categorical_cols_test:
        X_test[col] = X_test[col].cat.codes

numeric_cols = X_train_balanced.select_dtypes(include=[np.number]).columns
composite_features = []

print("Creating composite features based on domain knowledge:")

# Create ratio features between SP and PV columns
sp_cols = [col for col in numeric_cols if 'SP' in col]
pv_cols = [col for col in numeric_cols if 'PV' in col]

print(f"Found {len(sp_cols)} SP columns and {len(pv_cols)} PV columns")

# SP to PV ratios (limited to avoid too many features)
for sp_col in sp_cols[:3]:
    base_name = sp_col.replace(' SP', '')
    pv_col = base_name + ' PV'
    if pv_col in pv_cols:
        ratio_col = f"{base_name}_SP_to_PV_ratio"
        X_train_balanced[ratio_col] = X_train_balanced[sp_col] / (X_train_balanced[pv_col] + 1e-8)
        X_test[ratio_col] = X_test[sp_col] / (X_test[pv_col] + 1e-8)
        composite_features.append(ratio_col)
        print(f"Created: {ratio_col}")

# Temperature difference features
temp_cols = [col for col in numeric_cols if 'Temperature' in col or 'temp' in col.lower()]
if len(temp_cols) >= 2:
    for i in range(min(2, len(temp_cols)-1)):
        diff_col = f"{temp_cols[i]}_minus_{temp_cols[i+1]}_diff"
        X_train_balanced[diff_col] = X_train_balanced[temp_cols[i]] - X_train_balanced[temp_cols[i+1]]
        X_test[diff_col] = X_test[temp_cols[i]] - X_test[temp_cols[i+1]]
        composite_features.append(diff_col)
        print(f"Created: {diff_col}")

# Flow rate efficiency features (if flow columns exist)
flow_cols = [col for col in numeric_cols if 'flow' in col.lower()]
if len(flow_cols) >= 2:
    efficiency_col = f"{flow_cols[0]}_to_{flow_cols[1]}_efficiency"
    X_train_balanced[efficiency_col] = X_train_balanced[flow_cols[0]] / (X_train_balanced[flow_cols[1]] + 1e-8)
    X_test[efficiency_col] = X_test[flow_cols[0]] / (X_test[flow_cols[1]] + 1e-8)
    composite_features.append(efficiency_col)
    print(f"Created: {efficiency_col}")

print(f"Total composite features created: {len(composite_features)}")

Creating composite features based on domain knowledge:
Found 9 SP columns and 11 PV columns
Created: FFTE Feed tank level_SP_to_PV_ratio
Created: FFTE Production solids_SP_to_PV_ratio
Created: FFTE Steam pressure_SP_to_PV_ratio
Created: FFTE Out steam temp SP_minus_FFTE Heat temperature 1_diff
Created: FFTE Heat temperature 1_minus_FFTE Heat temperature 2_diff
Created: TFE Out flow SP_to_FFTE Feed flow SP_efficiency
Total composite features created: 6


Final feature count

In [11]:
print(f"Final training set shape: {X_train_balanced.shape}")
print(f"Final test set shape: {X_test.shape}")
print(f"Total features in final dataset: {len(X_train_balanced.columns)}")

# Show feature breakdown
numeric_final = X_train_balanced.select_dtypes(include=[np.number]).columns
categorical_final = X_train_balanced.select_dtypes(include=['category']).columns
print(f"  - Numeric features: {len(numeric_final)}")
print(f"  - Categorical features: {len(categorical_final)}")
print(f"  - Original features: {len(X_train_balanced.columns) - len(composite_features)}")
print(f"  - Composite features: {len(composite_features)}")

Final training set shape: (21642, 50)
Final test set shape: (1002, 50)
Total features in final dataset: 50
  - Numeric features: 50
  - Categorical features: 0
  - Original features: 44
  - Composite features: 6


## Feature Selection, Model Training and Evaluation

In [12]:
# Ensure all data is numeric for feature selection
X_train_for_selection = X_train_balanced.copy()
X_test_for_selection = X_test.copy()

# Convert categorical columns to numeric codes if any remain
categorical_cols_final = X_train_for_selection.select_dtypes(include=['category']).columns
if len(categorical_cols_final) > 0:
    for col in categorical_cols_final:
        X_train_for_selection[col] = X_train_for_selection[col].cat.codes
        X_test_for_selection[col] = X_test_for_selection[col].cat.codes

# Final check for any remaining missing values
remaining_missing = X_train_for_selection.isnull().sum().sum()
if remaining_missing > 0:
    final_imputer = SimpleImputer(strategy='mean')
    X_train_for_selection = pd.DataFrame(
        final_imputer.fit_transform(X_train_for_selection),
        columns=X_train_for_selection.columns
    )
    X_test_for_selection = pd.DataFrame(
        final_imputer.transform(X_test_for_selection),
        columns=X_test_for_selection.columns
    )
    print("Final imputation completed!")
else:
    print("No missing values found after preprocessing!")

No missing values found after preprocessing!


Feature selection justification

In [13]:
print(f"Current feature count: {len(X_train_for_selection.columns)}")
print("SelectKBest with f_classif will identify most discriminative features")

# Apply feature selection
k_features = min(20, len(X_train_for_selection.columns))
print(f"Selecting top {k_features} features using ANOVA F-test (f_classif)...")

selector = SelectKBest(f_classif, k=k_features)
X_train_selected = selector.fit_transform(X_train_for_selection, y_train_balanced)
X_test_selected = selector.transform(X_test_for_selection)

selected_features = X_train_for_selection.columns[selector.get_support()]
print(f"Selected {len(selected_features)} features using SelectKBest")

# Show selected features with their scores
feature_scores = selector.scores_[selector.get_support()]
feature_ranking = pd.DataFrame({
    'Feature': selected_features,
    'F_Score': feature_scores
}).sort_values('F_Score', ascending=False)

print("\nTop selected features:")
print(feature_ranking.head(10))

# Convert back to DataFrames with proper column names
X_train_final = pd.DataFrame(X_train_selected, columns=selected_features)
X_test_final = pd.DataFrame(X_test_selected, columns=selected_features)

# Split for validation
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_train_final, y_train_balanced, test_size=0.2, stratify=y_train_balanced, random_state=42
)

Current feature count: 50
SelectKBest with f_classif will identify most discriminative features
Selecting top 20 features using ANOVA F-test (f_classif)...
Selected 20 features using SelectKBest

Top selected features:
                                              Feature     F_Score
2                                     TFE Out flow SP  550.151316
10                             FFTE Temperature 1 - 1  514.786130
13                             FFTE Temperature 3 - 2  513.860299
12                             FFTE Temperature 2 - 1  426.796825
16                                    TFE Temperature  379.684261
11                             FFTE Temperature 1 - 2  356.240231
18  FFTE Heat temperature 1_minus_FFTE Heat temper...  285.115353
1                           FFTE Production solids SP  270.831639
19    TFE Out flow SP_to_FFTE Feed flow SP_efficiency  238.285671
15                              TFE Steam temperature  199.779190


Train multiple ML models

In [14]:
print("\n7) Training 6 different ML models with optimized hyperparameters:")

models = {
    'DecisionTree': DecisionTreeClassifier(random_state=42, max_depth=10, min_samples_split=20, min_samples_leaf=10),
    'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42),
    'SVM': SVC(random_state=42, C=1.0),
    'LogisticRegression': LogisticRegression(random_state=42, max_iter=1000),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'NaiveBayes': GaussianNB()
}


7) Training 6 different ML models with optimized hyperparameters:


Model evaluation and comparison

In [15]:
results = {}
trained_models = {}

for name, model in models.items():
    model.fit(X_train_split, y_train_split)
    trained_models[name] = model

    y_pred_val = model.predict(X_val_split)
    val_accuracy = accuracy_score(y_val_split, y_pred_val)
    cv_scores = cross_val_score(model, X_train_final, y_train_balanced, cv=5, scoring='accuracy')

    results[name] = {
        'val_accuracy': val_accuracy,
        'cv_mean': cv_scores.mean(),
        'cv_std': cv_scores.std(),
        'classification_report': classification_report(y_val_split, y_pred_val),
        'confusion_matrix': confusion_matrix(y_val_split, y_pred_val)
    }

In [23]:
comparison_df = pd.DataFrame({
    'Model': list(models.keys()),
    'Validation_Accuracy': [results[name]['val_accuracy'] for name in models.keys()],
    'CV_Mean': [results[name]['cv_mean'] for name in models.keys()],
    'CV_Std': [results[name]['cv_std'] for name in models.keys()]
}).set_index('Model')

print("Model Performance Comparison:")
print(comparison_df.sort_values('Validation_Accuracy', ascending=False))

Model Performance Comparison:
                    Validation_Accuracy   CV_Mean    CV_Std
Model                                                      
RandomForest                   0.974590  0.967701  0.006179
KNN                            0.928390  0.931707  0.002250
DecisionTree                   0.884038  0.881805  0.014595
LogisticRegression             0.473781  0.469966  0.006260
NaiveBayes                     0.473550  0.466778  0.005081
SVM                            0.447216  0.445153  0.005598


Best model selection and justification

In [17]:
best_model_name = comparison_df['Validation_Accuracy'].idxmax()
best_model = trained_models[best_model_name]
print(f"\n10) Best Model: {best_model_name} (Validation Accuracy: {comparison_df.loc[best_model_name, 'Validation_Accuracy']:.4f})")
print(f"\nClassification Report:\n{results[best_model_name]['classification_report']}")


10) Best Model: RandomForest (Validation Accuracy: 0.9746)

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.99      0.97      1443
           1       0.98      0.95      0.97      1443
           2       0.98      0.98      0.98      1443

    accuracy                           0.97      4329
   macro avg       0.97      0.97      0.97      4329
weighted avg       0.97      0.97      0.97      4329



Save model

In [18]:
# Save Model
joblib.dump(best_model, 'best_vegemite_model.pkl')
joblib.dump(selector, 'feature_selector.pkl')
print("11) Model saved successfully")

11) Model saved successfully


## ML TO AI DEPLOYMENT

Load model and test on unseen data

In [19]:
# Load Model and Test on Unseen Data
loaded_model = joblib.load('best_vegemite_model.pkl')
loaded_selector = joblib.load('feature_selector.pkl')

# Process test data through the same pipeline
X_test_processed = X_test_for_selection.copy()

# Apply feature selection
X_test_selected = loaded_selector.transform(X_test_processed)
X_test_final_processed = pd.DataFrame(X_test_selected, columns=selected_features)
correct_predictions = 0
for idx in range(len(X_test_final_processed)):
    single_row = X_test_final_processed.iloc[idx:idx+1]
    prediction = loaded_model.predict(single_row)[0]
    actual = y_test.iloc[idx]

    # Track correct predictions
    if prediction == actual:
        correct_predictions += 1

    # Print first 10 as examples
    if idx < 10:
        match_status = "✓" if prediction == actual else "✗"
        print(f"Row {idx+1}: Predicted={prediction}, Actual={actual} {match_status}")

print(f"\nProcessed all {len(X_test_final_processed)} rows individually")
print(f"Correct predictions: {correct_predictions}/{len(X_test_final_processed)}")
y_pred_test = loaded_model.predict(X_test_final_processed)
test_accuracy = accuracy_score(y_test, y_pred_test)

Row 1: Predicted=2, Actual=2 ✓
Row 2: Predicted=2, Actual=2 ✓
Row 3: Predicted=2, Actual=2 ✓
Row 4: Predicted=2, Actual=2 ✓
Row 5: Predicted=2, Actual=2 ✓
Row 6: Predicted=2, Actual=2 ✓
Row 7: Predicted=2, Actual=2 ✓
Row 8: Predicted=2, Actual=2 ✓
Row 9: Predicted=2, Actual=2 ✓
Row 10: Predicted=2, Actual=2 ✓

Processed all 1002 rows individually
Correct predictions: 980/1002


Performance measurement on unseen data

In [20]:
print(f"Performance on 1002 unseen data points:")
print(f"Test accuracy: {test_accuracy:.4f}")
print("\nClassification Report on Test Data:")
print(classification_report(y_test, y_pred_test))

print("\nConfusion Matrix on Test Data:")
print(confusion_matrix(y_test, y_pred_test))

Performance on 1002 unseen data points:
Test accuracy: 0.9780

Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       334
           1       0.97      0.98      0.98       334
           2       0.98      0.97      0.97       334

    accuracy                           0.98      1002
   macro avg       0.98      0.98      0.98      1002
weighted avg       0.98      0.98      0.98      1002


Confusion Matrix on Test Data:
[[329   1   4]
 [  3 327   4]
 [  2   8 324]]


Compare all models on test data

In [24]:
print(f"All Models Test Performance:")
test_results = {}
for name, model in trained_models.items():
    y_pred = model.predict(X_test_final_processed)
    accuracy = accuracy_score(y_test, y_pred)
    test_results[name] = accuracy
    print(f"{name}: {accuracy:.4f}")

All Models Test Performance:
DecisionTree: 0.8473
RandomForest: 0.9780
SVM: 0.4361
LogisticRegression: 0.4481
KNN: 0.9132
NaiveBayes: 0.4651


The text from the PDF is now stored in the `text` variable. You can further process or display it as needed.

DEVELOP RULES FROM ML MODEL

In [25]:
# Extract SP features from selected features
sp_features_in_selection = [col for col in selected_features if 'SP' in col]
print(f"Found {len(sp_features_in_selection)} SP features in selected features:")
for feat in sp_features_in_selection:
    print(f"  - {feat}")

if len(sp_features_in_selection) > 0:
    X_train_sp = X_train_final[sp_features_in_selection]

    # Train a SIMPLE decision tree for clear rules
    print("\nTraining Decision Tree with SP features only...")
    dt_sp = DecisionTreeClassifier(
        random_state=42,
        max_depth=4,  # Shallow for simple rules
        min_samples_split=200,
        min_samples_leaf=100
    )
    dt_sp.fit(X_train_sp, y_train_balanced)

    # Print tree rules
    tree_rules = export_text(dt_sp, feature_names=sp_features_in_selection, max_depth=4)

    print("DECISION TREE RULES:")

    print(tree_rules)

    # Feature importance

    print("FEATURE IMPORTANCE:")

    importance_df = pd.DataFrame({
        'SP_Feature': sp_features_in_selection,
        'Importance': dt_sp.feature_importances_
    }).sort_values('Importance', ascending=False)
    print(importance_df)

    # Extract SIMPLE rules for each class

    print("RECOMMENDED SET POINT RANGES FOR EACH CLASS")


    for class_label in sorted(y_train_balanced.unique()):

        print(f"CLASS {class_label} - Recommended Control Settings")


        # Get samples for this class
        class_mask = y_train_balanced == class_label
        class_samples = X_train_sp.loc[class_mask]

        if len(class_samples) > 0:
            # Get top 4 most important features
            top_features = importance_df.head(4)['SP_Feature'].tolist()

            print("\nRecommended ranges (25th-75th percentile):\n")
            for feature in top_features:
                if feature in class_samples.columns:
                    q25 = class_samples[feature].quantile(0.25)
                    q75 = class_samples[feature].quantile(0.75)
                    median = class_samples[feature].median()

                    print(f"  {feature}:")
                    print(f"    Range: {q25:.1f} to {q75:.1f}")
                    print(f"    Target: {median:.1f}")
                    print()

    # Simple IF-THEN rules

    print("SIMPLE IF-THEN RULES (Extracted from Decision Tree)")


    # Get the most important feature for splitting
    top_feature = importance_df.iloc[0]['SP_Feature']
    second_feature = importance_df.iloc[1]['SP_Feature'] if len(importance_df) > 1 else None

    print(f"\nBased on most important features: {top_feature}")
    if second_feature:
        print(f"and {second_feature}\n")

    for class_label in sorted(y_train_balanced.unique()):
        class_mask = y_train_balanced == class_label
        class_samples = X_train_sp.loc[class_mask]

        if len(class_samples) > 0 and top_feature in class_samples.columns:
            q25 = class_samples[top_feature].quantile(0.25)
            q75 = class_samples[top_feature].quantile(0.75)

            print(f"\nRULE {class_label}: For Class {class_label}")
            print(f"  IF {top_feature} is between {q25:.1f} and {q75:.1f}")

            if second_feature and second_feature in class_samples.columns:
                q25_2 = class_samples[second_feature].quantile(0.25)
                q75_2 = class_samples[second_feature].quantile(0.75)
                print(f"  AND {second_feature} is between {q25_2:.1f} and {q75_2:.1f}")

            print(f"  THEN predict Class {class_label}")

    # Summary table

    print("QUICK REFERENCE TABLE")


    summary_data = []
    for class_label in sorted(y_train_balanced.unique()):
        class_mask = y_train_balanced == class_label
        class_samples = X_train_sp.loc[class_mask]

        row = {'Class': class_label}
        for feature in importance_df.head(3)['SP_Feature'].tolist():
            if feature in class_samples.columns:
                q25 = class_samples[feature].quantile(0.25)
                q75 = class_samples[feature].quantile(0.75)
                row[feature] = f"{q25:.0f}-{q75:.0f}"
        summary_data.append(row)

    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))

else:
    print("ERROR: No SP features found!")

Found 9 SP features in selected features:
  - FFTE Feed tank level SP
  - FFTE Production solids SP
  - TFE Out flow SP
  - TFE Production solids SP
  - TFE Vacuum pressure SP
  - FFTE Feed flow SP
  - FFTE Out steam temp SP
  - FFTE Out steam temp SP_minus_FFTE Heat temperature 1_diff
  - TFE Out flow SP_to_FFTE Feed flow SP_efficiency

Training Decision Tree with SP features only...
DECISION TREE RULES:
|--- FFTE Feed flow SP <= 10199.96
|   |--- FFTE Feed flow SP <= 9231.31
|   |   |--- TFE Out flow SP <= 2172.07
|   |   |   |--- FFTE Feed tank level SP <= 1.50
|   |   |   |   |--- class: 1
|   |   |   |--- FFTE Feed tank level SP >  1.50
|   |   |   |   |--- class: 1
|   |   |--- TFE Out flow SP >  2172.07
|   |   |   |--- FFTE Production solids SP <= 41.31
|   |   |   |   |--- class: 2
|   |   |   |--- FFTE Production solids SP >  41.31
|   |   |   |   |--- class: 0
|   |--- FFTE Feed flow SP >  9231.31
|   |   |--- TFE Production solids SP <= 63.00
|   |   |   |--- FFTE Out steam