In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib
import os

# ==========================================
# CONFIGURATION
# ==========================================
# Keras model file, loaded with tf.keras.models.load_model in predict_dms_raw.
# Relative path: resolved against the current working directory.
MODEL_PATH = 'dms_wort_only_model.keras'
# Fitted scaler, loaded with joblib.load and applied to the features before
# prediction. Relative path, same as MODEL_PATH.
SCALER_PATH = 'dms_wort_only_scaler.pkl'
# CSV file passed to predict_dms_raw when this cell runs.
# NOTE: absolute, machine-specific Windows path; adjust it before running elsewhere.
TEST_FILE_PATH = r"C:\Users\DELL\OneDrive\Documents\Smart Brewery\Measurements\Measurements 11.11.2025\Wort 300 mcg_L end boiling.csv"

# Columns removed from the loaded CSV before scaling. They are dropped with
# errors='ignore', so a file that lacks some of them is still accepted.
# Presumably these are bookkeeping/metadata columns rather than model
# features (TODO confirm against the training notebook).
DROP_COLS = [
    "id", "timestamp", "label", "sensorID",
    "measurementID", "heaterProfileID",
    "timeSincePowerOn", "stepIndex"
]


# PREDICTION FUNCTION

def predict_dms_raw(file_path):
    """Classify every row of a raw measurement CSV and print a PASS/FAIL summary.

    Loads the Keras model at ``MODEL_PATH`` and the scaler at ``SCALER_PATH``,
    reads ``file_path`` with pandas, removes the columns listed in
    ``DROP_COLS`` (plus ``original_concentration``), scales the remaining
    columns and thresholds the model output at 0.5 (label 1 is reported as
    FAIL, label 0 as PASS). The file-level decision is a majority vote over
    the rows; a tie resolves to PASS.

    Args:
        file_path: Path of the CSV file to evaluate.

    Returns:
        None. Results and errors are reported with ``print``; any error
        (missing files, unreadable CSV, empty data, scaling failure) ends the
        function early without raising.
    """
    print(f"\n--- Processing File (RAW): {os.path.basename(file_path)} ---")

    if not os.path.exists(MODEL_PATH) or not os.path.exists(SCALER_PATH):
        print("Error: Model or scaler file not found.")
        return

    try:
        model = tf.keras.models.load_model(MODEL_PATH)
        scaler = joblib.load(SCALER_PATH)
    except Exception as e:
        print(f"Error loading model/scaler: {e}")
        return

    if not os.path.exists(file_path):
        print(f"Error: Data file not found: {file_path}")
        return

    # Reading can fail too (empty file, malformed CSV, decoding or permission
    # problems); report it the same way as every other failure here.
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error reading data file: {e}")
        return
    print(f"   -> Total rows loaded: {len(df)}")

    # Without rows there is nothing to vote on; stop instead of letting the
    # scaler fail with an unrelated-looking message.
    if df.empty:
        print("Error: Data file contains no data.")
        return

    # Prepare Data
    cols_to_drop = DROP_COLS + ['original_concentration']
    X_new = df.drop(columns=cols_to_drop, errors='ignore')

    # When the scaler recorded its training column names, feed it exactly
    # those columns in exactly that order, so extra or reordered CSV columns
    # cannot break or silently corrupt the scaling.
    expected_cols = getattr(scaler, "feature_names_in_", None)
    if expected_cols is not None:
        missing = [c for c in expected_cols if c not in X_new.columns]
        if missing:
            print(f" Normalization Error: missing feature columns: {missing}")
            return
        X_new = X_new[list(expected_cols)]

    try:
        X_scaled = scaler.transform(X_new)
    except Exception as e:
        print(f" Normalization Error: {e}")
        return

    # --- PREDICTION ---
    # Flatten to 1D so the counts below are plain scalars.
    predictions_prob = np.asarray(model.predict(X_scaled, verbose=0)).ravel()
    predictions_label = (predictions_prob > 0.5).astype(int)

    # Count against the number of predictions actually produced, so the two
    # percentages always add up to 100.
    total = predictions_label.size
    fail_count = int(predictions_label.sum())
    pass_count = total - fail_count

    if total > 0:
        pass_ratio = (pass_count / total) * 100
        fail_ratio = (fail_count / total) * 100
        avg_prob = float(predictions_prob.mean())
    else:
        pass_ratio = 0
        fail_ratio = 0
        avg_prob = float("nan")

    print("\n" + "="*40)
    print("   PREDICTION RESULTS (ON RAW DATA)")
    print("="*40)
    print(f"🔹 PASS (Predicted < 100 ppb):   {pass_count} rows ({pass_ratio:.1f}%)")
    print(f"🔸 FAIL (Predicted >= 100 ppb):  {fail_count} rows ({fail_ratio:.1f}%)")
    print(f"🔹 Average Probability of Failure: {avg_prob:.4f}")

    # Majority vote over rows; a tie resolves to PASS.
    final_decision = "FAIL (>= 100 ppb)" if fail_count > pass_count else "PASS (< 100 ppb)"

    print(f"\nFINAL DECISION: {final_decision}")
    print("="*40 + "\n")

# RUN TEST
# Runs the prediction on TEST_FILE_PATH as soon as this cell/script executes;
# the summary (or an error message) is printed to stdout.

predict_dms_raw(TEST_FILE_PATH)


--- Processing File (RAW): Wort 300 mcg_L end boiling.csv ---
   -> Total rows loaded: 4080

   PREDICTION RESULTS (ON RAW DATA)
🔹 PASS (Predicted < 100 ppb):   0 rows (0.0%)
🔸 FAIL (Predicted >= 100 ppb):  4080 rows (100.0%)
🔹 Average Probability of Failure: 1.0000

FINAL DECISION: FAIL (>= 100 ppb)

