In [1]:
import joblib
import os
import pandas as pd
# import numpy as np

In [2]:
# Path of the serialized model that main() loads with joblib.load(). It is a
# relative path, so it is resolved against the current working directory.
MODEL_FILE = 'model_outputs/best_model.joblib'

In [3]:
# Input feature columns, in the order they are handed to the model.
# This list drives three things: the prompts shown by get_manual_input(), the
# required-column check in get_file_input(), and the column selection/order of
# the frame passed to predict(). Names are in the form produced by
# standardize_columns().
# NOTE(review): presumably this must match the feature set used in training;
# the training script is not visible here, so verify against it.
EXPECTED_COLUMNS = [
    'total_carbon_wt_pct',
    'graphite_wt_pct',
    'carbon_black_wt_pct',
    'resin_wt_pct',
    'pitch_wt_pct',
    'graphene_wt_pct',
    'cnt_wt_pct',
    'gnp_wt_pct',
    'antioxidant_wt_pct',
    'mgo_purity_pct',
    'd50_micron',
    'dominant_carbon_source' # This is the categorical feature
]

In [4]:
# Column labels that main() assigns, positionally, to the model's prediction
# output. NOTE(review): the order presumably has to match the target order used
# in training; confirm against the training script.
TARGET_NAMES = [
    'porosity_pct',
    'density_g_cm3',
    'thermal_conductivity_w_mk',
    'oxidation_mass_loss_pct',
    'oxidation_penetration_mm',
    'hot_mor_mpa',
    'slag_contact_angle_deg',
    'residual_strength_pct_after_shock',
]


In [5]:
def standardize_columns(df):
    """
    Normalize the column labels of ``df`` in place and return the same object.

    Each label is converted to ``str``, stripped of surrounding whitespace and
    lower-cased; then ``%`` becomes ``_pct`` and spaces and hyphens become
    underscores, in that order. The rules are meant to mirror the cleaning done
    by the training script (not visible here), so they are deliberately left
    unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` attribute is overwritten.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with standardized column labels.
    """
    # Cast labels to str first: numeric headers (e.g. read from a spreadsheet)
    # would otherwise make the .str accessor raise AttributeError, or silently
    # turn the non-string labels into NaN when the labels are of mixed types.
    cols = df.columns.astype(str)
    cols = cols.str.strip().str.lower()
    cols = cols.str.replace('%', '_pct', regex=False)
    cols = cols.str.replace(' ', '_', regex=False)
    cols = cols.str.replace('-', '_', regex=False)
    df.columns = cols
    return df


In [6]:
def get_manual_input(columns):
    """Interactively collect one sample and return it as a one-row DataFrame.

    For every name in ``columns`` the user is prompted on stdin. The column
    'dominant_carbon_source' is stored as the stripped text exactly as typed;
    every other column must parse with ``float()`` and the prompt is repeated
    until it does.

    Parameters
    ----------
    columns : list of str
        Column names to prompt for; also the column order of the result.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with one column per entry in ``columns``.
    """
    print("\n--- Enter Material Properties Manually ---")
    collected = {}
    for col in columns:
        # Friendlier label for the prompt, derived from the standardized name.
        label = col.replace('_', ' ').replace('pct', '%')
        wants_text = col == 'dominant_carbon_source'
        while True:
            raw = input(f"  Enter value for '{label}': ").strip()
            if wants_text:
                collected[col] = [raw]
            else:
                try:
                    collected[col] = [float(raw)]
                except ValueError:
                    print("  Invalid input. Please enter a number.")
                    continue  # ask again for the same column
            break

    return pd.DataFrame(collected, columns=columns)

In [7]:
def get_file_input(columns):
    """Prompt for a CSV or Excel path and return the required columns from it.

    The file's column labels are passed through ``standardize_columns`` before
    the required names are looked up. Every failure (missing file, unsupported
    extension, read error, missing columns) is reported with a printed message
    rather than an exception.

    Parameters
    ----------
    columns : list of str
        Standardized column names that must be present in the file.

    Returns
    -------
    pandas.DataFrame or None
        The file's data restricted to ``columns`` (in that order), or ``None``
        if the file could not be used.
    """
    print("\n--- Load Material Properties from File ---")
    filepath = input("  Enter the path to your CSV or Excel file: ").strip()
    # Paths pasted from a file manager or shell are often wrapped in quotes.
    filepath = filepath.strip('"\'').strip()

    if not os.path.exists(filepath):
        print(f"  Error: File not found at '{filepath}'")
        return None

    # Match the extension case-insensitively so 'DATA.CSV' / 'Book.XLSX' load.
    lowered_path = filepath.lower()
    try:
        if lowered_path.endswith('.csv'):
            df = pd.read_csv(filepath)
        elif lowered_path.endswith(('.xls', '.xlsx')):
            df = pd.read_excel(filepath)
        else:
            print("  Error: Unsupported file type.")
            return None
    except Exception as e:
        # Best-effort: any parser/IO failure is reported and treated as "no data".
        print(f"  Error reading file: {e}")
        return None

    # Standardize columns to match the training process
    df = standardize_columns(df)

    missing_cols = [col for col in columns if col not in df.columns]
    if missing_cols:
        print(f"  Error: The file is missing the following required columns: {missing_cols}")
        return None

    return df[columns]

In [8]:
def main():
    """Run the interactive prediction workflow end to end.

    Steps: load the model from MODEL_FILE, ask whether data is typed in or read
    from a file, run ``predict`` on it, label the output columns with
    TARGET_NAMES, write the result to 'prediction_results.csv' in the current
    working directory, and print it. For file input the original feature
    columns are kept alongside the predictions; for manual input only the
    predictions are written.

    Every early exit prints a message and returns ``None``; nothing is raised
    for a missing model file, an invalid menu choice, or unusable/empty input.
    """
    try:
        # NOTE(review): joblib files are pickle-based, so loading one can run
        # arbitrary code. MODEL_FILE must come from a trusted source.
        loaded_model = joblib.load(MODEL_FILE)
        print(f"✅ Model '{MODEL_FILE}' loaded successfully!")
    except FileNotFoundError:
        print("❌ Error: Model file not found. Make sure you have run the new training script first.")
        return

    print("\nHow would you like to provide data for prediction?")
    choice = input("  Enter '1' for Manual Entry\n  Enter '2' to Load a File\n  Your choice: ").strip()

    if choice == '1':
        new_data = get_manual_input(EXPECTED_COLUMNS)
    elif choice == '2':
        new_data = get_file_input(EXPECTED_COLUMNS)
    else:
        print("Invalid choice. Exiting.")
        return

    if new_data is None:
        print("Could not retrieve data. Exiting.")
        return

    # A header-only file yields a zero-row frame; stop here with a message
    # instead of letting predict() fail with a traceback.
    if new_data.empty:
        print("No rows to predict. Exiting.")
        return

    print("\nRunning predictions...")
    predictions = loaded_model.predict(new_data)
    # Prediction columns are labelled positionally with TARGET_NAMES.
    results_df = pd.DataFrame(predictions, columns=TARGET_NAMES)

    if choice == '2':
        # Reset the index so inputs and predictions line up row by row.
        final_output = pd.concat([new_data.reset_index(drop=True), results_df], axis=1)
    else:
        final_output = results_df

    # --- <<< NEW CODE TO SAVE THE FILE >>> ---
    output_filename = 'prediction_results.csv'
    final_output.to_csv(output_filename, index=False)
    print(f"\n✅ Predictions saved to '{output_filename}'")

    print("\n📈 --- Prediction Results ---")
    print(final_output.to_string())


# Entry point: run the interactive workflow when this code is executed directly.
# In a notebook kernel __name__ is also "__main__", so running this cell calls
# main() immediately and waits on the input() prompts.
if __name__ == "__main__":
    main()

✅ Model 'model_outputs/best_model.joblib' loaded successfully!

How would you like to provide data for prediction?


  Enter '1' for Manual Entry
  Enter '2' to Load a File
  Your choice:  2



--- Load Material Properties from File ---


  Enter the path to your CSV or Excel file:  model_outputs/test_set_for_verification.csv



Running predictions...

✅ Predictions saved to 'prediction_results.csv'

📈 --- Prediction Results ---
     total_carbon_wt_pct  graphite_wt_pct  carbon_black_wt_pct  resin_wt_pct  pitch_wt_pct  graphene_wt_pct  cnt_wt_pct  gnp_wt_pct  antioxidant_wt_pct  mgo_purity_pct  d50_micron dominant_carbon_source  porosity_pct  density_g_cm3  thermal_conductivity_w_mk  oxidation_mass_loss_pct  oxidation_penetration_mm  hot_mor_mpa  slag_contact_angle_deg  residual_strength_pct_after_shock
0                  5.098            3.161                0.191         0.622         0.090            0.003       1.024       0.007               1.362           98.80       15.94               graphite      1.931578       3.506999                  14.103860                 0.009141                  0.569152   212.347712              100.465305                         105.530246
1                  8.751            3.569                0.357         2.715         1.356            0.276       0.479       0.000  