In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input
# directory, in the order os.walk discovers them.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb

# LOAD SAVED LIGHTGBM PIPELINE
# Each model ships with its own preprocessing pipeline and label encoder.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so every
# artifact loaded below must come from a trusted source.
lgb_preprocessing_pipeline = joblib.load("/kaggle/input/lgb_preprocessing_pipeline/scikitlearn/default/1/lgb_preprocessing_pipeline.joblib")
lgb_label_encoder = joblib.load("/kaggle/input/lgb_label_encoder/scikitlearn/default/1/lgb_label_encoder.joblib")
# NOTE(review): this joblib file lives under a ".../keras/..." path segment,
# unlike the other joblib artifacts — presumably just how the Kaggle model
# was registered; confirm the path is intentional.
final_model_lightgbm = joblib.load("/kaggle/input/final_model_lightgbm/keras/default/1/final_model_lightgbm.joblib")

# LOAD NEURAL NETWORK MODEL
# The network itself is a Keras .h5 file; its pipeline and encoder are pickles.
final_model_nn = load_model("/kaggle/input/final_model_cosmicclassifier/keras/default/1/final_model_cosmicclassifier.h5")
nn_preprocessing_pipeline = joblib.load("/kaggle/input/processing_pipeline/scikitlearn/default/1/preprocessing_pipeline.pkl")
nn_label_encoder = joblib.load("/kaggle/input/label_encoder/scikitlearn/default/1/label_encoder.pkl")
# We'll treat nn_label_encoder as the master encoder for final decoding:
# the other models' probability columns are reordered to its classes_ order,
# and it is the encoder used to turn ensemble predictions back into labels.

# LOAD XGBOOST PIPELINE
xgb_preprocessing_pipeline = joblib.load("/kaggle/input/xgb_preprocessing_pipeline_real/scikitlearn/default/1/xgb_preprocessing_pipeline_real.joblib")
xgb_label_encoder = joblib.load("/kaggle/input/xgb_label_encoder_real/scikitlearn/default/1/xgb_label_encoder_real.joblib")
final_model_xgb = joblib.load("/kaggle/input/final_model_xgboost_real/scikitlearn/default/1/final_model_xgboost_real.joblib")

# REORDER PROBABILITY COLUMNS 
def reorder_probability_columns(model_prob, model_classes, master_classes):
    """Return ``model_prob`` with its columns permuted into ``master_classes`` order.

    Column ``i`` of ``model_prob`` is taken to be the probability of
    ``model_classes[i]``; column ``j`` of the result is the probability of
    ``master_classes[j]``. NaN labels are matched to each other even though
    ``nan != nan``.

    Args:
        model_prob: 2-D array-like of shape (n_samples, n_classes).
        model_classes: Class labels in the model's own column order.
        master_classes: Class labels in the desired output column order.

    Returns:
        A NumPy array with the same rows as ``model_prob`` and one column per
        entry of ``master_classes``.

    Raises:
        ValueError: If the two label collections do not describe the same set
            of classes, or if ``model_prob`` does not have exactly one column
            per entry of ``model_classes``.
    """
    # One shared stand-in for every NaN label, so NaNs can be used as set
    # members and dict keys despite never comparing equal to themselves.
    nan_key = object()

    def label_key(label):
        # np.floating covers NumPy scalars (e.g. float32) that are not
        # subclasses of the built-in float.
        if isinstance(label, (float, np.floating)) and np.isnan(label):
            return nan_key
        return label

    model_keys = [label_key(label) for label in model_classes]
    master_keys = [label_key(label) for label in master_classes]

    if set(model_keys) != set(master_keys):
        raise ValueError(
            f"Cannot reorder columns because class sets differ.\n"
            f"Model classes: {model_classes}\n"
            f"Master classes: {master_classes}"
        )

    model_prob = np.asarray(model_prob)
    # Without this check a matrix with extra columns would be silently
    # truncated/misaligned, and one with too few would fail with a bare
    # IndexError far from the cause.
    if model_prob.ndim != 2 or model_prob.shape[1] != len(model_keys):
        raise ValueError(
            f"model_prob has shape {model_prob.shape}, expected "
            f"(n_samples, {len(model_keys)}) to match model classes "
            f"{model_classes}."
        )

    # Map each label to the first column where it occurs: a single pass
    # instead of rescanning model_classes for every master class.
    column_of = {}
    for idx, key in enumerate(model_keys):
        column_of.setdefault(key, idx)

    reorder_indices = [column_of[key] for key in master_keys]
    return model_prob[:, reorder_indices]

# READ file AND PREPARE FEATURES
# CSV containing the rows to classify.
new_data_path = "/kaggle/input/testing-data-cogni1/cosmicclassifierTest.csv"
df_new = pd.read_csv(new_data_path)

# Input columns, in the order they are selected from df_new before being
# handed to each model's preprocessing pipeline.
# NOTE(review): presumably this must match the column order the pipelines
# were fitted with — confirm against the training notebooks.
features = [
    "Atmospheric Density",
    "Surface Temperature",
    "Gravity",
    "Water Content",
    "Mineral Abundance",
    "Orbital Period",
    "Proximity to Star",
    "Magnetic Field Strength",
    "Radiation Levels",
    "Atmospheric Composition Index"
]

def category_to_float(val):
    """Convert a ``"Category_<number>"`` string to the float ``<number>``.

    Any value that is not a string, or is a string without the
    ``"Category_"`` prefix, is returned unchanged.
    """
    prefix = "Category_"
    if not isinstance(val, str):
        return val
    if not val.startswith(prefix):
        return val
    stripped = val.replace(prefix, "")
    return float(stripped)

# Replace "Category_<n>" strings with floats in the two categorical columns,
# skipping any column the CSV does not contain.
for col in ("Magnetic Field Strength", "Radiation Levels"):
    if col not in df_new.columns:
        continue
    converted = df_new[col].apply(category_to_float)
    df_new[col] = converted

# Ground-truth labels are optional: when the CSV carries one of the known
# target columns, keep the labels (raw and encoded) for evaluation later.
possible_target_cols = {"Output", "Prediction"}
# Scan the DataFrame's columns in file order instead of listing a set
# intersection: iteration order of a set of strings can change between
# interpreter runs (hash randomisation), which made found_target[0]
# arbitrary whenever both candidate columns were present.
found_target = [col for col in df_new.columns if col in possible_target_cols]

if found_target:
    # First matching column in CSV order wins.
    target_col = found_target[0]
    y_true = df_new[target_col].values
    # Using the master (NN) label encoder so encoded labels share the index
    # space of the ensemble's argmax predictions.
    y_true_encoded = nn_label_encoder.transform(y_true)
else:
    target_col = None
    y_true = None
    y_true_encoded = None

# PREPARE THE FEATURES FOR EACH MODEL
# Work on a copy of the selected feature columns so df_new stays untouched,
# then run that same frame through each model's own preprocessing pipeline
# (LightGBM, then NN, then XGBoost).
X_new = df_new[features].copy()

X_lgb, X_nn, X_xgb = (
    pipeline.transform(X_new)
    for pipeline in (
        lgb_preprocessing_pipeline,
        nn_preprocessing_pipeline,
        xgb_preprocessing_pipeline,
    )
)

# GET PREDICTED PROBABILITIES FROM EACH MODEL
# Tree-model outputs are permuted so that their columns follow the class
# order of the master (NN) label encoder.

# LightGBM
probs_lgb_raw = final_model_lightgbm.predict_proba(X_lgb)
probs_lgb = reorder_probability_columns(
    probs_lgb_raw,
    lgb_label_encoder.classes_,
    nn_label_encoder.classes_,
)

# Neural network: used as-is (treated as already being in master order)
probs_nn = final_model_nn.predict(X_nn)

# XGBoost
probs_xgb_raw = final_model_xgb.predict_proba(X_xgb)
probs_xgb = reorder_probability_columns(
    probs_xgb_raw,
    xgb_label_encoder.classes_,
    nn_label_encoder.classes_,
)

# COMBINE PROBABILITIES VIA SOFT VOTING AND OBTAIN FINAL PREDICTIONS
# Equal-weight average of the three probability matrices; the winning column
# index per row is decoded back to a label with the master (NN) encoder.
summed_probs = probs_lgb + probs_nn + probs_xgb
ensemble_probs = summed_probs / 3.0
ensemble_preds_encoded = ensemble_probs.argmax(axis=1)
ensemble_preds_labels = nn_label_encoder.inverse_transform(ensemble_preds_encoded)

# EVALUATE PERFORMANCE IF TRUE LABELS ARE AVAILABLE
# Metrics compare integer-encoded true labels against the encoded ensemble
# predictions; nothing is printed when the CSV had no target column.
labels_available = y_true is not None
if labels_available:
    acc = accuracy_score(y_true_encoded, ensemble_preds_encoded)
    print(f"Ensemble Accuracy: {acc:.4f}")
    print("\nClassification Report:")
    report_text = classification_report(y_true_encoded, ensemble_preds_encoded)
    print(report_text)

# CREATE SUBMISSION FILE
# Use the CSV's own Planet_ID when present; otherwise add a 0-based row
# number to df_new under that name.
id_col = "Planet_ID"
has_id_column = id_col in df_new.columns
if not has_id_column:
    df_new[id_col] = np.arange(len(df_new))

# Two-column output: the identifier and the decoded ensemble label.
submission_columns = {
    id_col: df_new[id_col],
    "Final_Ensemble_Prediction": ensemble_preds_labels,
}
submission = pd.DataFrame(submission_columns)

submission.to_csv("ensemble_submission.csv", index=False)
print("\nEnsemble submission saved as 'ensemble_submission.csv'.")
