In [None]:
import pandas as pd

def ensemble_submission_csvs(file_paths, id_col='id', prediction_col='Calories', output_filename='ensemble_submission.csv'):
    """
    Ensembles predictions from multiple submission CSV files by averaging.

    Each usable file is reduced to its ID and prediction columns, the files are
    inner-joined on the ID column (so only IDs present in every usable file
    survive), and the row-wise mean of the predictions is written to
    ``output_filename`` under the original ``prediction_col`` name.

    Files that are missing, empty, or lack the required columns are skipped with
    a warning regardless of their position in ``file_paths``. The function only
    gives up when not a single file could be loaded.

    Note: IDs are expected to be unique within each file; duplicated IDs are
    multiplied by the merge and would inflate the row count of the output.

    Args:
        file_paths (list): A list of paths to the submission CSV files.
        id_col (str): The name of the ID column in the CSV files.
        prediction_col (str): The name of the column containing the predictions.
        output_filename (str): The name of the CSV file to save the ensembled predictions.

    Returns:
        None. The result is written to disk and a short preview is printed;
        problems are reported via printed messages rather than exceptions.
    """
    if not file_paths:
        print("Error: No file paths provided for ensembling.")
        return

    ensemble_df = None
    pred_cols_to_average = []

    # Treat every file the same way: load it, skip it if unusable, merge otherwise.
    for i, file_path in enumerate(file_paths, start=1):
        renamed_col = f'{prediction_col}_{i}'
        current_df = _load_submission_predictions(file_path, id_col, prediction_col, renamed_col)
        if current_df is None:
            continue

        pred_cols_to_average.append(renamed_col)
        if ensemble_df is None:
            ensemble_df = current_df
        else:
            # Inner join keeps only the IDs common to all files merged so far.
            ensemble_df = pd.merge(ensemble_df, current_df, on=id_col, how='inner')

    if ensemble_df is None:
        print("Error: None of the provided files could be loaded; nothing to ensemble.")
        return

    if ensemble_df.empty:
        print("Warning: The loaded files share no common IDs; the ensembled submission will be empty.")

    # Row-wise mean over exactly the columns contributed by the loaded files.
    averaged_predictions = ensemble_df[pred_cols_to_average].mean(axis=1)

    # Prepare the final submission DataFrame
    final_submission_df = ensemble_df[[id_col]].assign(**{prediction_col: averaged_predictions})

    # Save the ensembled predictions to a new CSV file
    try:
        final_submission_df.to_csv(output_filename, index=False)
        print(f"\nEnsembled predictions saved to {output_filename}")
        print("First 5 rows of the ensembled submission:")
        print(final_submission_df.head())
    except Exception as e:
        # Best-effort save: report the failure instead of interrupting the notebook.
        print(f"Error saving the ensembled submission file: {e}")


def _load_submission_predictions(file_path, id_col, prediction_col, renamed_col):
    """Read one submission CSV and return its ID column plus the prediction
    column renamed to ``renamed_col``; return None (after printing a warning)
    when the file is missing, empty, or lacks either column."""
    try:
        frame = pd.read_csv(file_path)
        frame = frame[[id_col, prediction_col]]
    except FileNotFoundError:
        print(f"Warning: File not found at {file_path}. Skipping this file.")
        return None
    except pd.errors.EmptyDataError:
        print(f"Warning: {file_path} contains no data. Skipping this file.")
        return None
    except KeyError:
        print(f"Warning: '{id_col}' or '{prediction_col}' not found in {file_path}. Skipping this file.")
        return None

    # rename() without inplace returns a fresh frame, so the caller never
    # receives a view onto the raw CSV contents.
    return frame.rename(columns={prediction_col: renamed_col})

In [None]:
# --- Example Usage ---
if __name__ == "__main__":
    # Prediction files to blend -- point these at your own submission CSVs.
    submission_files = [
        # Random Forest model output
        '/kaggle/input/ensemble-for-pce-s5e5/Random_Forest_Regressor_predictions.csv',
        # LightGBM model output
        '/kaggle/input/ensemble-for-pce-s5e5/LightGBM_Regressor_predictions.csv',
        # Further model outputs can be appended here, e.g. an XGBoost file:
        # '/kaggle/input/ensemble-for-pce-s5e5/XGBoost_Regressor_predictions.csv',
    ]

    # Average the listed files per ID and write the blended submission.
    # The column names below must match the headers used in the CSV files.
    ensemble_submission_csvs(
        submission_files,
        id_col='id',
        prediction_col='Predictions',
        output_filename='averaged_ensemble_submission.csv',
    )
