In [1]:
import pandas as pd
import numpy as np

# --- 1. Load the training targets and submission file ---
print("Loading data...")
train_df = pd.read_csv("/kaggle/input/ariel-data-challenge-2025/train.csv")
sample_submission = pd.read_csv("/kaggle/input/ariel-data-challenge-2025/sample_submission.csv")

# Isolate the target columns (wavelengths)
wl_cols = [f'wl_{i}' for i in range(1, 284)]
train_targets = train_df[wl_cols]

# --- 2. Calculate the mean and standard deviation for each wavelength ---
print("Calculating mean and std dev for each wavelength...")
mean_spectrum = train_targets.mean(axis=0)
std_spectrum = train_targets.std(axis=0)

# --- 3. Create the submission DataFrame ---
print("Building submission file...")
# Get the planet_id from the sample submission (for the test set)
submission_df = sample_submission[['planet_id']].copy()

# Create columns for all the wl and sigma predictions
# This is a bit of pandas magic to create the columns in the right order
wl_sigma_cols = []
for i in range(1, 284):
    wl_sigma_cols.append(f'wl_{i}')
    wl_sigma_cols.append(f'sigma_{i}')

# Recreate the submission DataFrame with the correct columns, initialized to zero
final_submission = pd.DataFrame(columns=['planet_id'] + wl_sigma_cols)
final_submission['planet_id'] = submission_df['planet_id']


# --- 4. Populate the submission file ---
# Assign the calculated mean to all the 'wl_' columns
for i, col in enumerate(wl_cols):
    final_submission[col] = mean_spectrum[i]

# Assign the calculated standard deviation to all the 'sigma_' columns
sigma_cols = [f'sigma_{i}' for i in range(1, 284)]
for i, col in enumerate(sigma_cols):
    # We use the std dev of the wl columns as our sigma estimate
    final_submission[col] = std_spectrum[i]


# --- 5. Save the submission file ---
final_submission.to_csv("submission.csv", index=False)
print("submission.csv created successfully!")
print("First 5 columns of submission file:")
print(final_submission.head().iloc[:, :5])

Loading data...
Calculating mean and std dev for each wavelength...
Building submission file...
submission.csv created successfully!
First 5 columns of submission file:
   planet_id      wl_1   sigma_1      wl_2   sigma_2
0    1103775  0.014609  0.010652  0.014588  0.010579


  final_submission[col] = mean_spectrum[i]
  final_submission[col] = std_spectrum[i]
