In [2]:
import os
import numpy as np
import pandas as pd

# --- Submission dimensions ---
N_MODELS = 45      # Number of models
N_SAMPLES = 75     # Number of samples
CHANNELS = ['channel_44', 'channel_45', 'channel_46']  # Channels

# --- Input and output locations ---
# DEBUG only swaps the input root for a local ./data folder; the two model
# sub-folders are always derived from INPUT_DIR, and the submission path is
# the same in both modes.
DEBUG = False
INPUT_DIR = './data' if DEBUG else '/kaggle/input/trojan-horse-hunt-in-space'
CLEAN_MODEL_PATH = os.path.join(INPUT_DIR, 'clean_model')
POISONED_MODELS_PATH = os.path.join(INPUT_DIR, 'poisoned_models')
SUBMISSION_PATH = '/kaggle/working/submission.csv'  # Updated path for submission

# Echo the effective configuration, one item per line.
print(
    "✅ All configurations are set.",
    f"Models: {N_MODELS}",
    f"Samples: {N_SAMPLES}",
    f"Channels: {CHANNELS}",
    f"All Channels: {N_SAMPLES * len(CHANNELS)}",
    sep="\n",
)

def create_zero_trigger_submission():
    """Build the all-zero baseline submission table.

    Returns:
        A DataFrame with N_MODELS rows (index starting at 1). The first
        column is ``model_id`` (1..N_MODELS); it is followed by N_SAMPLES
        columns for each entry of CHANNELS, named ``<channel>_<k>`` with k
        counting from 1. Every trigger value is 0.0.
    """
    print("📊 Generating submission data...")

    values_per_model = N_SAMPLES * len(CHANNELS)
    print(f"Triggers: {values_per_model}")

    # One identical row of zeros per model.
    zero_matrix = np.zeros((N_MODELS, values_per_model))
    print(f"Data shape: {zero_matrix.shape}")

    # Channel-major ordering: all samples of the first channel, then the next.
    column_names = []
    for channel in CHANNELS:
        for sample_no in range(1, N_SAMPLES + 1):
            column_names.append(f"{channel}_{sample_no}")

    print(f"Channel columns: {len(column_names)}")
    print(f"First 5 columns: {column_names[:5]}")

    submission = pd.DataFrame(
        zero_matrix,
        columns=column_names,
        index=pd.RangeIndex(start=1, stop=N_MODELS + 1),
    )
    submission.insert(0, "model_id", range(1, N_MODELS + 1))

    print("✅ Submission data generated successfully.")
    return submission

def save_and_validate_submission(df):
    """Write *df* to SUBMISSION_PATH as CSV and print a short sanity report.

    The report shows the actual and expected shape, the first three rows,
    the total count of missing values and the size of the written file.
    It is informational only: nothing is compared or enforced here.

    Args:
        df: The submission DataFrame to write.

    Returns:
        Always ``True``.
    """
    print("💾 Saving submission data...")

    # The DataFrame index is not written; only the columns go into the CSV.
    df.to_csv(SUBMISSION_PATH, index=False)
    print(f"✅ Submission saved to: {SUBMISSION_PATH}")

    # Shape report (the +1 accounts for the model_id column).
    print("\n🔍 Data Shape:")
    print(f"Shape: {df.shape}")
    expected_columns = N_SAMPLES * len(CHANNELS) + 1
    print(f"Expected shape: ({N_MODELS}, {expected_columns})")

    # Preview
    print("\n📋 First 3 rows:")
    print(df.head(3))

    # Total number of null cells across the whole frame.
    null_cells = df.isna().sum().sum()
    print(f"\nMissing values: {null_cells}")

    # Size on disk of the file just written.
    size_bytes = os.path.getsize(SUBMISSION_PATH)
    size_mb = size_bytes / 1024 / 1024
    print(f"File size: {size_bytes:,} bytes ({size_mb:.2f} MB)")

    return True

def main():
    """Generate the zero-trigger submission, save it, and return the frame.

    Returns:
        The DataFrame produced by ``create_zero_trigger_submission``.

    Raises:
        Exception: Any error from generation or saving is reported on stdout
            and then re-raised unchanged.
    """
    rule = "=" * 60
    print("🚀 Generating submission...")
    print(rule)

    try:
        submission = create_zero_trigger_submission()
        save_and_validate_submission(submission)

        print(f"\n{rule}")
        print("🎉 Submission data ready.")

        return submission

    except Exception as err:
        # Report the failure, then let the original exception propagate.
        print(f"❌ Error: {err!s}")
        raise

if __name__ == "__main__":
    submission_df = main()

    # Summary of the returned frame. Column 0 (model_id) is left out of the
    # column count and of the value-range statistics.
    print("\n📊 Submission Data:")
    print(f"• Number of rows: {submission_df.shape[0]}")
    print(f"• Number of columns: {submission_df.shape[1] - 1}")
    print(
        f"• Value range: [{submission_df.iloc[:, 1:].min().min():.3f}, "
        f"{submission_df.iloc[:, 1:].max().max():.3f}]"
    )
    print(f"• Final path: {SUBMISSION_PATH}")

    print("\n🎯 Kaggle Result Ready!")


✅ All configurations are set.
Models: 45
Samples: 75
Channels: ['channel_44', 'channel_45', 'channel_46']
All Channels: 225
🚀 Generating submission...
📊 Generating submission data...
Triggers: 225
Data shape: (45, 225)
Channel columns: 225
First 5 columns: ['channel_44_1', 'channel_44_2', 'channel_44_3', 'channel_44_4', 'channel_44_5']
✅ Submission data generated successfully.
💾 Saving submission data...
✅ Submission saved to: /kaggle/working/submission.csv

🔍 Data Shape:
Shape: (45, 226)
Expected shape: (45, 226)

📋 First 3 rows:
   model_id  channel_44_1  channel_44_2  channel_44_3  channel_44_4  \
1         1           0.0           0.0           0.0           0.0   
2         2           0.0           0.0           0.0           0.0   
3         3           0.0           0.0           0.0           0.0   

   channel_44_5  channel_44_6  channel_44_7  channel_44_8  channel_44_9  ...  \
1           0.0           0.0           0.0           0.0           0.0  ...   
2           0.0   