In [1]:
import pandas as pd
import numpy as np
import os
import copy

# ==========================================
# CONFIGURATION
# ==========================================
# Directory the input submission CSVs are read from.
PATH = './'
# File name of the final submission written at the end of the script.
OUTPUT_FILE = 'submission_optimized_854362.csv'

# TWEAK 1: trend-correction factors, sharpened slightly (1.00118 -> 1.00119).
# CT1 scales scores up in the rising case, CT2 divides them in the falling case.
CT1 = CT2 = 1.00119

# ==========================================
# LOGIC FUNCTIONS
# ==========================================
def h_blend(params, _update=None):
    """Blend several submission files using rank-dependent weights.

    Every entry of ``params['subm']`` is read from
    ``<params['path']>/<name>.csv`` and the files are inner-joined on the id
    column.  Within each row the submissions are ordered by predicted value;
    the prediction of submission ``j`` is multiplied by
    ``weight_j + subwts[position_j]`` (``position_j`` being its place in that
    row's ordering) and the products are summed.  This is done for a
    descending and for an ascending ordering, and the two sums are mixed with
    the shares given in ``params['type_sort']``.

    Args:
        params: Blend configuration with the keys
            ``path`` (directory of the CSVs),
            ``id_target`` (``[id_column, target_column]``),
            ``type_sort`` (``[label, ascending_share, descending_share]``),
            ``subm`` (list of ``{'name': ..., 'weight': ...}``) and
            ``subwts`` (one additive weight per rank position).
        _update: Optional overrides.  Only the ``'subwts'`` key is used; when
            present it is written into ``params`` itself, so the caller's
            dict is modified.

    Returns:
        DataFrame with the id column and the blended target column.

    Raises:
        KeyError: If no ``subwts`` is available from ``params`` or ``_update``.
        IndexError: If ``subm`` is empty, or there are fewer weights or
            ``subwts`` entries than prediction columns.
    """
    # A None default replaces the shared mutable ``{}`` default.
    if _update and 'subwts' in _update:
        params['subwts'] = _update['subwts']

    dk = copy.deepcopy(params)
    id_col, target_col = dk['id_target'][0], dk['id_target'][1]
    asc_share, desc_share = dk['type_sort'][1], dk['type_sort'][2]

    # Read and join the submissions once; both orderings reuse this table.
    frames = [
        pd.read_csv(os.path.join(dk['path'], f"{subm['name']}.csv")).rename(
            columns={target_col: subm['name']})
        for subm in dk['subm']
    ]
    merged = frames[0]
    for frame in frames[1:]:
        merged = pd.merge(merged, frame, on=[id_col])

    cols = [col for col in merged.columns if col != id_col]
    weights = [subm['weight'] for subm in dk['subm']]
    sub_weights = np.asarray(dk['subwts'], dtype=float)
    values = merged[cols].to_numpy(dtype=float)

    def blend(descending):
        # Stable sort: tied predictions keep their column order, which is what
        # sorted(..., reverse=...) on the row values would produce as well.
        order = np.argsort(-values if descending else values,
                           axis=1, kind='stable')
        # Inverting the permutation gives, for each column, its rank position.
        position = np.argsort(order, axis=1, kind='stable')
        bonus = sub_weights[position]
        # Accumulate column by column (left to right) to keep the summation
        # order of a plain Python sum over the columns.
        total = np.zeros(len(values))
        for j in range(len(cols)):
            total = total + values[:, j] * (weights[j] + bonus[:, j])
        return total

    combined = desc_share * blend(True) + asc_share * blend(False)
    return pd.DataFrame({id_col: merged[id_col], target_col: combined})

def blend_aux(df_main, weights, df_aux, name):
    """Blend an auxiliary submission into the main one and save it as CSV.

    Matches the notebook's b2 logic:
    ``aux * weights[0] + main * weights[1]`` on the ``exam_score`` column.
    Rows are paired by ``id`` rather than by row position, so the two frames
    may list their ids in a different order.

    Args:
        df_main: Frame with ``id`` and ``exam_score`` columns; its ids and row
            order define the output.
        weights: Two-element sequence ``[aux_weight, main_weight]``.
        df_aux: Frame with ``id`` and ``exam_score`` columns.
        name: Output file name without extension; ``"<name>.csv"`` is written
            relative to the current working directory.

    Returns:
        The blended frame with ``id`` and ``exam_score`` columns.  Ids of
        ``df_main`` that are missing from ``df_aux`` get a NaN score.

    Raises:
        ValueError: If ``df_aux`` contains duplicate ids (raised by reindex).
    """
    # Look up each main id in the aux frame instead of trusting row order.
    aux_scores = df_aux.set_index('id')['exam_score'].reindex(df_main['id'])
    sub = pd.DataFrame({'id': df_main['id']})
    # Plain arrays: both sides are now in df_main's row order, so the sum
    # must be positional rather than index-aligned.
    sub['exam_score'] = (aux_scores.to_numpy() * weights[0]
                         + df_main['exam_score'].to_numpy() * weights[1])
    sub.to_csv(f"{name}.csv", index=False)
    return sub

def process_aux_params(df_main, weights, dfs_aux, params_aux):
    """Write one blended CSV per auxiliary frame and return a config copy.

    The i-th frame of ``dfs_aux`` is blended with ``df_main`` via
    ``blend_aux`` and saved under the name of the i-th entry of
    ``params_aux['subm']``.

    Returns:
        A deep copy of ``params_aux``.
    """
    for position, aux_frame in enumerate(dfs_aux):
        output_name = params_aux['subm'][position]['name']
        blend_aux(df_main, weights, aux_frame, output_name)
    return copy.deepcopy(params_aux)

# ==========================================
# MAIN PARAMETERS
# ==========================================
# TWEAK 2: Optimize the main weights to favor the better model.
# '8.54465' is the best single model, so its weight is raised from 0.31 to 0.32.
# '8.54633' is weaker, so its weight is lowered from 0.328 to 0.318.
params_Main = dict(
    path=PATH,
    id_target=['id', "exam_score"],
    type_sort=['asc/desc', 0.30, 0.70],
    subm=[
        dict(name='8.54465', weight=0.320),  # increased (was 0.31)
        dict(name='8.54633', weight=0.318),  # decreased (was 0.328)
        dict(name='8.54610', weight=0.172),
        dict(name='8.54822', weight=0.190),
    ],
)

# Second-level blend over the intermediate "Main+NN" files.
params_Aux = dict(
    path=PATH,
    id_target=['id', "exam_score"],
    type_sort=['asc/desc', 0.30, 0.70],
    subwts=[-0.25, 0.00, 0.55, -0.30],
    subm=[
        dict(name='Main+24', weight=0.21),
        dict(name='Main+25', weight=0.08),
        dict(name='Main+28', weight=0.23),
        dict(name='Main+29', weight=0.48),
    ],
)

# [aux_weight, main_weight] pairs, one per stage.
# TWEAK 3: slightly more aggressive Stage 3 mixing (0.817 vs 0.818).
weights1, weights2, weights3 = [0.96, 0.04], [0.89, 0.11], [0.817, 0.183]

# Load the Aux files (the ones downloaded from PS-s6e1-25)
print(">>> Loading Data...")
# Note: these file names must match the files in PATH exactly.
df24 = pd.read_csv(os.path.join(PATH, '8.54466.ct2.csv'))
df25 = pd.read_csv(os.path.join(PATH, '8.54476.csv'))
df28 = pd.read_csv(os.path.join(PATH, '8.54465.ct2.csv'))
df29 = pd.read_csv(os.path.join(PATH, '8.54462.ct2.csv'))
dfs_Aux = [df24, df25, df28, df29]

print(">>> Computing Optimized Ensemble...")
# Each stage blends the main models, mixes that blend into every aux file
# (written as "Main+NN.csv") and then blends those intermediate files.

# Stage 1
m1 = h_blend(params_Main, _update={'subwts': [+0.55, -0.10, -0.20, -0.25]})
df1 = h_blend(process_aux_params(m1, weights1, dfs_Aux, params_Aux))

# Stage 2
m2 = h_blend(params_Main, _update={'subwts': [+0.11, -0.01, -0.03, -0.07]})
df2 = h_blend(process_aux_params(m2, weights2, dfs_Aux, params_Aux))

# Stage 3
# Same subwts and same input files as Stage 1, so the main blend is reused
# instead of being recomputed; only the aux mixing weights differ.
m3 = m1
df3 = h_blend(process_aux_params(m3, weights3, dfs_Aux, params_Aux))

# Trend Logic
print(">>> Applying Trend Correction...")
# Give each stage's score column its own name so all three fit in one frame.
for stage_frame, stage_label in ((df1, 'es1'), (df2, 'es2'), (df3, 'es3')):
    stage_frame.rename(columns={'exam_score': stage_label}, inplace=True)
stages_1_2 = df1.merge(df2, on='id')
df_final = stages_1_2.merge(df3, on='id')

def calculate_trend(x):
    """Return the stage-3 score of a row, adjusted along the stage trend.

    ``x`` provides the keys ``es1``, ``es2`` and ``es3``.  If ``es3`` exceeds
    both earlier scores it is multiplied by ``CT1`` minus a small term
    proportional to ``es3 - es1``.  If the scores fall strictly from ``es1``
    to ``es3`` it is divided by ``CT2`` minus a small term proportional to
    ``es1 - es3``.  In every other case ``es3`` is returned unchanged.
    """
    first, second, last = x['es1'], x['es2'], x['es3']

    last_is_highest = first < last and second < last
    if last_is_highest:
        return last * (CT1 - 0.0001 * (last - first))

    strictly_falling = first > second > last
    if strictly_falling:
        return last / (CT2 - 0.0001 * (first - last))

    return last

# Collapse the three stage scores into the final prediction, row by row.
df_final['exam_score'] = df_final.apply(calculate_trend, axis=1)

# Clean up the intermediate blends. The names are taken from params_Aux so
# this list cannot drift from the names the intermediate files were saved under.
for subm in params_Aux['subm']:
    try:
        os.remove(os.path.join(PATH, f"{subm['name']}.csv"))
    except FileNotFoundError:
        pass  # already gone, nothing to clean up

# Save
df_final[['id', 'exam_score']].to_csv(OUTPUT_FILE, index=False)
print(f"✅ SUCCESS! Generated: {OUTPUT_FILE}")

>>> Loading Data...
>>> Computing Optimized Ensemble...
>>> Applying Trend Correction...
✅ SUCCESS! Generated: submission_optimized_854362.csv
