In [None]:
import os, copy, ast
import numpy as np
import pandas as pd
from bokeh.plotting import figure, gridplot 
from bokeh.io import output_file, show, output_notebook

# Configure Bokeh to display plots inline in the notebook.
output_notebook()

# ---------------------------
# 1. Function to select color
# ---------------------------
def color_scheme(dk, color):
    """Return one colour name per submission from the requested palette.

    Args:
        dk: Parameter dict. Only ``dk['subm']`` is read, and only for its
            length (the number of submissions).
        color: Palette key, ``'usd1'`` .. ``'usd5'``. Any other value selects
            the fallback palette.

    Returns:
        A new list of ``len(dk['subm'])`` colour-name strings. Every palette
        holds five colours; when there are more submissions than that, the
        palette is repeated from its start so the list is never shorter than
        the number of submissions.
    """
    palettes = {
        'usd1': ['darkgreen', 'forestgreen', 'limegreen', 'palegreen', 'green'],
        'usd2': ['gold', 'gainsboro', 'darkgoldenrod', 'khaki', 'olive'],
        'usd3': ['saddlebrown', 'sienna', 'peru', 'chocolate', 'tan'],
        'usd4': ['darkgreen', 'forestgreen', 'gold', 'saddlebrown', 'limegreen'],
        'usd5': ['darkgreen', 'gold', 'forestgreen', 'saddlebrown', 'khaki'],
    }
    fallback = ['green', 'gold', 'brown', 'lime', 'olive']

    palette = palettes.get(color, fallback)
    count = len(dk['subm'])
    # Index modulo the palette size: identical to a plain slice for up to five
    # submissions, and wraps around instead of truncating beyond that.
    return [palette[i % len(palette)] for i in range(count)]

# --------------------------------------
# 2. The main function of Bokeh plotting
# --------------------------------------
def bokeh_show(params, df_cross, colors, show_figures1, show_figures2, wps_fig2, color_cross):
    """Draw the Bokeh diagnostics for a blend.

    Reads ``tida_desc.csv`` from the working directory, parses its ``alls``
    column (one list of submission names per row) and builds:

    * one bar chart per submission holding, for every position in the
      per-row lists, the number of rows in which that submission occupies it
      (shown only when ``show_figures1`` is true);
    * three horizontal bar charts that are always shown: the stacked counts,
      the stacked weighted "masses" ``count * (main weight + sub weight)``,
      and the rounded total mass per submission;
    * when ``show_figures2`` is true, a line chart comparing rows
      21000..21120 of every submission file with ``df_cross``.

    ``output_file('tida_alls.html')`` is called before anything is shown.

    Args:
        params: Dict with ``'subm'`` (list of ``{'name', 'weight'}`` dicts) and
            ``'subwts'``; for the line chart also ``'path'`` and ``'target'``,
            and optionally ``'id'`` (defaults to ``'id'``).
        df_cross: Blended frame, drawn as the last line of the line chart.
        colors: List of colours, one per submission.
        show_figures1: Whether to show the per-submission bar-chart grid.
        show_figures2: Whether to build and show the line chart.
        wps_fig2: Accepted for interface compatibility; not used here.
        color_cross: Colour of the ``df_cross`` line.

    Returns:
        None.
    """
    def dossier(name, rank_columns):
        # For each position, count the rows in which `name` occupies it.
        counts = [{"c": pos, "q": sum(1 for entry in column if entry == name)}
                  for pos, column in enumerate(rank_columns)]
        return {'name': name, 'q_in': counts}

    ranking = pd.read_csv('tida_desc.csv')
    # Each 'alls' cell is the text form of a list of names; literal_eval
    # parses literals only and does not execute code.
    matrix = [ast.literal_eval(str(row.alls)) for row in ranking.itertuples()]

    # Weights, colours and the line-chart frames are all positional in
    # params['subm'] order, so keep the names in that order whenever the file
    # holds exactly those names; otherwise fall back to alphabetical order.
    param_names = [subm['name'] for subm in params['subm']]
    if sorted(param_names) == sorted(matrix[0]):
        subms = param_names
    else:
        subms = sorted(matrix[0])

    cols = [[row[i] for row in matrix] for i in range(len(subms))]
    dossiers = [dossier(name, cols) for name in subms]
    subm_names = [one_dossier['name'] for one_dossier in dossiers]

    # Figure geometry depends only on the number of colours.
    height = {2: 85, 3: 134, 4: 154}.get(len(colors), 174)
    width = 157 if len(colors) == 5 else 133
    x_names = [name.replace("Group", "").replace("subm_", "") for name in subm_names]
    qss = [[one['q'] for one in one_dossier['q_in']] for one_dossier in dossiers]

    # NOTE(review): chart i is titled 'alls. i' but shows the counts of
    # submission i per position, labelled with submission names on the x axis
    # -- confirm the intended orientation of the labels.
    figures1 = []
    for i, qs in enumerate(qss):
        f = figure(x_range=x_names, width=width, height=height, title='alls. '+str(i))
        f.vbar(x=x_names, width=0.585, top=qs, color=colors)
        figures1.append(f)

    grid = gridplot([figures1])
    output_file('tida_alls.html')
    if show_figures1:
        show(grid)

    sub_wts = params['subwts']
    main_wts = [subm['weight'] for subm in params['subm']]
    # Mass of submission j at position h: its count there times the combined
    # main weight and positional sub-weight.
    mms = [[qs[h] * (main_wts[j] + sub_wts[h]) for h in range(len(sub_wts))]
           for j, qs in enumerate(qss)]
    acc_mass = [round(sum(mm)) for mm in mms]

    y_names = [name + " - " + str(mass) for name, mass in zip(subm_names, acc_mass)]
    f1 = figure(y_range=y_names, width=313, height=height, title='relations of general masses')
    f1.hbar(y=y_names, height=0.585, right=acc_mass, left=0, color=colors)

    col_labels = [f'alls.{i}' for i in range(len(dossiers))]
    stackers = [f'sub{i}' for i in range(len(dossiers))]

    def stacked_source(table):
        # Stacker 'sub{i}' receives row i of the transposed table.
        transposed = np.asarray(table).T
        source = {'cols': col_labels}
        for i, key in enumerate(stackers):
            source[key] = transposed[i, :]
        return source

    f2 = figure(y_range=col_labels, height=height, width=274, title=" ( relations of columns masses )")
    f2.hbar_stack(stackers, y='cols', height=0.585, color=colors, source=stacked_source(mms))

    f3 = figure(y_range=col_labels, height=height, width=215, title="ratios in columns")
    f3.hbar_stack(stackers, y='cols', height=0.585, color=colors, source=stacked_source(qss))

    show(gridplot([[f3, f2, f1]]))

    if not show_figures2:
        return

    # Use the configured column names rather than hard-coded ones, so the
    # chart works for any id/target pair.
    id_col = params.get('id', 'id')
    target_col = params['target']

    def read(i):
        file_name = params["path"] + params["subm"][i]["name"] + ".csv"
        renamed = {'target': target_col, 'pred': target_col}
        return pd.read_csv(file_name).rename(columns=renamed)

    dfs = [read(i) for i in range(len(params["subm"]))] + [df_cross]
    f = figure(width=800, height=274)
    f.title.text = 'Click on legend entries to mute the corresponding lines'
    b, e = 21000, 21121  # fixed row window that is plotted
    windows = [df[b:e] for df in dfs]
    line_colors = colors + [color_cross]
    alphas = [0.8] * (len(dfs) - 1) + [0.95]  # last line is df_cross
    legend = subm_names + ['cross']
    for i, label in enumerate(legend):
        f.line(windows[i][id_col], windows[i][target_col], line_width=1.0,
               color=line_colors[i], alpha=alphas[i],
               muted_color='white', legend_label=label)
    f.legend.location = "top_left"
    f.legend.click_policy = "mute"
    show(f)


# -------------------------------
# 3. The main function of h_blend
# -------------------------------
def h_blend(params,color,cross='silver',figures1=False,figures2=False,wf2=555,details=False):
    """Blend several submission files with rank-dependent weights and plot them.

    For every row, the submissions are ordered (by predicted value when
    ``params['type_sort'][0] == 'asc/desc'``, otherwise by a random shuffle).
    Each prediction is multiplied by the sum of its submission's main weight
    and the sub-weight of the position it holds in that ordering, and the
    products are summed. This is done once for a descending and once for an
    ascending ordering; the two results are mixed with the shares taken from
    ``params['type_sort'][2]`` (descending) and ``params['type_sort'][1]``
    (ascending).

    Side effects: writes ``tida_desc.csv`` and ``tida_asc.csv`` to the working
    directory and calls ``bokeh_show``.

    Args:
        params: Dict with ``'path'`` (prefix of every submission file),
            ``'id_target'`` (``[id column, target column]``), ``'type_sort'``
            (``[mode, ascending share, descending share]``), ``'subwts'``
            (one sub-weight per position) and ``'subm'`` (list of
            ``{'name', 'weight'}`` dicts). The dict is deep-copied, not mutated.
        color: Palette key handed to ``color_scheme``.
        cross: Colour of the blended line in the comparison chart.
        figures1: Forwarded to ``bokeh_show`` as ``show_figures1``.
        figures2: Forwarded to ``bokeh_show`` as ``show_figures2``.
        wf2: Forwarded to ``bokeh_show`` as ``wps_fig2``.
        details: Passed through the helpers; not used by them.

    Returns:
        DataFrame with the id column and the blended target column.
    """
    dk = copy.deepcopy(params)
    type_sort = params['type_sort'][0]
    dk['asc'], dk['desc'] = params['type_sort'][1], params['type_sort'][2]
    dk['id'], dk['target'] = params['id_target'][0], params['id_target'][1]

    def read(dk, i):
        # Load submission i and name its prediction column after the submission.
        name = dk["subm"][i]["name"]
        file_name = dk["path"] + name + ".csv"
        renamed = {'target': name, 'pred': name, dk["target"]: name}
        return pd.read_csv(file_name).rename(columns=renamed)

    def merge(dfs_subm):
        # Join all frames on the id column. Starting from the first frame and
        # folding in the rest also handles a single submission.
        merged = dfs_subm[0]
        for frame in dfs_subm[1:]:
            merged = pd.merge(merged, frame, on=[dk['id']])
        return merged

    def da(dk, sorting_direction, show_details):
        # Blend for one ordering direction and write the per-row table to
        # tida_<direction>.csv.
        df_subms = merge([read(dk, i) for i in range(len(dk["subm"]))])
        cols = [col for col in df_subms.columns if col != dk['id']]
        main_wts = [e['weight'] for e in dk["subm"]]
        sub_wts = list(dk["subwts"])
        descending = sorting_direction == 'desc'

        import random

        def order_by_value(row):
            # Stable sort of the submission names by their value in this row.
            return sorted(cols, key=lambda c: row[c], reverse=descending)

        def order_randomly(row):
            names = list(cols)
            random.shuffle(names)
            return names

        order_row = order_by_value if type_sort == 'asc/desc' else order_randomly

        def correct(row):
            # Weight of a prediction = main weight of its submission plus the
            # sub-weight of the position it holds in this row's ordering.
            order = row['alls']
            return sum([row[name] * (main_wts[j] + sub_wts[order.index(name)])
                        for j, name in enumerate(cols)])

        df_subms['alls'] = df_subms.apply(order_row, axis=1)
        df_subms[dk["target"]] = df_subms.apply(correct, axis=1)
        # ' _ ' is a blank spacer column; listing it three times repeats it in
        # the written table.
        df_subms.insert(loc=1, column=' _ ', value=['   '] * len(df_subms))
        vcols = [dk['id'], ' _ '] + cols + [' _ ', 'alls', ' _ ', dk['target']]
        df_subms = df_subms[vcols]
        df_subms.to_csv(f'tida_{sorting_direction}.csv', index=False)
        # Return an independent copy so the caller can assign into it without
        # a SettingWithCopyWarning.
        return df_subms[[dk['id'], dk['target']]].copy()

    def ensemble_da(dk, show_details):
        # Mix the descending and ascending blends with their configured shares.
        dfD = da(dk, 'desc', show_details)
        dfA = da(dk, 'asc', show_details)
        dfA[dk['target']] = dk['desc']*dfD[dk['target']] + dfA[dk['target']]*dk['asc']
        return dfA

    da_result = ensemble_da(dk, details)
    colors_final = color_scheme(dk, color)
    bokeh_show(dk, da_result, colors_final, figures1, figures2, wf2, cross)
    return da_result


# --------------
# 4. Submission
# --------------
# Common prefix of every submission file; h_blend reads path + name + ".csv".
# NOTE(review): the trailing space looks intentional (files presumably named
# "submission <score>.csv") -- confirm against the input directory.
path = '/kaggle/input/03-november-2025-ps-s5e11/submission '

params = dict(
    path=path,
    # [id column, target column]
    id_target=['id', 'loan_paid_back'],
    # [ordering mode, ascending share, descending share]
    type_sort=['asc/desc', 0.35, 0.65],
    # One sub-weight per position in the per-row ordering.
    subwts=[0.069, 0.04, -0.025, -0.035, -0.075],
    # One entry per submission file with its main weight.
    subm=[
        {'name': name, 'weight': weight}
        for name, weight in [
            ('0.92601', 0.22),
            ('0.92655', 0.195),
            ('0.92672', 0.15),
            ('0.92683', 0.205),
            ('0.92684', 0.23),
        ]
    ],
)

df_cross = h_blend(
    params,
    color='usd4',
    figures1=True,
    figures2=True,
    details=False,
)
df_cross.to_csv('submission.csv', index=False)
# Bare expression: displays the frame as the notebook cell's output.
df_cross
