# In [3]:
from multiprocessing import Pool
import pandas as pd
import numpy as np
# RLE decoding function
def rle_decode(mask_rle, shape=(224,224)):
    """Decode a run-length-encoded mask into a binary 2-D ``uint8`` array.

    Parameters
    ----------
    mask_rle : str or int
        Whitespace-separated ``start length start length ...`` pairs, where
        each start is a 1-based index into the flattened (row-major) mask.
        The sentinel ``-1`` / ``'-1'`` denotes an empty mask.
    shape : tuple of int
        ``(height, width)`` of the mask to produce.

    Returns
    -------
    numpy.ndarray
        Array of the given shape with dtype ``uint8``: 1 inside the encoded
        runs and 0 elsewhere.
    """
    # Empty-mask sentinel. Use the same dtype as the decoded path so callers
    # always receive a uint8 array regardless of which branch ran.
    if mask_rle == -1 or mask_rle == '-1':
        return np.zeros(shape, dtype=np.uint8)

    tokens = mask_rle.split()
    # Even positions hold the 1-based run starts, odd positions the lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        flat[lo:hi] = 1
    return flat.reshape(shape)


# RLE encoding function
def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened and every run of non-zero pixels is emitted as a
    ``start length`` pair, with ``start`` being a 1-based index into the
    flattened array. The pairs are joined with single spaces; a mask without
    any set pixel yields an empty string.
    """
    # Pad with a zero on both sides so runs touching either end of the
    # flattened mask still produce a rising and a falling edge.
    padded = np.concatenate([[0], mask.flatten(), [0]])

    # 1-based positions at which the pixel value changes: these alternate
    # between run starts and the position just past each run's end.
    edges = np.flatnonzero(padded[:-1] != padded[1:]) + 1

    # Replace every end position by the run length (end - start).
    edges[1::2] = edges[1::2] - edges[::2]
    return ' '.join(map(str, edges))


def process_image(args):
    """Build the ensembled RLE mask for a single image.

    Parameters
    ----------
    args : tuple
        ``(id_, submissions, weights, threshold, shape, no_building_threshold)``
        packed into one object so the function can be used with ``Pool.map``.
        ``submissions`` is a sequence of DataFrames with ``img_id`` and
        ``mask_rle`` columns, and ``weights`` holds one voting weight per
        submission. ``no_building_threshold`` is unpacked for signature
        compatibility but is not used by the current voting rule.

    Returns
    -------
    str
        ``'-1'`` when at least three submissions predict an empty mask or
        when no pixel reaches ``threshold``; otherwise the RLE string of the
        pixels whose weighted vote sum is ``>= threshold``.
    """
    id_, submissions, weights, threshold, shape, no_building_threshold = args
    masks = [
        rle_decode(submit.loc[submit['img_id'] == id_, 'mask_rle'].values[0], shape)
        for submit in submissions
    ]

    # Count empty predictions from the masks themselves, so a model that was
    # deliberately given weight 0 is not mistaken for a "no building" vote.
    no_building_count = sum(1 for mask in masks if not np.any(mask))
    if no_building_count >= 3:
        return '-1'

    ensemble_mask = np.zeros(shape, dtype=np.float64)
    for mask, weight in zip(masks, weights):
        # Promote to float before scaling so an integer weight above 255
        # cannot overflow the uint8 dtype of a decoded mask.
        ensemble_mask += weight * mask.astype(np.float64)

    ensemble_mask = (ensemble_mask >= threshold).astype(np.uint8)
    if not ensemble_mask.any():
        return '-1'
    return rle_encode(ensemble_mask)

def ensemble_parallel(submissions, weights, threshold=50, shape=(224,224), no_building_threshold=50):
    """Ensemble several submissions by weighted per-pixel voting, in parallel.

    Parameters
    ----------
    submissions : sequence of pandas.DataFrame
        Each frame must have an ``img_id`` and a ``mask_rle`` column, and all
        frames must list the same image ids in the same order.
    weights : sequence of numbers
        One voting weight per submission, matched by position.
    threshold : number
        Minimum weighted vote sum for a pixel to be kept.
    shape : tuple of int
        ``(height, width)`` of every mask.
    no_building_threshold : number
        Forwarded unchanged to ``process_image``.

    Returns
    -------
    pandas.DataFrame
        Frame with the ``img_id`` column of the first submission and the
        ensembled ``mask_rle`` column.

    Raises
    ------
    ValueError
        If the number of weights differs from the number of submissions, or
        if the ``img_id`` columns of the submissions do not match.
    """
    # A length mismatch would otherwise be silently truncated by zip() in the
    # worker and drop models from the vote.
    if len(weights) != len(submissions):
        raise ValueError(
            f"expected one weight per submission, got {len(weights)} weights "
            f"for {len(submissions)} submissions"
        )

    # Make sure the id columns of all submissions agree. An explicit raise is
    # used instead of assert so the check survives `python -O`.
    reference_ids = submissions[0]['img_id']
    for position, submit in enumerate(submissions[1:], start=1):
        if not np.all(reference_ids == submit['img_id']):
            raise ValueError(
                f"img_id column of submission {position} does not match submission 0"
            )

    final_submit = pd.DataFrame()
    final_submit['img_id'] = reference_ids

    # One task tuple per image; process_image unpacks it in the worker.
    tasks = [
        (id_, submissions, weights, threshold, shape, no_building_threshold)
        for id_ in final_submit['img_id']
    ]
    with Pool() as p:
        final_masks = p.map(process_image, tasks)

    final_submit['mask_rle'] = final_masks

    return final_submit

# CSV submission files of the individual models to ensemble
# Submission CSVs to combine, one per model; the order here must match the
# order of the voting weights below.
submission_paths = (
    '/home/ubin108/0728_unet_convNext_xlarge_mask0.55.csv',
    '/home/ubin108/0728_uper_convnext_xlarge_best_8178_mask_0.55.csv',
    '/home/ubin108/0728_unet_convnext_large_in22ft1k_mask0.6.csv',
    '/home/ubin108/0727_upernet_convnext_large_0.80845_mask0.6.csv',
    '/home/ubin108/0727_unet_efficientnetV2_l_80803_mask0.6.csv',
    '/home/ubin108/0727_unet_convNext_base_8062_mask0.6.csv',
)
submit1, submit2, submit3, submit4, submit5, submit6 = map(pd.read_csv, submission_paths)

# Voting weights, matched to the submissions by position. Going by the file
# names: unet convnext-xlarge, upernet convnext-xlarge, unet convnext-large,
# upernet convnext-large, unet efficientnetV2-l, unet convnext-base.
voting_weights = [30, 30, 20, 20, 20, 20]

final_submit = ensemble_parallel(
    [submit1, submit2, submit3, submit4, submit5, submit6],
    voting_weights,
)
final_submit.to_csv('./ensemble_final_submit.csv', index=False)