In [1]:
import pandas as pd
from collections import defaultdict
import numpy as np
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [2]:
# -----------------------------------------------------
#  1. 스케줄 문자열 → 토큰 시퀀스로 파싱
# -----------------------------------------------------
def parse_to_sequence(text):
    lines = text.strip().split('\n')
    tokens = []
    for line in lines:
        parts = line.strip().split()
        if len(parts) < 4:
            continue
        item = parts[0].replace('??', '')
        machine = parts[1]
        date = parts[2]
        tokens.append(f"{item}_{machine}_{date}")
    return tokens


In [3]:
# -----------------------------------------------------
#  2. BLEU-4 점수 계산 (구조적 유사도)
# -----------------------------------------------------
def compute_bleu_score(reference, hypothesis):
    smoothie = SmoothingFunction().method4
    return sentence_bleu(
        [reference], hypothesis, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=smoothie
    )

In [4]:
# -----------------------------------------------------
#  3. 목적함수: 기계별 부하 분산(표준편차)
# -----------------------------------------------------
def machine_load_variance(text):
    machine_loads = defaultdict(int)
    for line in text.strip().split('\n'):
        parts = line.strip().split()
        if len(parts) < 4:
            continue
        machine = parts[1]
        qty = int(parts[3])
        machine_loads[machine] += qty
    return np.std(list(machine_loads.values()))

In [5]:
# -----------------------------------------------------
#  4. 점수 결합: BLEU + 부하 분산 점수
# -----------------------------------------------------
def combined_score(bleu, gt_std, pred_std, alpha=0.5):
    eps = 1e-6
    variance_score = max(0.0, 1 - (pred_std - gt_std) / (gt_std + eps))
    return alpha * bleu + (1 - alpha) * variance_score, variance_score

In [6]:
# -----------------------------------------------------
#  5. 전체 샘플에 대해 평가 실행
# -----------------------------------------------------
def evaluate_from_csv(csv_path, alpha=0.5):
    df = pd.read_csv(csv_path)
    total_bleu, total_var_score, total_final = 0, 0, 0

    for i, row in df.iterrows():
        gt_seq = parse_to_sequence(row['gt'])
        pred_seq = parse_to_sequence(row['output'])

        bleu = compute_bleu_score(gt_seq, pred_seq)
        gt_std = machine_load_variance(row['gt'])
        pred_std = machine_load_variance(row['output'])
        final_score, var_score = combined_score(bleu, gt_std, pred_std, alpha)

        print(f"\n 샘플 {i+1}")
        print(f" - BLEU-4 구조 유사도    : {bleu:.4f}")
        print(f" - 부하 분산 점수        : {var_score:.4f}")
        print(f" - 최종 종합 점수        : {final_score:.4f}")

        total_bleu += bleu
        total_var_score += var_score
        total_final += final_score

    n = len(df)
    print("\n==========================")
    print(f" 전체 평균 BLEU-4 점수   : {total_bleu / n:.4f}")
    print(f" 전체 평균 부하 점수     : {total_var_score / n:.4f}")
    print(f" 전체 평균 종합 점수     : {total_final / n:.4f}")
    print("==========================")

In [7]:
# -----------------------------------------------------
#  실행 예시
# -----------------------------------------------------
if __name__ == "__main__":
    evaluate_from_csv("infilling_gt.csv", alpha=0.5)



 샘플 1
 - BLEU-4 구조 유사도    : 0.0000
 - 부하 분산 점수        : 1.4334
 - 최종 종합 점수        : 0.7167

 전체 평균 BLEU-4 점수   : 0.0000
 전체 평균 부하 점수     : 1.4334
 전체 평균 종합 점수     : 0.7167
