# Library Loading

In [1]:
import datetime
import pandas as pd
import math
import json
import matplotlib.pyplot as plt
import seaborn as sns
import os
import joblib
import numpy as np
import itertools

# Models / Scalers / Label Encoder Loading

In [2]:
# Root directory that holds every trained artifact used by this notebook.
# The original absolute path remains the default; set CAPSTONE_PIPELINE_DIR
# to run the notebook on another machine without editing this cell.
pipeline_dir = os.environ.get(
    "CAPSTONE_PIPELINE_DIR",
    "/Users/JackRuan/文稿(un-iCloud)/VScode_ipynb/CAPSTONE/Pipelines",
)

# NOTE: joblib.load unpickles arbitrary Python objects, so only point
# pipeline_dir at artifacts that come from a trusted source.

# Load the XGBoost regression model.
xgb_reg_model_path = os.path.join(
    pipeline_dir, "xgb_reg_model_2025-08-02_23-39-32_r2_0.97.pkl")
xgb_reg_model = joblib.load(xgb_reg_model_path)

# Load the MOC model (a TabTransformer, judging by the file name).
loaded_model = joblib.load(
    os.path.join(
        pipeline_dir, "tabtransformer_moc_model_2025-08-02_23-34-05_mAP0.8165.pkl"))


def _load_scaler(directory, prefix):
    """Load the pickled scaler in ``directory`` whose file name starts with ``prefix``.

    Args:
        directory: Folder to search for ``*.pkl`` scaler files.
        prefix: Required start of the file name (e.g. ``"uv"``).

    Returns:
        The object stored in the matching pickle.

    Raises:
        FileNotFoundError: If no ``<prefix>*.pkl`` file exists in ``directory``.
    """
    matches = sorted(
        name for name in os.listdir(directory)
        if name.startswith(prefix) and name.endswith(".pkl")
    )
    if not matches:
        raise FileNotFoundError(
            f"No scaler file matching '{prefix}*.pkl' found in {directory}")
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # deterministic when more than one file matches the prefix.
    return joblib.load(os.path.join(directory, matches[0]))


# Load the scalers.
scaler_dir = os.path.join(pipeline_dir, "scalers")
uv_scaler = _load_scaler(scaler_dir, "uv")
search_scaler = _load_scaler(scaler_dir, "search")
ocr_scaler = _load_scaler(scaler_dir, "ocr")

# Load the label mapping (label string -> integer code, per categorical field).
label_mapping_path = os.path.join(pipeline_dir, "label_mapping.json")
with open(label_mapping_path, "r", encoding="utf-8") as f:
    label_mapping = json.load(f)

# Functions

In [3]:
# Decode the integer codes (e.g. np.int64) in each ad-strategy combo via label_mapping.
def decode_ad_stra(results, mapping=None):
    """Turn integer-encoded ad-strategy combos back into their label strings.

    Args:
        results: Iterable of combos. Each combo must be indexable at positions
            0..3, ordered as (material, placement, payment, sellingpoint), and
            each entry must be convertible with ``int()``.
        mapping: Dict with the keys 'material', 'placement', 'payment' and
            'sellingpoint', each mapping label -> integer code. When omitted,
            the module-level ``label_mapping`` is looked up at call time, so
            reloading the mapping does not require redefining this function.

    Returns:
        A list with one 4-element list per combo. A code that has no entry in
        the mapping is passed through unchanged.
    """
    if mapping is None:
        mapping = label_mapping

    fields = ('material', 'placement', 'payment', 'sellingpoint')
    # Invert each label -> code dict once so decoding is a plain lookup.
    reverse_maps = [
        {code: label for label, code in mapping[field].items()}
        for field in fields
    ]

    return [
        [reverse_maps[pos].get(int(combo[pos]), combo[pos]) for pos in range(len(fields))]
        for combo in results
    ]

In [4]:
def ad_strategy_recommender_with_topk(input_uv, input_search, k_num=3):
    """Recommend the ``k_num`` most confident ad-strategy combinations.

    The module-level ``loaded_model`` is queried once. For every output
    dimension the ``k_num`` most probable class indices are kept, all
    cross-dimension combinations are scored by the product of their class
    probabilities, and the ``k_num`` highest-scoring combinations are decoded
    with ``decode_ad_stra`` and printed.

    Args:
        input_uv: uv feature value, passed to the model as given.
        input_search: search feature value, passed to the model as given.
        k_num: Number of classes kept per dimension and number of
            combinations returned.

    Returns:
        Tuple ``(results, decoded_results)``: ``results`` is a list of tuples
        of class indices (one index per output dimension), ordered by
        descending score; ``decoded_results`` holds the matching label lists.
    """
    input_data = np.array([[input_uv, input_search]])  # shape = (1, n_features)

    # Predicted class probabilities, one entry per output dimension.
    # Assumes each entry is a 2-D array whose row 0 belongs to the single
    # input sample, as the `dim[0]` indexing below implies.
    preds = loaded_model.predict(input_data)

    # For every dimension keep (class index, probability) pairs of the top-K
    # classes, most probable first.
    topk_pairs_per_dim = []
    for dim in preds:
        probs = dim[0]
        topk_idx = probs.argsort()[-k_num:][::-1]
        topk_pairs_per_dim.append(list(zip(topk_idx, probs[topk_idx])))

    # Cartesian product over all dimensions. Carrying the probability along
    # with each index avoids searching for it again per combination, and the
    # score covers every dimension instead of a hard-coded four.
    score_candidates = []
    for pairs in itertools.product(*topk_pairs_per_dim):
        prob_product = 1.0
        for _, prob in pairs:
            prob_product *= prob
        combo = tuple(idx for idx, _ in pairs)
        score_candidates.append((combo, prob_product))

    # Keep the K combinations with the highest joint confidence.
    top_k_sorted = sorted(score_candidates, key=lambda x: x[1], reverse=True)[:k_num]
    results = [item[0] for item in top_k_sorted]

    # Decode and report.
    decoded_results = decode_ad_stra(results)
    for i, decoded in enumerate(decoded_results):
        print(f"Top-{i+1} strategy candidate: {decoded}")

    return results, decoded_results

In [5]:
def ocr_predictor(raw_input):
    """Predict the OCR value for one raw feature record.

    Args:
        raw_input: Dict with numeric 'uv' and 'search' values plus the label
            strings 'material', 'placement', 'payment' and 'sellingpoint'.

    Returns:
        The regressor's prediction after ``ocr_scaler.inverse_transform``.

    Raises:
        KeyError: If a field is missing from ``raw_input`` or a label is not
            present in ``label_mapping``.
    """
    # Numeric features go through their own fitted scalers.
    features = {}
    features['uv'] = uv_scaler.transform(
        pd.DataFrame({'uv': [raw_input['uv']]}))[0][0]
    features['search'] = search_scaler.transform(
        pd.DataFrame({'search': [raw_input['search']]}))[0][0]

    # Categorical features are replaced by their integer codes; insertion
    # order fixes the column order of the model input.
    for field in ('material', 'placement', 'payment', 'sellingpoint'):
        features[field] = label_mapping[field][raw_input[field]]

    model_input = pd.DataFrame([features])

    # Predict in scaled space, then map back to the original scale.
    scaled_prediction = xgb_reg_model.predict(model_input)
    return ocr_scaler.inverse_transform([[scaled_prediction[0]]])[0][0]

# Prediction

In [6]:
# Example input for the prediction cell.
# k_num strategies are reported in the end; k_num * multi_k candidates are
# requested from the recommender before re-ranking.
input_uv, input_search = 150, 450
k_num, multi_k = 5, 4

In [7]:
import psutil

start_time = datetime.datetime.now()

# Request k_num * multi_k candidates so the OCR regressor has a wider pool to
# re-rank than the k_num strategies that are finally reported.
# NOTE(review): search is divided by 100 here while uv is passed raw, and
# neither goes through the scalers used by ocr_predictor - confirm this
# matches the preprocessing the MOC model was trained with.
_, decoded_ad_stra = ad_strategy_recommender_with_topk(input_uv, (input_search/100), k_num=k_num*multi_k)

top_k_strategy_ocr = {}

# Iterate over the candidates actually returned: there may be fewer than
# k_num * multi_k, and indexing by a fixed range would raise IndexError.
for i, strategy in enumerate(decoded_ad_stra):
    data_input = {
        'uv': input_uv,
        'search': input_search,
        'material': strategy[0],
        'placement': strategy[1],
        'payment': strategy[2],
        'sellingpoint': strategy[3]
    }
    ocr_value = ocr_predictor(data_input)
    top_k_strategy_ocr[i] = {
        'input_data': data_input,
        'predicted_ocr': float(ocr_value)
    }

# Stop the clock before the memory probe so the reported duration covers only
# the recommendation and OCR prediction work.
end_time = datetime.datetime.now()
duration = (end_time - start_time).total_seconds()

process = psutil.Process(os.getpid())
mem_info = process.memory_info()
print(f"\nMemory usage: {mem_info.rss / 1024 ** 2:.2f} MB")

print(f"\nTotal time taken: {duration:.4f} seconds")

# Sort by predicted_ocr in descending order so the highest OCR becomes Top-1.
sorted_results = sorted(top_k_strategy_ocr.items(), key=lambda x: x[1]['predicted_ocr'], reverse=True)

print(f"\nTop-{k_num} AD Strategy Candidates with Predicted OCR:")
for rank, (_orig_idx, result) in enumerate(sorted_results[:k_num], start=1):
    print(f"\nTop-{rank}. AD Strategy Candidate with Predicted OCR")
    print("Input Data: ", result['input_data'])
    print("OCR:\t {:.4f}".format(result['predicted_ocr']))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Top-1 strategy candidate: ['swf', 'tips', 'cpm', 'Discount']
Top-2 strategy candidate: ['swf', 'tips', 'cpm', 'Spend & Save']
Top-3 strategy candidate: ['swf', 'tips', 'cpm', 'Price Drop']
Top-4 strategy candidate: ['swf', 'tips', 'cpm', 'Gift with Purchase']
Top-5 strategy candidate: ['swf', 'tips', 'cpm', 'Flash Sale']
Top-6 strategy candidate: ['swf', 'tips', 'cpm', 'Cash/Coupon-Back']
Top-7 strategy candidate: ['swf', 'tips', 'roi', 'Discount']
Top-8 strategy candidate: ['swf', 'tips', 'roi', 'Spend & Save']
Top-9 strategy candidate: ['swf', 'tips', 'roi', 'Price Drop']
Top-10 strategy candidate: ['swf', 'tips', 'roi', 'Gift with Purchase']
Top-11 strategy candidate: ['swf', 'tips', 'roi', 'Flash Sale']
Top-12 strategy candidate: ['swf', 'tips', 'roi', 'Cash/Coupon-Back']
Top-13 strategy candidate: ['swf', 'tips', 'cpd', 'Discount']
Top-14 strategy candidate: ['swf', 'tips', 'cpd', 'Spend & Save']
Top-15 strate