In [4]:
import json
import pandas as pd
import math

# 0. Load the log file first. `logs` is the parsed JSON and is iterated by the
#    per-user price / click aggregation and by recommend_for_user further down.
with open('log_202409_part1.json', 'r', encoding='utf-8') as f:
    logs = json.load(f)

# 1. Read the user table with search history (CSV in the same directory);
#    the first CSV column is used as the DataFrame index.
df_user_search = pd.read_csv('使用者資料（含搜尋紀錄）.csv', index_col=0)
# 2. Read the product table
df_products     = pd.read_csv('產品資料.csv')

# 3. Read the location-tag JSON (flattened into a keyword set below)
with open('updated_international_location_tags.json', 'r', encoding='utf-8') as f:
    location_tags = json.load(f)

def flatten_locations(d):
    """Collect the leaf keywords of a nested location-tag structure into a set.

    Args:
        d: A dict whose values are walked recursively, or a list whose items
            are collected. Dicts and lists nested inside a list are walked as
            well instead of being added to the set (they are unhashable and
            previously raised TypeError).

    Returns:
        set: Every non-container item found inside any list. Dict keys are not
        included, and a scalar that is not inside a list contributes nothing.
    """
    kws = set()
    if isinstance(d, dict):
        for v in d.values():
            kws |= flatten_locations(v)
    elif isinstance(d, list):
        for item in d:
            if isinstance(item, (dict, list)):
                # Nested container inside a list: recurse rather than hash it.
                kws |= flatten_locations(item)
            else:
                kws.add(item)
    return kws

# Set of location keywords; recommend_for_user keeps a search term as a
# location tag only when the term is a member of this set.
location_keywords = flatten_locations(location_tags)

# 4. Activity-tag extraction
# NOTE(review): tags_dict is still an empty placeholder here, so tag_lookup is
# empty and extract_activity_tags returns '其他' for every name until the
# mapping is pasted in.
tags_dict = {
    # paste the earlier tags_dict contents here
}
# Flat list of (tag, keyword) pairs so matching is a single pass.
tag_lookup = [(t, kw) for t, kws in tags_dict.items() for kw in kws]
def extract_activity_tags(name):
    """Return the activity tags whose keywords occur in `name`.

    Matching is plain, case-sensitive substring containment against every
    keyword in `tag_lookup`.

    Args:
        name: Text to scan. A non-string value (None, NaN from a missing
            cell) is treated as having no matches instead of raising
            TypeError.

    Returns:
        str: The matched tag names sorted and joined with ', ', or '其他'
        when nothing matches.
    """
    if not isinstance(name, str):
        return '其他'
    matches = {t for t, kw in tag_lookup if kw in name}
    return ', '.join(sorted(matches)) if matches else '其他'

# 5. Price-range helper
def get_price_range(price, step=5000):
    """Map a price to the label of the fixed-width bucket that contains it.

    Args:
        price: Numeric price. Missing values (None / NaN, per pd.isna) yield
            None.
        step: Bucket width as a positive integer. Defaults to 5000, which
            reproduces the original "lower-(lower+4999)" labels.

    Returns:
        str | None: "<lower>-<upper>" where lower is int(price) floored to a
        multiple of `step` and upper is lower + step - 1, or None when the
        price is missing.

    Raises:
        ValueError: If `step` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")
    if pd.isna(price):
        return None
    lower = (int(price) // step) * step
    return f"{lower}-{lower + step - 1}"

# 6. Per-user average price range
# Collect every logged ProdPrice per user ("luid"), then bucket the mean.
user_prices = {}
for entry in logs:
    u = entry.get("luid")
    # `or {}` also covers entries whose "prod_info" key is present but null;
    # dict.get's default is only used when the key is missing altogether.
    p = (entry.get("prod_info") or {}).get("ProdPrice")
    if u and p is not None:
        user_prices.setdefault(u, []).append(p)

# Every list holds at least one price, so the division is safe.
user_price_range = {
    u: get_price_range(sum(v)/len(v))
    for u, v in user_prices.items()
}

# 7. Click (view) counts
# Number of log entries per (user, product) pair.
click_counts = {}
for entry in logs:
    u  = entry.get("luid")
    # `or {}` also covers entries whose "prod_info" key is present but null;
    # dict.get's default is only used when the key is missing altogether.
    pid = (entry.get("prod_info") or {}).get("ProdNo")
    if u and pid:
        click_counts[(u, pid)] = click_counts.get((u, pid), 0) + 1

# 8. Final recommendation function
def _split_tags(value):
    """Split a ', '-separated tag string into a set; non-strings (e.g. NaN) give an empty set."""
    if isinstance(value, str):
        return set(value.split(', '))
    return set()


def recommend_for_user(user_id, top_n=5, price_w=1.0, click_w=0.5):
    """Rank not-yet-viewed products for a user by tag overlap, price fit and clicks.

    Reads the notebook globals df_user_search, df_products, logs,
    location_keywords, user_price_range and click_counts.

    Args:
        user_id: Value matched against the '使用者ID' column and the "luid"
            field of the log entries.
        top_n: Maximum number of rows to return.
        price_w: Weight of the price-range match (0 or 1) in the total score.
        click_w: Weight of the logged click count in the total score.

    Returns:
        pandas.DataFrame: Up to `top_n` rows sorted by 'total_score'
        descending, with columns 產品編號, 產品名稱, 價格, 標籤, tag_score,
        price_score, click_score, total_score. Empty (same columns) when no
        product shares a tag with the user's interests.
    """
    result_columns = ['產品編號', '產品名稱', '價格', '標籤',
                      'tag_score', 'price_score', 'click_score', 'total_score']

    # --- Tags derived from the user's search record ---
    sr = df_user_search.loc[df_user_search['使用者ID'] == user_id, '搜尋紀錄']
    raw_search = sr.iloc[0] if not sr.empty else None
    # '無' marks "no search record"; a missing (NaN) cell is treated the same.
    if isinstance(raw_search, str) and raw_search != '無':
        kws = raw_search.split(', ')
    else:
        kws = []
    locs = [kw for kw in kws if kw in location_keywords]
    acts = []
    if kws:
        extracted = extract_activity_tags(' '.join(kws))
        if extracted != '其他':
            acts = extracted.split(', ')
    search_tags = set(locs) | set(acts)

    # --- Tags of products the user already has log entries for ---
    viewed = set()
    for e in logs:
        if e.get('luid') != user_id:
            continue
        # `or {}` also covers a "prod_info" key that is present but null.
        prod_no = (e.get('prod_info') or {}).get('ProdNo')
        if prod_no:
            viewed.add(prod_no)
    viewed_tags = set()
    for pid in viewed:
        row = df_products.loc[df_products['產品編號'] == pid, '標籤']
        if not row.empty:
            viewed_tags |= _split_tags(row.iloc[0])

    interest = search_tags | viewed_tags
    upr = user_price_range.get(user_id)  # None when the user has no logged prices

    recs = []
    for _, pr in df_products.iterrows():
        pid = pr['產品編號']
        if pid in viewed:
            continue
        ts = len(_split_tags(pr['標籤']) & interest)
        if ts == 0:
            continue
        ps = 1 if upr and get_price_range(pr['價格']) == upr else 0
        # NOTE(review): click_counts is built from the same logs as `viewed`,
        # and viewed products are skipped above, so this is expected to be 0
        # for every candidate - confirm whether that is intended.
        cs = click_counts.get((user_id, pid), 0)
        recs.append({
            '產品編號': pid,
            '產品名稱': pr['產品名稱'],
            '價格': pr['價格'],
            '標籤': pr['標籤'],
            'tag_score': ts,
            'price_score': ps,
            'click_score': cs,
            'total_score': ts + price_w * ps + click_w * cs,
        })

    # Passing `columns` keeps 'total_score' present even when `recs` is empty,
    # so sort_values does not raise KeyError for users with no matches.
    ranked = pd.DataFrame(recs, columns=result_columns)
    return ranked.sort_values('total_score', ascending=False).head(top_n)


In [6]:
import pandas as pd
import json
import math

# 1. Read the CSV files: the user table (first CSV column becomes the index)
#    and the product table.
df_user_search = pd.read_csv('使用者資料（含搜尋紀錄）.csv', index_col=0)
df_products = pd.read_csv('產品資料.csv')

# 2. Load the location-tag data (flattened into a keyword set below)
with open('updated_international_location_tags.json', 'r', encoding='utf-8') as f:
    location_tags = json.load(f)

def flatten_locations(d):
    """Collect the leaf keywords of a nested location-tag structure into a set.

    Args:
        d: A dict whose values are walked recursively, or a list whose items
            are collected. Dicts and lists nested inside a list are walked as
            well instead of being added to the set (they are unhashable and
            previously raised TypeError).

    Returns:
        set: Every non-container item found inside any list. Dict keys are not
        included, and a scalar that is not inside a list contributes nothing.
    """
    kws = set()
    if isinstance(d, dict):
        for v in d.values():
            kws |= flatten_locations(v)
    elif isinstance(d, list):
        for item in d:
            if isinstance(item, (dict, list)):
                # Nested container inside a list: recurse rather than hash it.
                kws |= flatten_locations(item)
            else:
                kws.add(item)
    return kws

# Set of location keywords; recommend_for_user keeps a search term as a
# location tag only when the term is a member of this set.
location_keywords = flatten_locations(location_tags)

# 3. Activity-tag extraction
# Maps each activity tag to the keywords that trigger it. Matching is by
# substring, so a short keyword also fires inside longer words.
tags_dict = {
    "主題樂園": ["環球影城","迪士尼","樂園","遊樂園"],
    "自然景觀": ["峽灣","森林","湖","溫泉","瀑布","山","海灘","草原","自然","極光","星空","雲海","楓","櫻","櫻花","楓葉","觀景","落羽松"],
    "文化歷史": ["博物館","神社","寺","古城","遺跡","文化","歷史","古蹟","宮殿","皇宮","教堂","城堡","古老","古代","古文明","古文物","祭典","世界遺產"],
    "美食": ["螃蟹","美食","饗宴","餐廳","料理","海鮮","燒肉","壽司","拉麵","咖哩","甜點"],
    "購物": ["購物","市場","商場","百貨"],
    "親子旅遊": ["親子","動物園","兒童","家庭","水族館"],
    "豪華": ["五星","高級","豪華","度假","渡假"],
    "海島旅遊": ["海灘","潛水","島","度假村","海島","海洋","浮潛","海底"],
    "雪景": ["滑雪","冰川","極光","雪景","戲雪","冰上活動"],
    "火車旅遊": ["火車","列車","鐵道"],
    "溫泉之旅": ["溫泉","溫泉鄉","溫泉區","溫泉街"],
    "藝文體驗": ["音樂會","表演","劇場","歌劇","美術館","藝文","展覽","藝術"],
    "戶外活動": ["登山","健行","露營","野餐","攀岩","滑索","泛舟","獨木舟"],
    "網美打卡": ["打卡","網美","IG","拍照","玻璃屋","天空之鏡"],
    "婚禮蜜月": ["蜜月","婚禮","情侶","戀人","浪漫","紀念日"],
    "夜間活動": ["夜景","夜市","夜生活","燈光秀","夜拍","夜遊"]
}
# Flat list of (tag, keyword) pairs so matching is a single pass.
tag_lookup = [(t, kw) for t, kws in tags_dict.items() for kw in kws]

def extract_activity_tags(name: str) -> str:
    """Return the activity tags whose keywords occur in `name`.

    Matching is plain, case-sensitive substring containment against every
    keyword in `tag_lookup`.

    Args:
        name: Text to scan. A non-string value (None, NaN from a missing
            cell) is treated as having no matches instead of raising
            TypeError.

    Returns:
        The matched tag names sorted and joined with ', ', or '其他' when
        nothing matches.
    """
    if not isinstance(name, str):
        return '其他'
    matches = {t for t, kw in tag_lookup if kw in name}
    return ', '.join(sorted(matches)) if matches else '其他'

# 4. Price-range helper
def get_price_range(price, step=5000):
    """Map a price to the label of the fixed-width bucket that contains it.

    Args:
        price: Numeric price. Missing values (None / NaN, per pd.isna) yield
            None.
        step: Bucket width as a positive integer. Defaults to 5000, which
            reproduces the original "lower-(lower+4999)" labels.

    Returns:
        str | None: "<lower>-<upper>" where lower is int(price) floored to a
        multiple of `step` and upper is lower + step - 1, or None when the
        price is missing.

    Raises:
        ValueError: If `step` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")
    if pd.isna(price):
        return None
    lower = (int(price) // step) * step
    return f"{lower}-{lower + step - 1}"

# 5. Per-user average price range (computed from the `logs` variable)
# `logs` must already be defined - this cell does not load it. Each entry is
# read for "luid" and for "prod_info" -> "ProdPrice".
user_prices = {}
for entry in logs:
    luid = entry.get("luid")
    # `or {}` also covers entries whose "prod_info" key is present but null;
    # dict.get's default is only used when the key is missing altogether.
    price = (entry.get("prod_info") or {}).get("ProdPrice")
    if luid and price is not None:
        user_prices.setdefault(luid, []).append(price)
# Every list holds at least one price, so the division is safe.
user_price_range = {
    u: get_price_range(sum(prices)/len(prices))
    for u, prices in user_prices.items()
}

# 6. Historical click (view) counts used as a score weight
# Number of log entries per (user, product) pair.
click_counts = {}
for entry in logs:
    luid = entry.get("luid")
    # `or {}` also covers entries whose "prod_info" key is present but null;
    # dict.get's default is only used when the key is missing altogether.
    prod_no = (entry.get("prod_info") or {}).get("ProdNo")
    # Assumption: every log entry counts as a view.
    if luid and prod_no:
        click_counts[(luid, prod_no)] = click_counts.get((luid, prod_no), 0) + 1

# 7. Recommendation function
def _split_tags(value):
    """Split a ', '-separated tag string into a set; non-strings (e.g. NaN) give an empty set."""
    if isinstance(value, str):
        return set(value.split(', '))
    return set()


def recommend_for_user(user_id, top_n=5, price_w=1.0, click_w=0.5):
    """Rank not-yet-viewed products for a user by tag overlap, price fit and clicks.

    Reads the notebook globals df_user_search, df_products, logs,
    location_keywords, user_price_range and click_counts.

    Args:
        user_id: Value matched against the '使用者ID' column and the "luid"
            field of the log entries.
        top_n: Maximum number of rows to return.
        price_w: Weight of the price-range match (0 or 1) in the total score.
        click_w: Weight of the logged click count in the total score.

    Returns:
        pandas.DataFrame: Up to `top_n` rows sorted by 'total_score'
        descending, with columns 產品編號, 產品名稱, 價格, 標籤, tag_score,
        price_score, click_score, total_score. Empty (same columns) when no
        product shares a tag with the user's interests.
    """
    result_columns = ['產品編號', '產品名稱', '價格', '標籤',
                      'tag_score', 'price_score', 'click_score', 'total_score']

    # --- Tags derived from the user's search record ---
    sr = df_user_search.loc[df_user_search['使用者ID'] == user_id, '搜尋紀錄']
    raw_search = sr.iloc[0] if not sr.empty else None
    # '無' marks "no search record"; a missing (NaN) cell is treated the same.
    if isinstance(raw_search, str) and raw_search != '無':
        keywords = raw_search.split(', ')
    else:
        keywords = []
    loc_tags = [kw for kw in keywords if kw in location_keywords]
    act_tags = []
    if keywords:
        # Extract once and reuse the result for both the check and the split.
        extracted = extract_activity_tags(' '.join(keywords))
        if extracted != '其他':
            act_tags = extracted.split(', ')
    search_tags = set(loc_tags) | set(act_tags)

    # --- Tags of products the user already has log entries for ---
    # A set gives constant-time membership tests inside the product loop.
    viewed = set()
    for e in logs:
        if e.get('luid') != user_id:
            continue
        # `or {}` also covers a "prod_info" key that is present but null.
        prod_no = (e.get('prod_info') or {}).get('ProdNo')
        if prod_no:
            viewed.add(prod_no)
    viewed_tags = set()
    for pid in viewed:
        row = df_products.loc[df_products['產品編號'] == pid, '標籤']
        if not row.empty:
            viewed_tags |= _split_tags(row.iloc[0])

    interest_tags = search_tags | viewed_tags
    upr = user_price_range.get(user_id)  # None when the user has no logged prices

    recs = []
    for _, prod in df_products.iterrows():
        pid = prod['產品編號']
        if pid in viewed:
            continue
        tag_score = len(_split_tags(prod['標籤']) & interest_tags)
        if tag_score == 0:
            continue
        price_score = 1 if upr and get_price_range(prod['價格']) == upr else 0
        # NOTE(review): click_counts is built from the same logs as `viewed`,
        # and viewed products are skipped above, so this is expected to be 0
        # for every candidate - confirm whether that is intended.
        click_score = click_counts.get((user_id, pid), 0)
        total = tag_score + price_w * price_score + click_w * click_score
        recs.append({
            '產品編號': pid,
            '產品名稱': prod['產品名稱'],
            '價格': prod['價格'],
            '標籤': prod['標籤'],
            'tag_score': tag_score,
            'price_score': price_score,
            'click_score': click_score,
            'total_score': total
        })

    # Passing `columns` keeps 'total_score' present even when `recs` is empty,
    # so sort_values does not raise KeyError for users with no matches.
    ranked = pd.DataFrame(recs, columns=result_columns)
    return ranked.sort_values('total_score', ascending=False).head(top_n)

# 8. Usage example
if __name__ == '__main__':
    # Strip stray whitespace so a pasted ID still matches the '使用者ID'
    # column, which is compared with exact equality.
    uid = input("請輸入使用者ID：").strip()
    print(recommend_for_user(uid, top_n=5).to_string(index=False))


        產品編號                                                     產品名稱      價格                                       標籤  tag_score  price_score  click_score  total_score
25JF101BRK-K            九州｜自然野生動物園.叢林巴士.金鱗湖~湯布院.南阿蘇鐵道.居酒屋料理.溫泉五日｜高雄直飛 37900.0 湯布院, 溫泉之旅, 火車旅遊, 美食, 自然景觀, 親子旅遊, 金鱗湖, 高雄          8            1            0          9.0
25JF205BRK-K            九州｜自然野生動物園.叢林巴士.金鱗湖~湯布院.南阿蘇鐵道.居酒屋料理.溫泉五日｜高雄直飛 40900.0 湯布院, 溫泉之旅, 火車旅遊, 美食, 自然景觀, 親子旅遊, 金鱗湖, 高雄          8            0            0          8.0
25JF131ITK-K      迎春折2000｜九州｜自然野生動物園.叢林巴士.金鱗湖湯布院.雙鐵道.螃蟹吃到飽.雙溫泉五日｜高雄直飛 62900.0 湯布院, 溫泉之旅, 火車旅遊, 美食, 自然景觀, 親子旅遊, 金鱗湖, 高雄          8            0            0          8.0
24JFD14BRK-K 九州｜早鳥贈小費｜特色列車由布院之森．入住五星酒店品三大蟹．湯布院金鱗湖．和服體驗．海地獄．溫泉五日【高雄直飛】 38900.0  海地獄, 湯布院, 溫泉之旅, 火車旅遊, 自然景觀, 豪華, 金鱗湖, 高雄          6            1            0          7.0
25JF104BRK-K 九州｜早鳥贈小費｜特色列車由布院之森．入住五星酒店品三大蟹．湯布院金鱗湖．和服體驗．海地獄．溫泉五日【高雄直飛】 39900.0  海地獄, 湯布院, 溫泉之旅, 火車旅遊, 自然景觀, 豪華, 金鱗湖, 高雄          6            1            0

In [7]:
# Print the df_user_search row(s) whose 使用者ID equals the given ID.
user_id = '005f3246-6cfd-474f-a61f-4f2173a32603'
user_data = df_user_search.loc[df_user_search['使用者ID'].eq(user_id)]
print(user_data)

                                  使用者ID                  地點／景點  \
0  005f3246-6cfd-474f-a61f-4f2173a32603  湯布院, 熊本, 熊本城, 金鱗湖, 高雄   

                           活動           購買區間 搜尋紀錄  
0  溫泉之旅, 火車旅遊, 美食, 自然景觀, 親子旅遊  35000 - 39999  冰魚節  
