# 旅遊行程推薦演算法

本演算法基於使用者的偏好，進行地點評分、群集選擇，並規劃行程。

以下是演算法流程的步驟：

1. **資料擷取**
   取得使用者偏好 (JSON)。同時，從資料庫中獲取住宿、食物、景點相關資料。

2. **偏好評分**
   根據使用者偏好，對住宿、食物、景點各地點進行適合度評分。

3. **資料整合**
   將住宿、食物、景點資料整合為一個統一的資料表。

4. **群集辨識**
   使用地理座標進行分群，生成多個群集 (clusters)。

5. **群集選擇**
   根據旅遊天數 N，選擇規模最大（地點數最多）的 N 個群集作為遊玩目標，每天對應一個群集。

6. **行程規劃**
   為每個選定的群集安排一天的行程。建立行程資料框 (DataFrame) 以儲存行程細節。

7. **輸出生成**
   將行程傳回。


In [None]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from shapely.geometry import Point
from geopy.distance import geodesic
import warnings
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
warnings.filterwarnings("ignore")

In [None]:
# 1. Data acquisition

# User preferences (hard-coded sample; the overview says these arrive as JSON).
pref = {
    "county": "台南市",
    "dates": 3,  # later cells use this as the number of clusters/days to plan
    "departure_date": "2023-07-24",
    "return_date": "2023-07-26",

    "hotel_level": 5,
    "food_level": 5,
    "viewpoint_level": 5,

    "hotel_like_tag": ["民宿"],
    "hotel_price_tag": 10000,
    "hotel_other_tag": ["SPA", "停車場"],

    "food_taste_tag": ["燒烤", "日式"],
    "food_price_tag": 2,
    "food_other_tag": ["親子餐廳"],

    "viewpoint_other_tag": ["戶外活動"],
    "transportation": 1,
}


def _load_places(xlsx_path, county):
    """Load one place table and return it as a GeoDataFrame for *county*.

    Reads the Excel file, keeps only rows whose ``city`` equals *county*, and
    attaches a point geometry built from the ``lng``/``lat`` columns
    (CRS EPSG:4326).
    """
    table = pd.read_excel(xlsx_path)
    table = table[table["city"] == county]
    points = [Point(lng, lat) for lng, lat in zip(table['lng'], table['lat'])]
    return gpd.GeoDataFrame(table, crs="EPSG:4326", geometry=points)


# TODO: replace the Excel files with database queries.
df_viewpoint = _load_places("df_viewpoint.xlsx", pref['county'])
df_food = _load_places("df_food.xlsx", pref['county'])
df_hotel = _load_places("df_hotel.xlsx", pref['county'])


In [None]:
# 2. Preference scoring
#
# Each table gets per-criterion score columns plus an `overall_score` column
# holding their row-wise weighted average.
# NOTE(review): the scoring helpers come from the project-local `my_score`
# module; their score scale is not visible here -- confirm before tuning weights.

from my_score import calculate_price_score, calculate_price_score_2, calculate_tag_score

# --- Hotels: liked type, price range, extra facilities ---
hotel_score_columns = ['like_score', 'price_score', 'other_score']
df_hotel['like_score'] = calculate_tag_score(df_hotel['keyword'], pref["hotel_like_tag"])
df_hotel['price_score'] = calculate_price_score(
    df_hotel['lower_price'],
    df_hotel['ceiling_price'],
    pref['hotel_price_tag'],
)
df_hotel['other_score'] = calculate_tag_score(df_hotel['tag'], pref["hotel_other_tag"])

# Weighted average of the hotel scores (weights follow hotel_score_columns order).
weights = pd.Series([0.4, 0.3, 0.3])
df_hotel['overall_score'] = np.average(
    df_hotel[hotel_score_columns], axis=1, weights=weights
)

# --- Food: taste tags, price level, extra tags ---
food_score_columns = ['taste_score', 'price_score', 'other_score']
df_food['taste_score'] = calculate_tag_score(df_food['tag'], pref["food_taste_tag"])
df_food['price_score'] = calculate_price_score_2(df_food['price_level'], pref['food_price_tag'])
df_food['other_score'] = calculate_tag_score(df_food['rest_tag'], pref["food_other_tag"])

# Weighted average of the food scores (weights follow food_score_columns order).
weights = pd.Series([0.4, 0.3, 0.3])
df_food['overall_score'] = np.average(
    df_food[food_score_columns], axis=1, weights=weights
)

# --- Viewpoints: both criteria are a constant 100, so every viewpoint ends up
# with the same overall score (no preference-based scoring is applied here). ---
viewpoint_score_columns = ['other_score', 'transportation_score']
for constant_column in viewpoint_score_columns:
    df_viewpoint[constant_column] = 100

# Weighted average of the viewpoint scores.
weights = pd.Series([0.4, 0.6])
df_viewpoint['overall_score'] = np.average(
    df_viewpoint[viewpoint_score_columns], axis=1, weights=weights
)




In [None]:
# 3. Data integration
# Tag every table with the kind of place it holds, then stack the three tables
# into one. The original row labels are kept, so labels may repeat across sources.
for origin, table in (('food', df_food), ('viewpoint', df_viewpoint), ('hotel', df_hotel)):
    table['source'] = origin

concat_df = pd.concat([df_food, df_viewpoint, df_hotel])


In [None]:
# 4. Cluster identification
#
# Group every place (food, viewpoint, hotel) into geographic clusters with
# DBSCAN over pairwise great-circle distances, then draw map1 showing all
# places. The map is for inspection only and should be removed eventually.
import geopandas as gpd
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import haversine_distances
from math import radians
import random
import folium
from folium.plugins import MarkerCluster

EARTH_RADIUS_M = 6371000.0   # mean Earth radius in meters
CLUSTER_EPS_M = 2000         # DBSCAN neighbourhood radius, in meters
CLUSTER_MIN_SAMPLES = 3      # minimum neighbours for a DBSCAN core point
NOISE_LABEL = -1             # label DBSCAN assigns to points in no cluster

geo_df = pd.concat([df_food, df_viewpoint, df_hotel])

# haversine_distances expects [latitude, longitude] in radians.
geo_df['lat_rad'] = geo_df['lat'].apply(radians)
geo_df['lng_rad'] = geo_df['lng'].apply(radians)

# Pairwise distance matrix in meters (unit-sphere distance scaled by Earth's radius).
coords_rad = geo_df[['lat_rad', 'lng_rad']].values
haversine_matrix = haversine_distances(coords_rad, coords_rad) * EARTH_RADIUS_M

# Cluster on the precomputed distance matrix.
dbscan = DBSCAN(eps=CLUSTER_EPS_M, min_samples=CLUSTER_MIN_SAMPLES, metric='precomputed')
geo_df['cluster'] = dbscan.fit_predict(haversine_matrix)

# Count real clusters, excluding the noise label. `-1 in series` would test the
# index labels rather than the values, so compare against the set of label
# values instead.
cluster_labels = set(geo_df['cluster'])
num_clusters = len(cluster_labels - {NOISE_LABEL})
print(f"Number of clusters: {num_clusters}")

# Map 1: every place, centred on the mean coordinate.
m1 = folium.Map(location=[geo_df['lat'].mean(), geo_df['lng'].mean()], zoom_start=12)

# Marker-cluster layer keeps the map readable when zoomed out.
marker_cluster1 = MarkerCluster().add_to(m1)

# One marker per place, labelled with the place name.
for idx, row in geo_df.iterrows():
    folium.Marker(
        location=[row['lat'], row['lng']],
        popup=row['name'],
        icon=None  # default icon
    ).add_to(marker_cluster1)

# Save map 1 as HTML (or display it inline instead).
m1.save("map1_all_clusters.html")
# display(m1)




In [None]:
# 5. Cluster selection
#
# For an N-day trip pick the N largest clusters, then draw map2 showing only
# the places in the selected clusters. The map is for inspection only and
# should be removed eventually.
NOISE_CLUSTER = -1  # DBSCAN label for points that belong to no cluster

cluster_sizes = geo_df['cluster'].value_counts()

# Print the size, mean score and members of every label (noise included).
print("Cluster Sizes and Information:")
for cluster_id, size in cluster_sizes.items():
    cluster_points = geo_df[geo_df['cluster'] == cluster_id]
    avg_score = cluster_points['overall_score'].mean()

    print(f"Cluster {cluster_id}:")
    print(f" - Size: {size} points")
    print(f" - Average Overall Score: {avg_score:.2f}")
    print(f" - Points:")
    for idx, row in cluster_points.iterrows():
        print(f"   - {row['name']} (Lat: {row['lat']}, Lng: {row['lng']}, Overall Score: {row['overall_score']})")
    print("")

# Pick the N largest clusters (N = number of travel days). Noise points are
# scattered outliers, not a visitable area, so the noise label is excluded
# before ranking. Fewer than N labels are returned if fewer clusters exist.
n = pref['dates']
real_cluster_sizes = cluster_sizes[cluster_sizes.index != NOISE_CLUSTER]
largest_clusters = real_cluster_sizes.nlargest(n).index


# Map 2: only the places in the N selected clusters.
m2 = folium.Map(location=[geo_df['lat'].mean(), geo_df['lng'].mean()], zoom_start=12)

# Marker-cluster layer.
marker_cluster2 = MarkerCluster().add_to(m2)

# One marker per place in each selected cluster.
for cluster_id in largest_clusters:
    cluster_points = geo_df[geo_df['cluster'] == cluster_id]
    for idx, row in cluster_points.iterrows():
        folium.Marker(
            location=[row['lat'], row['lng']],
            popup=row['name'],
            icon=None  # default icon
        ).add_to(marker_cluster2)

# Save map 2 as HTML (or display it inline instead).
m2.save("map2.html")
# display(m2)

In [None]:
# 6. Itinerary planning
#
# geo_df columns used here: name, lat, lng, overall_score, source, cluster, id.
# Illustrative rows:
#     name                      lat      lng      overall_score  source
#     和牛涮 台南中華西店        22.9935  120.187  44.0           food
#     助哥親民民宿              22.9970  120.195  55.1           hotel
#     某景點                    22.9970  120.195  55.1           viewpoint
#
# Selection rules:
#   * Day 1 is spent in the largest cluster, day 2 in the second largest, and
#     so on; every stop of a day comes from that day's cluster.
#   * Daily route: viewpoint1 -> food1 -> viewpoint2 -> viewpoint3 -> food2 -> hotel
#   * viewpoint1: the viewpoint with the highest overall_score.
#   * Every later stop: among places of the required source within a starting
#     radius of its anchor stop, take the highest overall_score. If nothing is
#     in range, widen the radius step by step until something is. A place is
#     never scheduled twice in one day.
#         stop        source     anchor      start radius  step
#         food1       food       viewpoint1  500 m         100 m
#         viewpoint2  viewpoint  food1       600 m         200 m
#         viewpoint3  viewpoint  viewpoint2  700 m         300 m
#         food2       food       viewpoint3  500 m         100 m
#         hotel       hotel      viewpoint3  600 m         300 m
#   * The reported distance of a stop is measured from its anchor stop.
#
# One cluster per day; the itinerary is stored in a DataFrame and drawn on map3.
# The map is for inspection only and should be removed eventually.

EARTH_RADIUS_METERS = 6371000.0  # mean Earth radius
NOISE_CLUSTER_LABEL = -1         # DBSCAN label for points in no cluster

# (stop label, source, anchor stop label, starting radius m, radius step m)
DAY_PLAN = (
    ('viewpoint1', 'viewpoint', None, None, None),
    ('food1', 'food', 'viewpoint1', 500, 100),
    ('viewpoint2', 'viewpoint', 'food1', 600, 200),
    ('viewpoint3', 'viewpoint', 'viewpoint2', 700, 300),
    ('food2', 'food', 'viewpoint3', 500, 100),
    ('hotel', 'hotel', 'viewpoint3', 600, 300),
)


def _meters_from(places, anchor):
    """Return the great-circle distance in meters from *anchor* to each row of *places*.

    Uses the haversine formula on the ``lat``/``lng`` columns (degrees). Scaling
    degree differences by a constant would overstate east-west distances away
    from the equator.
    """
    lat1 = np.radians(float(anchor['lat']))
    lng1 = np.radians(float(anchor['lng']))
    lat2 = np.radians(places['lat'].to_numpy(dtype=float))
    lng2 = np.radians(places['lng'].to_numpy(dtype=float))
    a = (np.sin((lat2 - lat1) / 2) ** 2
         + np.cos(lat1) * np.cos(lat2) * np.sin((lng2 - lng1) / 2) ** 2)
    # Clip guards against rounding pushing `a` marginally outside [0, 1].
    return 2 * EARTH_RADIUS_METERS * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0)))


def _pick_stop(candidates, anchor, start_radius_m, radius_step_m):
    """Choose the best-scored candidate near *anchor*.

    Returns ``(row, meters_from_anchor)``, or ``(None, None)`` when there is no
    usable candidate. With no anchor, the best-scored candidate is returned
    with distance 0. Otherwise the radius starts at *start_radius_m* and grows
    by *radius_step_m* until at least one candidate is inside it.
    """
    if candidates.empty:
        return None, None
    if anchor is None:
        return candidates.nlargest(1, 'overall_score').iloc[0], 0.0

    meters = _meters_from(candidates, anchor)
    if not np.isfinite(meters).any():
        # Without a finite distance the widening loop below could never stop.
        return None, None

    radius_m = start_radius_m
    while not (meters <= radius_m).any():
        radius_m += radius_step_m

    reachable = meters <= radius_m
    in_range = candidates[reachable].copy()
    in_range['_meters'] = meters[reachable]
    best = in_range.nlargest(1, 'overall_score').iloc[0]
    return best.drop('_meters'), float(best['_meters'])


# Create the map.
m3 = folium.Map(location=[geo_df['lat'].mean(), geo_df['lng'].mean()], zoom_start=12)

# One marker colour per day (cycled when the trip is longer than the palette).
colors = ['red', 'blue', 'green']

# Per-day (points, info strings) tuples.
daily_itineraries = []

# Rows of the final itinerary table; `distance` is the distance from the anchor stop.
itinerary_columns = ['day', 'step', 'source', 'name', 'lat', 'lng', 'distance']
itinerary_rows = []

# The N largest real clusters, one per travel day. The noise label is excluded
# because scattered outliers are not a visitable area.
cluster_sizes = geo_df['cluster'].value_counts()
largest_clusters = (
    cluster_sizes[cluster_sizes.index != NOISE_CLUSTER_LABEL]
    .nlargest(pref['dates'])
    .index
)

for day, cluster_id in enumerate(largest_clusters):
    cluster_color = colors[day % len(colors)]
    cluster_data = geo_df[geo_df['cluster'] == cluster_id]

    # Stops chosen for this day, with matching info and distance texts.
    daily_points = []
    daily_info = []
    daily_distances = []
    chosen = {}     # stop label -> selected row
    used_ids = {}   # source -> ids already scheduled today

    for label, source, anchor_label, start_radius_m, radius_step_m in DAY_PLAN:
        candidates = cluster_data[cluster_data['source'] == source]
        already_used = used_ids.get(source)
        if already_used:
            candidates = candidates[~candidates['id'].isin(already_used)]

        # If the planned anchor was skipped, continue from the latest stop.
        anchor = chosen.get(anchor_label)
        if anchor is None and daily_points:
            anchor = daily_points[-1]

        stop, meters = _pick_stop(candidates, anchor, start_radius_m, radius_step_m)
        if stop is None:
            # A missing category should not abort the whole itinerary.
            print(f"Day {day + 1}: no {source} left in cluster {cluster_id}; skipping {label}")
            continue

        chosen[label] = stop
        used_ids.setdefault(source, set()).add(stop['id'])
        distance_text = "0 meters" if anchor is None else f"{meters:.2f} meters"
        daily_points.append(stop)
        daily_distances.append(distance_text)
        daily_info.append(f"Source: {stop['source']}, Distance: {distance_text}")

    daily_itineraries.append((daily_points, daily_info))

    # Collect rows now; the DataFrame is built once after the loop.
    for step, (point, distance_text) in enumerate(zip(daily_points, daily_distances), start=1):
        itinerary_rows.append({
            'day': day + 1,
            'step': step,
            'source': point['source'],
            'name': point['name'],
            'lat': point['lat'],
            'lng': point['lng'],
            'distance': distance_text,
        })

    # Draw this day's stops on the map.
    marker_cluster = MarkerCluster().add_to(m3)
    for point, info in zip(daily_points, daily_info):
        folium.Marker(
            location=[point['lat'], point['lng']],
            popup=f"{point['name']} - {info}",
            icon=folium.Icon(color=cluster_color)
        ).add_to(marker_cluster)

selected_itinerary_df = pd.DataFrame(itinerary_rows, columns=itinerary_columns)

# Save the map as HTML (or display it inline instead).
m3.save("map3.html")
# display(m3)


In [None]:
# 7. Output generation


# Output DataFrame: the planned itinerary built in step 6 (columns: day, step,
# source, name, lat, lng, distance). As the final expression of the cell it is
# presumably rendered by the notebook as the cell output.
selected_itinerary_df
