In [None]:
from faker import Faker
import random
import csv
import uuid
from concurrent.futures import ThreadPoolExecutor

# 初始化Faker
fake = Faker('zh_TW')

def generate_taiwan_mobile_number():
    return '9' + ''.join([str(random.randint(0, 9)) for _ in range(8)])

def generate_member(existing_emails):
    email_providers = ['gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com', 'icloud.com']
    email_provider = random.choices(email_providers, weights=[10, 2, 1, 1, 1], k=1)[0]
    while True:
        email = fake.user_name() + str(uuid.uuid4().hex[:4]) + '@' + email_provider
        if email not in existing_emails:
            existing_emails.add(email)
            break

    return {
        'Role': 'Member',
        'Password': fake.password(length=8, special_chars=False, digits=True, upper_case=True, lower_case=True),
        'Name': fake.name(),  # 隨機生成中文名字
        'Phone': generate_taiwan_mobile_number(),
        'Gender': random.choice(['M', 'F']),
        'Birthdate': fake.date_of_birth(minimum_age=15, maximum_age=50).strftime('%Y-%m-%d'),
        'Email': email,
        'Points': random.randint(0, 1000),
        'Status': random.choices(['Active', 'Inactive'], weights=[800, 1], k=1)[0],
    }

def generate_members_parallel(num_members, output_file):
    existing_emails = set()
    members = []
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(generate_member, existing_emails) for _ in range(num_members)]
        for future in futures:
            members.append(future.result())

    # 寫入CSV
    with open(output_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['Role', 'Password', 'Name', 'Phone', 'Gender', 'Birthdate', 'Email', 'Points', 'Status'])
        writer.writeheader()
        writer.writerows(members)

# 執行
generate_members_parallel(113652, 'members1.csv')


In [70]:
# 生成消費紀錄

import random
from faker import Faker
from datetime import datetime, timedelta

# 初始化 Faker 用來生成隨機時間
fake = Faker()

# 隨機生成消費紀錄
def generate_consumption_records(num_records=262144):
    records = []
    # 設定日期範圍：這學期
    end_date = datetime.today()
    start_date = end_date - timedelta(days=94)

    # 設定時間範圍 11:00 到 20:00
    start_time = timedelta(hours=11)  # 11:00
    end_time = timedelta(hours=20)    # 20:00

    restaurant_ids = [101, 102, 103, 104, 105, 107, 108, 109, 110, 123, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 232, 233, 234, 235, 236, 237, 238, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 401, 402, 403, 405, 406, 407, 408, 410, 411, 412, 413, 414, 415, 416, 417, 419, 501, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 526, 527, 528, 601, 604, 605, 606, 607, 609, 610, 611, 612, 613, 614, 703, 736, 710, 712, 713, 709, 714, 715, 716, 717, 718, 719, 720, 721, 817, 818, 806, 807, 809, 821, 823, 824, 810, 822, 825, 811, 812, 813, 814, 815, 820, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 1401, 1402, 1403, 1404, 1405, 1406, 1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1704, 1705, 1706, 1707, 1708, 1709, 1710, 1711, 1712, 1713, 1714, 1715, 1801, 1802, 1803, 1804, 1805, 1806, 1807, 1901, 1902, 1903, 1904, 1905, 2001, 2002, 2003, 2101, 2102, 2103, 2104, 2301, 2302, 2303, 2304, 2401]


    for _ in range(num_records):
        # 隨機生成會員 ID（1 ~ 112652）
        member_id = random.randint(1, 112652)

        # 隨機生成餐廳 ID
        restaurant_id = random.choice(restaurant_ids)

        # 隨機生成消費時間，範圍為過去 90 天內的 11:00 到 20:00
        random_date = start_date + timedelta(seconds=random.randint(0, int((end_date - start_date).total_seconds())))
        
        # 限制時間範圍在 11:00 到 20:00 之間
        random_time_in_range = random_date.replace(hour=11, minute=0, second=0, microsecond=0) + timedelta(seconds=random.randint(0, int((end_time - start_time).total_seconds())))

        formatted_time = random_time_in_range.strftime('%Y-%m-%d %H:%M:%S')  # 格式化為 YYYY-MM-DD HH:MM:SS


        records.append({
            'Restaurant_id': restaurant_id,
            'Member_Id': member_id,
            'DateTime': formatted_time,
        })
    
    return records

# 生成消費紀錄
consumption_records = generate_consumption_records(262144)

# 顯示部分生成的紀錄
for record in consumption_records[:5]:  # 顯示前5筆資料
    print(record)


# 將資料寫入 CSV 文件
with open('consumption_records.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['Restaurant_id', 'Member_Id', 'DateTime'])
    writer.writeheader()  # 寫入標題
    writer.writerows(consumption_records)  # 寫入消費紀錄

{'Restaurant_id': 907, 'Member_Id': 42030, 'DateTime': '2024-10-31 11:36:31'}
{'Restaurant_id': 415, 'Member_Id': 4609, 'DateTime': '2024-11-14 11:33:00'}
{'Restaurant_id': 1807, 'Member_Id': 43777, 'DateTime': '2024-09-07 16:12:14'}
{'Restaurant_id': 933, 'Member_Id': 104492, 'DateTime': '2024-11-07 16:55:18'}
{'Restaurant_id': 817, 'Member_Id': 4662, 'DateTime': '2024-10-10 19:02:43'}


In [6]:
# 從現有的消費紀錄隨機生成消費明細

import pandas as pd
import random

# 載入資料
db_final = pd.ExcelFile("DB_final.xlsx")

# 讀取各表
consumption_record = db_final.parse("Consumption_Record")
restaurant = db_final.parse("Restaurant")
menu = db_final.parse("Menu")

# 檢查表是否為空
if consumption_record.empty:
    raise ValueError("Consumption_Record 表是空的，請檢查資料來源。")
if restaurant.empty:
    raise ValueError("Restaurant 表是空的，請檢查資料來源。")
if menu.empty:
    raise ValueError("Menu 表是空的，請檢查資料來源。")

# 創建 Compose 表
def generate_compose(consumption_record, restaurant, menu, num_records=548574):
    compose_data = []

    # 建立 Consumption_Record 與 BrandID 的對應關係
    consumption_brand_map = consumption_record.merge(restaurant, on="Restaurant_id")[["Consumption_id", "Brand_id"]]

    # 建立 BrandID 與 MenuID 的對應集合，確保 BrandID 與 MenuID 正確匹配
    brand_menu_map = menu.groupby("BrandID")["MenuID"].apply(list).to_dict()

    for _ in range(num_records):
        # 隨機選擇一個 Consumption_Record_id
        if consumption_record.empty:
            raise ValueError("Consumption_Record 表在過程中出現空數據，請檢查資料來源。")
        
        consumption_record_row = consumption_record.sample(1).iloc[0]
        consumption_record_id = consumption_record_row["Consumption_id"]

        # 取得關聯的 BrandID
        brand_row = consumption_brand_map[consumption_brand_map["Consumption_id"] == consumption_record_id]
        if brand_row.empty:
            continue  # 如果關聯的 BrandID 不存在，跳過此次迭代

        brand_id = brand_row["Brand_id"].values[0]

        # 確保該 BrandID 有有效的 MenuID
        if brand_id in brand_menu_map and len(brand_menu_map[brand_id]) > 0:
            # 從對應 BrandID 的有效 MenuID 中隨機選取
            menu_id = random.choice(brand_menu_map[brand_id])
            
            # 隨機生成數量
            quantity = random.randint(1, 5)

            compose_data.append({
                "Consumption_Record_id": consumption_record_id,
                "BrandID": brand_id,
                "MenuID": menu_id,
                "Quantity": quantity
            })
        else:
            # 跳過無有效 MenuID 的情況
            continue
    
    return pd.DataFrame(compose_data)

# 生成 Compose 表
compose_df = generate_compose(consumption_record, restaurant, menu)

# 儲存生成的 Compose 表
compose_df.to_excel("Compose_Generated.xlsx", index=False)

print("Compose 表生成完成，已儲存至 'Compose_Generated.xlsx'")


KeyboardInterrupt: 

In [23]:
# 計算價錢

import pandas as pd

# 讀取 Excel 中的資料表
file_path = "DB_final.xlsx"
compose_df = pd.read_excel(file_path, sheet_name="Compose")
menu_df = pd.read_excel(file_path, sheet_name="Menu")
consumption_record_df = pd.read_excel(file_path, sheet_name="Consumption_Record")

# 合併 Compose 表與 Menu 表，根據 BrandID 和 MenuID
merged_df = compose_df.merge(menu_df, on=["Brand_ID", "Menu_ID"], how="left")

# 確保資料合併成功，並計算單品消費金額
merged_df["Item_Total"] = merged_df["Quantity"] * merged_df["Price"]

# 計算每筆 Consumption_Record_id 的總金額
total_amount_df = merged_df.groupby("Consumption_Record_ID")["Item_Total"].sum().reset_index()
total_amount_df.rename(columns={"Item_Total": "Total_Amount"}, inplace=True)

# 合併計算的金額回 Consumption_Record 表
consumption_record_df = consumption_record_df.merge(total_amount_df, left_on="Consumption_Record_ID", right_on="Consumption_Record_ID", how="left")

# 用計算的金額更新 Amount 欄位
consumption_record_df["Amount"] = consumption_record_df["Total_Amount"]

# 移除不必要的欄位
consumption_record_df.drop(columns=["Consumption_Record_ID", "Total_Amount"], inplace=True)

# 儲存回 Excel 或檢視結果
consumption_record_df.to_excel("Updated_Consumption_Record.xlsx", index=False)
print(consumption_record_df.head())


   Restaurant_ID  Member_ID            DateTime  Amount
0            322      31289 2024-09-02 11:00:03    2672
1            924     109179 2024-09-02 11:00:40    2131
2            326      44090 2024-09-02 11:00:47    3800
3            326      58187 2024-09-02 11:00:48    3663
4            904      42788 2024-09-02 11:00:52      87


In [4]:
import pandas as pd
import random

# 文件路徑
file_path = "DB_final.xlsx"

# 60 個隨機評價句子
feedback_sentences = [
    "菜色豐富且份量足夠，非常滿意。", "餐廳環境非常舒適，光線剛好。",
    "服務生態度非常親切，令人感到溫暖。", "餐點味道出乎意料地好，值得推薦。",
    "飲品的選擇多樣化，口感很棒。", "主廚推薦的菜品非常美味，沒有踩雷！",
    "孩子們非常喜歡這裡的氛圍和食物。", "甜點部分非常精緻且好吃，下次還會點。", "用餐區域的佈置非常典雅，讓人心情愉快。",
    "這裡的價格實在太實惠了，超值！", "湯品的味道濃郁，讓人回味無窮。", "餐廳的位置非常方便，交通便利。", "服務生非常細心，照顧到了每個需求。",
    "食材的新鮮度無可挑剔，口感非常好。", "用餐過程中非常順利，感謝工作人員。", "菜單設計清楚明瞭，選擇起來很方便。", "特別喜歡這裡的季節限定菜，太棒了！",
    "飲料的創意搭配令人驚豔。", "非常適合家庭聚餐，氣氛很融洽。", "感謝服務員提供的貼心服務，下次還會再來。",
    "餐點的烹飪方式很有特色，口感很好。", "裝潢設計讓人眼前一亮，非常喜歡。", "整體的用餐經驗非常愉快。", "這裡的餐具設計也非常用心。",
    "餐廳提供了非常棒的拍照區域，值得一去！", "服務生的專業程度讓人印象深刻。", "餐廳環境整潔，感覺非常安心。",
    "特別推薦這裡的早餐，超棒的選擇！", "背景音樂選得很棒，氣氛非常好。", "餐廳的戶外座位非常有情調。", "朋友聚會的絕佳選擇，大家都很滿意。",
    "整體性價比非常高，不虧是熱門餐廳。", "食材的處理非常細緻，每一口都是享受。",
    "謝謝工作人員的細心服務，非常感動。", "這裡的菜品讓人感覺非常有誠意。",
    "無論是前菜還是主菜，都表現得非常出色。", "這裡的海鮮非常新鮮，口感極佳。",
    "飲料的配方非常特別，值得嘗試。", "餐廳非常乾淨，讓人覺得很安心。",
    "特別喜歡這裡的燒烤類菜品，超棒！", "員工的服務意識非常強，讓人感覺被照顧。",
    "甜點的創新設計令人眼前一亮。", "孩子們也能找到他們喜歡的菜品，真是太好了。",
    "餐廳提供的免費停車服務非常方便。", "謝謝餐廳準備了特別的驚喜，太感動了！",
    "用餐體驗完美，推薦給所有朋友！", "非常感謝這裡的友善服務，讓人感覺賓至如歸。",
    "這裡的餐廳管理讓人感到非常專業。", "特別喜歡這裡的特色調味，讓人耳目一新。",
    "小吃和點心的口味非常好，值得一試。", "特別推薦這裡的冷菜部分，非常有創意。",
    "主廚的創新能力令人驚艷，每道菜都非常用心。", "這裡的燈光設計非常好，讓人感覺舒適。",
    "餐廳在服務細節上做得非常到位。", "用餐後感覺非常滿足，真是一個好地方。",
    "非常喜歡這裡的飲品搭配，讓人愉快。", "謝謝這裡的員工讓我們度過了一個愉快的晚上。",
    "每道菜品的配色和擺盤都非常講究。", "餐廳的用餐氛圍非常放鬆且舒適。"
    "食物新鮮且美味。", "服務態度非常熱情。", "餐廳環境整潔舒適。", "菜品的擺盤非常精美。",
    "服務速度很快，非常滿意。", "價格合理，物超所值。", "推薦給朋友來這裡用餐。", "下次一定會再來。",
    "菜品口味獨特，非常喜歡。", "工作人員的態度令人感到溫暖。", "點餐系統便捷高效。", "用餐體驗愉快。",
    "飲品種類豐富，味道也很好。", "餐廳裝潢很有特色。", "衛生條件令人滿意。", "特別喜歡這裡的招牌菜。",
    "每次來都有驚喜。", "甜點的味道令人驚豔。", "餐廳氣氛輕鬆愉快。", "服務員非常貼心。",
    "湯品非常可口，值得嘗試。", "適合家庭聚餐的好地方。", "朋友聚會的理想選擇。", "孩子們也非常喜歡。",
    "餐點的分量非常足夠。", "主廚的推薦菜非常值得一試。", "用餐區域的光線柔和適中。", "小吃和點心種類豐富。",
    "廚房透明化讓人安心。", "餐桌佈置精緻而不失親切感。", "每道菜都令人印象深刻。", "有許多健康飲食選擇。",
    "飲料的創意讓人驚喜。", "大廳的背景音樂很好聽。", "洗手間非常乾淨整潔。", "擁有無障礙設施，非常貼心。",
    "與家人一起用餐非常開心。", "非常適合情侶約會的地點。", "菜單設計得很吸引人。", "性價比非常高。",
    "提供了特別的素食選項。", "用餐過程非常流暢。", "感謝服務員的周到服務。", "主廚的創意令人敬佩。",
    "整體用餐體驗非常棒。", "孩子們享受得非常開心。", "裝修風格很有品味。", "非常推薦這裡的早餐套餐。",
    "特別喜歡這裡的戶外用餐區。", "飲品的搭配非常得宜。", "謝謝餐廳提供免費的停車位。", "下次會帶更多朋友來！",
    "每次來都覺得很溫馨。", "非常感謝這裡提供的特別服務。", "餐廳的地理位置非常方便。", "環境氛圍非常棒。",
    "這裡的用餐設計很適合拍照。", "推薦給所有喜歡美食的人！" ,"性價比很高，下次還會再來。", "餐點份量足夠，令人滿意。", "服務速度有點慢，但態度很好。",
    "餐廳位置方便，容易找到。", "價格稍高，但品質值得。", "甜點非常出色，強烈推薦！", "服務人員非常貼心。", "餐廳氣氛很好，適合聚會。", "點餐系統非常高效。",
    "適合家庭聚餐的好地方。", "菜品種類豐富，選擇很多。", "用餐過程很愉快。", "肉類料理火候剛好，口感很好。", "小朋友也非常喜歡這裡的食物。",
    "推薦的招牌菜真的很好吃。", "飲料選項很多，特別是果汁。", "餐具乾淨整齊，細節用心。",
    "上菜速度很快，服務一流。", "餐廳背景音樂很輕鬆愉快。", "下次還會帶朋友一起來！",
    "停車方便，整體體驗很棒。", "一些菜品味道稍淡，可以改進。", "燈光設計柔和，讓人放鬆。", "最喜歡他們的甜點拼盤。", "服務生的服務態度很好。",
    "食材搭配很用心，健康又美味。", "餐廳內部裝潢很有特色。", "餐點價格適中，很值得。", "店員介紹菜品很詳細且專業。",
    "喜歡這裡的開放式廚房設計。", "每道菜都讓人感到驚喜。", "餐廳的用餐環境很有質感。", "飲品的口味很獨特。", "餐廳的氛圍很放鬆。",
    "店內空調溫度剛剛好。", "餐廳特製的醬料味道一流。", "餐廳提供的茶水很特別。", "地方雖然小但很溫馨。", "餐廳的裝潢很有特色。",
    "服務生的推薦非常可靠。", "用餐期間覺得非常放鬆。", "假日人多，建議提前訂位。", "希望菜品份量可以再多一點。", "餐廳的環境很有情調。",
    "店家還提供免費小點心，驚喜！", "湯品很好喝，令人回味。", "餐廳的特調飲品值得一試。", "很適合情侶約會的地方。",
    "炸物非常酥脆，味道很好。", "蔬菜很新鮮，沒有多餘調味。", "餐廳的用餐環境很有質感。", "飲品的口味很獨特。", "餐廳的氛圍很放鬆。",
    "餐廳的位置稍偏，但值得前往。", "整體來說很滿意，值得推薦。", "服務生細心解答問題，很友善。", "餐點搭配得宜，十分精緻。",
    "餐具提供充分，讓人用餐方便。", "餐廳內的香味讓人食慾大開。", "飲品的溫度剛剛好，令人滿意。", "現場有趣的裝飾增添了氛圍。",
    "適合帶父母來的地方。", "孩子們特別喜歡這裡的氛圍。", "招待的小菜讓人驚豔。", "會再推薦朋友來這裡。"
]

# 讀取消費紀錄表
consumption_record_df = pd.read_excel(file_path, sheet_name="Consumption_Record")

# 隨機抽取約 5% 的消費紀錄
sample_size = int(len(consumption_record_df) * 0.05)  # 計算 5% 的數量
sampled_records = consumption_record_df.sample(n=sample_size, random_state=42).reset_index(drop=True)

# 建立 Feedback_Record 表，僅包含 Consumption_id 和 Member_Id
feedback_record_df = sampled_records[["Consumption_id", "Member_Id"]]

# 隨機為每筆紀錄分配一個評價句子
feedback_record_df["Content"] = feedback_record_df.apply(
    lambda _: random.choice(feedback_sentences), axis=1
)

# 將更新後的表存回 Excel
with pd.ExcelWriter(file_path, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
    feedback_record_df.to_excel(writer, sheet_name="Feedback_Record", index=False)

print("Feedback_Record 表已成功生成，包含 Consumption_id, Member_Id 和 Content 三欄。")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feedback_record_df["Content"] = feedback_record_df.apply(


Feedback_Record 表已成功生成，包含 Consumption_id, Member_Id 和 Content 三欄。


In [5]:
import pandas as pd
import random

# 讀取資料
file_path = "DB_final.xlsx"
consumption_record = pd.read_excel(file_path, sheet_name="Consumption_Record")
member = pd.read_excel(file_path, sheet_name="Member")

# 隨機選出一半的消費紀錄
sampled_records = consumption_record.sample(frac=0.5, random_state=42).reset_index(drop=True)

# 建立訂位紀錄
reservation_records = []
for index, row in sampled_records.iterrows():
    restaurant_id = row["Restaurant_id"]
    member_id = row["Member_Id"]
    datetime = row["DateTime"]

    # 安全獲取會員的 Phone 值
    phone_row = member.loc[member["Member_Id"] == member_id]
    if phone_row.empty:
        # 如果找不到對應的 Member_Id，記錄一個警告並跳過這筆資料
        print(f"Warning: Member_Id {member_id} not found in Member table. Skipping this record.")
        continue
    phone_number = phone_row["Phone"].values[0]

    # 隨機生成 Guest_cnt
    guest_cnt = random.randint(1, 4)

    reservation_records.append({
        "Reservation_Record_id": index + 1,  # 自動生成序號
        "Restaurant_id": restaurant_id,
        "Member_Id": member_id,
        "DateTime": datetime,
        "Guest_cnt": guest_cnt,
        "Phone_number": phone_number,
        "Notes": ""  # 預設為空白
    })

# 轉換為 DataFrame
reservation_df = pd.DataFrame(reservation_records)

# 儲存生成的訂位紀錄表
output_path = "Reservation_Record_Generated.xlsx"
reservation_df.to_excel(output_path, index=False)

print(f"Generated reservation records saved to {output_path}")


Generated reservation records saved to Reservation_Record_Generated.xlsx


In [2]:
import pandas as pd

# 原檔案路徑
file_path = "DB_final.xlsx"
# 新檔案路徑
output_path = "DB_final_cleaned.xlsx"

# 讀取 Excel 檔案中的 Consumption_Record 表
df = pd.read_excel(file_path, sheet_name="Consumption_Record")

# 刪除 Amount 欄位為空值的列
df_cleaned = df.dropna(subset=["Amount"])

# 將結果另存為新檔案
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    df_cleaned.to_excel(writer, sheet_name="Consumption_Record", index=False)

print(f"已刪除 Amount 欄為空值的列，並將清理後的資料儲存為 {output_path}")


已刪除 Amount 欄為空值的列，並將清理後的資料儲存為 DB_final_cleaned.xlsx


In [9]:
import pandas as pd

# 載入資料
file_path = "DB_final.xlsx"
excel_data = pd.ExcelFile(file_path)

# 讀取所有相關的表
member_df = excel_data.parse("Member")
consumption_record_df = excel_data.parse("Consumption_Record")
reservation_record_df = excel_data.parse("Reservation_Record")
feedback_record_df = excel_data.parse("Feedback_Record")
points_df = excel_data.parse("Points")

# 找出重複的 Phone
duplicate_phones = member_df[member_df.duplicated(subset="Phone", keep=False)]

# 取得重複的 Member_Id
duplicate_member_ids = duplicate_phones["Member_ID"].unique()

# 過濾掉相關表中的資料
member_df_cleaned = member_df[~member_df["Member_ID"].isin(duplicate_member_ids)]
consumption_record_df_cleaned = consumption_record_df[~consumption_record_df["Member_ID"].isin(duplicate_member_ids)]
reservation_record_df_cleaned = reservation_record_df[~reservation_record_df["Member_ID"].isin(duplicate_member_ids)]
feedback_record_df_cleaned = feedback_record_df[~feedback_record_df["Member_ID"].isin(duplicate_member_ids)]
points_df_cleaned = points_df[~points_df["Member_ID"].isin(duplicate_member_ids)]

# 將清理後的資料保存到新的 Excel 文件
with pd.ExcelWriter("DB_final_cleaned.xlsx", engine="openpyxl") as writer:
    member_df_cleaned.to_excel(writer, sheet_name="Member", index=False)
    consumption_record_df_cleaned.to_excel(writer, sheet_name="Consumption_Record", index=False)
    reservation_record_df_cleaned.to_excel(writer, sheet_name="Reservation_Record", index=False)
    feedback_record_df_cleaned.to_excel(writer, sheet_name="Feedback_Record", index=False)
    points_df_cleaned.to_excel(writer, sheet_name="Points", index=False)

print("重複項及相關資料已成功刪除並保存至 DB_final_cleaned.xlsx")


重複項及相關資料已成功刪除並保存至 DB_final_cleaned.xlsx


In [17]:
# 使用 point 表

import pandas as pd
import random
from datetime import datetime

# 讀取資料
file_path = "DB_final.xlsx"
consumption_record = pd.read_excel(file_path, sheet_name="Consumption_Record")

# 隨機抽取一些消費紀錄作為點數使用紀錄
sample_size = int(len(consumption_record) * 0.2)  # 抽取約 20% 的消費紀錄
sampled_records = consumption_record.sample(n=sample_size, random_state=42).reset_index(drop=True)

# 生成點數使用紀錄
points_usage_records = []

for _, row in sampled_records.iterrows():
    member_id = row["Member_ID"]
    consumption_id = row["Consumption_Record_ID"]
    created_at = row["DateTime"]

    # 隨機生成 Points_Change (-5 的倍數，範圍 5~100)
    points_change = -random.choice([x for x in range(5, 101, 5)])

    # 設定 Points_Remark
    points_remark = "點數兌換"

    points_usage_records.append({
        "Member_ID": member_id,
        "Consumption_Record_ID": consumption_id,
        "Points_Change": points_change,
        "Points_Remark": points_remark,
        "Created_At": created_at
    })

# 轉換為 DataFrame
points_usage_df = pd.DataFrame(points_usage_records)

# 儲存生成的點數使用紀錄表
output_path = "Points_Usage_Generated.xlsx"
points_usage_df.to_excel(output_path, index=False)

print("點數使用紀錄表生成完成並已儲存至:", output_path)


# 生成 Points 表
points_records = []

for _, row in consumption_record.iterrows():
    member_id = row["Member_ID"]
    consumption_id = row["Consumption_Record_ID"]
    amount = row["Amount"]
    created_at = row["DateTime"]

    # 檢查 Amount 是否為空
    if pd.isna(amount):
        continue

    else:
        # 計算 Points_Change (假設為金額的 10%)
        points_change = int(amount / 100)
        points_remark = "消費累點"

    points_records.append({
        "Member_ID": member_id,
        "Consumption_Record_ID": consumption_id,
        "Points_Change": points_change,
        "Points_Remark": points_remark,
        "Created_At": created_at
    })

# 轉換為 DataFrame
points_df = pd.DataFrame(points_records)

# 儲存生成的 Points 表
output_path = "Points_Generated.xlsx"
points_df.to_excel(output_path, index=False)

print("Points 表生成完成並已儲存至:", output_path)


# 合成檔案

import pandas as pd

# 讀取兩個檔案
file1_path = "Points_Generated.xlsx"  # 第一個檔案的路徑
file2_path = "Points_Usage_Generated.xlsx"

# 讀取檔案，假設兩個檔案都有相同的欄位名稱
df1 = pd.read_excel(file1_path)
df2 = pd.read_excel(file2_path)

# 合併兩個資料框
combined_df = pd.concat([df1, df2], ignore_index=True)

# 將合併後的資料儲存為新的檔案
output_path = "combined_Point_file.xlsx"
combined_df.to_excel(output_path, index=False)

點數使用紀錄表生成完成並已儲存至: Points_Usage_Generated.xlsx
Points 表生成完成並已儲存至: Points_Generated.xlsx


In [None]:
import pandas as pd
import random

# 設定檔案路徑
file_path = "DB_final.xlsx"

# Step 1: 抽取消費紀錄的 30% 並分配優惠券
def generate_coupon_record_with_coupons(consumption_record_df, restaurant_df, coupons_df, sample_fraction=0.3, random_state=42):
    # 隨機抽取指定比例的消費紀錄
    sample_size = int(len(consumption_record_df) * sample_fraction)  # 計算樣本數量
    sampled_records = consumption_record_df.sample(n=sample_size, random_state=random_state)
    
    # 將 Consumption_Record 與 Restaurant 表連結以取得 Brand_id
    consumption_with_brand = pd.merge(
        sampled_records,
        restaurant_df[['Restaurant_id', 'Brand_id']],
        how='left',
        on='Restaurant_id'
    )
    
    # 為每筆記錄分配優惠券
    def assign_coupon(row):
        brand_id = row['Brand_id']
        # 篩選出該品牌的優惠券
        available_coupons = coupons_df[coupons_df['Brand_id'] == brand_id]
        if not available_coupons.empty:
            # 隨機選擇一個優惠券 ID
            return random.choice(available_coupons['Coupon_Id'].tolist())
        return None

    # 為每筆記錄分配優惠券
    consumption_with_brand['Coupon_Id'] = consumption_with_brand.apply(assign_coupon, axis=1)
    
    # 選擇需要的欄位並移除缺失值
    coupon_record_with_coupons = consumption_with_brand[['Consumption_id', 'Member_Id', 'Coupon_Id']].dropna()
    return coupon_record_with_coupons

# 主程式邏輯
def main():
    # 讀取必要的表
    consumption_record_df = pd.read_excel(file_path, sheet_name="Consumption_Record")
    restaurant_df = pd.read_excel(file_path, sheet_name="Restaurant")
    coupons_df = pd.read_excel(file_path, sheet_name="Coupons")
    
    # 生成包含優惠券分配的表
    coupon_record_with_coupons_df = generate_coupon_record_with_coupons(consumption_record_df, restaurant_df, coupons_df)
    
    # 儲存結果到 Excel
    output_path = "Coupon_Record_With_Coupons.xlsx"
    coupon_record_with_coupons_df.to_excel(output_path, index=False)
    print(f"結果已儲存到 {output_path}")

# 執行主程式
if __name__ == "__main__":
    main()


In [18]:
import pandas as pd

# 讀取 Excel 檔案
excel_file = "DB_final.xlsx"
excel_data = pd.ExcelFile(excel_file)

# 將每個 Sheet 存為單獨的 CSV
for sheet_name in excel_data.sheet_names:
    df = excel_data.parse(sheet_name)
    output_file = f"{sheet_name}.csv"
    df.to_csv(output_file, index=False)
    print(f"Saved {sheet_name} as {output_file}")


Saved Restaurant as Restaurant.csv
Saved Brand as Brand.csv
Saved Menu as Menu.csv
Saved Member as Member.csv
Saved Points as Points.csv
Saved Coupon_Record as Coupon_Record.csv
Saved Coupons as Coupons.csv
Saved Consumption_Record as Consumption_Record.csv
Saved Compose as Compose.csv
Saved Reservation_Record as Reservation_Record.csv
Saved Feedback_Record as Feedback_Record.csv


In [28]:
import pandas as pd

# 讀取 Excel 檔案
excel_file = "DB_final.xlsx"
sheet_name = "Points"  # 指定只處理 Compose 表

# 讀取指定的 Sheet
df = pd.read_excel(excel_file, sheet_name=sheet_name)

# 儲存為 CSV
output_file = f"{sheet_name}.csv"
df.to_csv(output_file, index=False)

print(f"Saved {sheet_name} as {output_file}")


Saved Points as Points.csv


In [22]:
import pandas as pd

# 讀取資料
compose_path = "/Users/lck/workspace/DB/DB_finalproject/Compose.csv"
consumption_record_path = "/Users/lck/workspace/DB/DB_finalproject/Consumption_Record.csv"

compose_df = pd.read_csv(compose_path)
consumption_record_df = pd.read_csv(consumption_record_path)

# 過濾 Compose 中無效的 Consumption_Record_ID
valid_ids = set(consumption_record_df["Consumption_Record_ID"])
filtered_compose_df = compose_df[compose_df["Consumption_Record_ID"].isin(valid_ids)]

# 保存修正後的 Compose.csv
filtered_compose_df.to_csv(compose_path, index=False)
print(f"修正後的 Compose.csv 已儲存至 {compose_path}")



修正後的 Compose.csv 已儲存至 /Users/lck/workspace/DB/DB_finalproject/Compose.csv


In [26]:
import pandas as pd

# 讀取相關資料
consumption_record_df = pd.read_excel("DB_final.xlsx", sheet_name="Consumption_Record")
coupon_record_df = pd.read_csv("Coupon_Record.csv")

# 檢查哪些 Consumption_Record_ID 存在於 Consumption_Record 表中
valid_ids = set(consumption_record_df["Consumption_Record_ID"])
coupon_record_df = coupon_record_df[coupon_record_df["Consumption_Record_ID"].isin(valid_ids)]

# 儲存清理後的數據
coupon_record_df.to_csv("Coupon_Record_Cleaned.csv", index=False)
print("無效的 Consumption_Record_ID 已移除。")



無效的 Consumption_Record_ID 已移除。
