# Step 1：安裝套件

In [1]:
!pip install duckduckgo_search
!pip install opencv-python
!pip install numpy
!pip install face_recognition
import os


Collecting duckduckgo_search
  Downloading duckduckgo_search-8.1.1-py3-none-any.whl.metadata (16 kB)
Collecting primp>=0.15.0 (from duckduckgo_search)
  Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading duckduckgo_search-8.1.1-py3-none-any.whl (18 kB)
Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: primp, duckduckgo_search
Successfully installed duckduckgo_search-8.1.1 primp-0.15.0
Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl.metadata (21 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25

# Step 2：設定 IVE 成員列表 + 自動建立資料夾

In [2]:
members = [
    "Yujin",
    "Wonyoung",
    "Rei",
    "Liz",
    "Leeseo"
]

base_dir = "/content/ive_members"

os.makedirs(base_dir, exist_ok=True)

for m in members:
    os.makedirs(f"{base_dir}/{m}", exist_ok=True)

print("資料夾建立完成")


資料夾建立完成


# Step 3：爬蟲自動抓取圖片

In [3]:
import os
import requests
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
import time

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
}

def google_image_scrape(query, folder, limit=20):
    print(f"正在抓取：{query}")

    # Google 圖片搜尋 URL
    url = f"https://www.google.com/search?q={query}&tbm=isch"

    html = requests.get(url, headers=headers).text
    soup = BeautifulSoup(html, "html.parser")

    imgs = soup.find_all("img")

    count = 0
    for img in imgs:
        if count >= limit:
            break

        src = img.get("src")
        if not src or not src.startswith("http"):
            continue

        try:
            img_data = requests.get(src, timeout=10, headers=headers).content
            image = Image.open(BytesIO(img_data)).convert("RGB")

            save_path = f"{folder}/{query}_{count}.jpg"
            image.save(save_path)
            count += 1

            time.sleep(0.2)  # 避免被 Google 阻擋

        except Exception:
            continue

    print(f"完成：{query} 共下載 {count} 張\n")


# 逐一抓取 IVE 成員
for m in members:
    google_image_scrape(
        query=f"IVE {m} face close-up kpop",
        folder=f"{base_dir}/{m}",
        limit=20
    )


正在抓取：IVE Yujin face close-up kpop
完成：IVE Yujin face close-up kpop 共下載 20 張

正在抓取：IVE Wonyoung face close-up kpop
完成：IVE Wonyoung face close-up kpop 共下載 20 張

正在抓取：IVE Rei face close-up kpop
完成：IVE Rei face close-up kpop 共下載 20 張

正在抓取：IVE Liz face close-up kpop
完成：IVE Liz face close-up kpop 共下載 20 張

正在抓取：IVE Leeseo face close-up kpop
完成：IVE Leeseo face close-up kpop 共下載 20 張



# Step 4：生成人臉 Encoding（與 AI-Demo 模組三相同邏輯）

In [9]:
!pip install insightface onnxruntime-gpu

import os
import cv2
import numpy as np
from insightface.app import FaceAnalysis

# 初始化人臉分析器（使用 GPU）
app = FaceAnalysis(name="buffalo_l")
app.prepare(ctx_id=0, det_size=(640, 640))

def get_face_embedding(img_path):
    img = cv2.imread(img_path)
    if img is None:
        return None

    faces = app.get(img)
    if len(faces) == 0:
        return None

    # 取第一張臉
    return faces[0].embedding

# 建立 embedding 資料庫
face_db = {}
for member in members:
    folder = f"{base_dir}/{member}"
    embeddings = []
    for img_name in os.listdir(folder):
        img_path = os.path.join(folder, img_name)
        emb = get_face_embedding(img_path)
        if emb is not None:
            embeddings.append(emb)
    face_db[member] = embeddings
    print(f"{member}: {len(embeddings)} embeddings")


Collecting insightface
  Downloading insightface-0.7.3.tar.gz (439 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/439.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.5/439.5 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.4 kB)
Collecting onnx (from insightface)
  Downloading onnx-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.0 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl

100%|██████████| 281857/281857 [00:04<00:00, 58649.22KB/s]


*************** EP Error ***************
EP Error /onnxruntime_src/onnxruntime/core/providers/cuda/cuda_call.cc:129 std::conditional_t<THRW, void, onnxruntime::common::Status> onnxruntime::CudaCall(ERRTYPE, const char*, const char*, SUCCTYPE, const char*, const char*, int) [with ERRTYPE = cudaError; bool THRW = true; SUCCTYPE = cudaError; std::conditional_t<THRW, void, common::Status> = void] /onnxruntime_src/onnxruntime/core/providers/cuda/cuda_call.cc:121 std::conditional_t<THRW, void, onnxruntime::common::Status> onnxruntime::CudaCall(ERRTYPE, const char*, const char*, SUCCTYPE, const char*, const char*, int) [with ERRTYPE = cudaError; bool THRW = true; SUCCTYPE = cudaError; std::conditional_t<THRW, void, common::Status> = void] CUDA failure 35: CUDA driver version is insufficient for CUDA runtime version ; GPU=-1 ; hostname=37876ea86061 ; file=/onnxruntime_src/onnxruntime/core/providers/cuda/cuda_execution_provider.cc ; line=282 ; expr=cudaSetDevice(info_.device_id); 

 when using 

# Step 5：AI PK（輸入圖片 → 判斷是哪位 IVE 成員）

In [11]:
from numpy.linalg import norm
import numpy as np

def predict_member(img_path):
    query_emb = get_face_embedding(img_path)
    if query_emb is None:
        return "無法偵測到臉"

    scores = {}
    for member, embs in face_db.items():
        if len(embs) == 0:
            continue
        sims = [np.dot(query_emb, e) / (norm(query_emb) * norm(e)) for e in embs]
        scores[member] = np.mean(sims)

    # 回傳相似度最高的成員
    return max(scores, key=scores.get)


In [13]:
from google.colab import files
import cv2

# 上傳圖片
uploaded = files.upload()  # 會跳出選擇檔案視窗

for filename in uploaded.keys():
    print(f"正在辨識：{filename}")
    result = predict_member(filename)
    print("預測結果：", result)

    # 顯示圖片
    img = cv2.imread(filename)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    from matplotlib import pyplot as plt
    plt.imshow(img)
    plt.axis('off')
    plt.show()


Saving sensors-24-00637.pdf to sensors-24-00637.pdf
正在辨識：sensors-24-00637.pdf
預測結果： 無法偵測到臉


error: OpenCV(4.12.0) /io/opencv/modules/imgproc/src/color.cpp:199: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'
