In [93]:
import mysql.connector
from DrissionPage import ChromiumPage
from mysql.connector import Error

# Connect to the MySQL database.
def create_connection():
    """Open a connection to the MySQL database used by this notebook.

    The connection settings default to the values this notebook has always
    used, and each one can be overridden through an environment variable so
    credentials do not have to be edited into the code:

    - ``KOL_DB_HOST``     (default ``127.0.0.1``)
    - ``KOL_DB_NAME``     (default ``kol_1017``)
    - ``KOL_DB_USER``     (default ``root``)
    - ``KOL_DB_PASSWORD`` (default: the legacy development password)
    - ``KOL_DB_PORT``     (default ``3306``)

    Returns:
        The connection object when ``is_connected()`` reports true.
        ``None`` when the port override is not an integer, when connecting
        raises ``Error``, or when the connection reports it is not connected.
    """
    import os  # local import so this cell stays runnable on its own

    port_text = os.environ.get("KOL_DB_PORT", "3306")
    try:
        port = int(port_text)
    except ValueError:
        # A malformed override is reported the same way as a failed connect.
        print(f"Invalid KOL_DB_PORT value: {port_text!r}")
        return None

    try:
        connection = mysql.connector.connect(
            host=os.environ.get("KOL_DB_HOST", "127.0.0.1"),
            database=os.environ.get("KOL_DB_NAME", "kol_1017"),
            user=os.environ.get("KOL_DB_USER", "root"),
            # NOTE(review): the fallback keeps the original hard-coded
            # password working; set KOL_DB_PASSWORD and remove the fallback
            # before sharing this notebook.
            password=os.environ.get("KOL_DB_PASSWORD", "123456789"),
            port=port,
        )
        if connection.is_connected():
            print("Connected to MySQL database.")
            return connection
    except Error as e:
        print(f"Error while connecting to MySQL: {e}")
    return None

# Fetch the list of URLs from the database.
def fetch_urls(connection):
    """Return every ``kol_link`` value stored in the ``kol_radar_link`` table.

    Args:
        connection: An open database connection exposing ``cursor()``.

    Returns:
        A list with the first column of each fetched row, in the order the
        database returned them.

    Raises:
        Whatever the driver raises while executing the query or fetching the
        rows. The cursor is closed before the exception propagates.
    """
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT kol_link FROM kol_radar_link")
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Close the cursor even when the query fails so it is not leaked.
        cursor.close()

# 爬取資料
import time
import re

def scrape_data_with_retry(url, retries=2, delay=5):
    """Call ``scrape_data(url)``, retrying after any exception.

    Args:
        url: Page address handed to ``scrape_data``.
        retries: Number of extra attempts after the first one.
        delay: Seconds passed to ``time.sleep`` between attempts.

    Returns:
        The value returned by ``scrape_data`` on the first successful
        attempt. When every attempt fails, a dict with a single ``"error"``
        key describing the failure.
    """
    total_attempts = retries + 1
    for attempt_no in range(1, total_attempts + 1):
        try:
            print(f"嘗試第 {attempt_no} 次爬取...")
            return scrape_data(url)
        except Exception as exc:
            print(f"錯誤: {exc}")
            if attempt_no >= total_attempts:
                # Out of attempts: report the failure instead of raising.
                print("已達到最大嘗試次數，跳過此URL。")
                return {"error": f"Failed to scrape {url} after {total_attempts} attempts."}
            print(f"等待 {delay} 秒後重新嘗試...")
            time.sleep(delay)

def scrape_data(url):
    """Load a KOL profile page in the browser and collect its basic fields.

    Args:
        url: Address of the profile page to load.

    Returns:
        A dict with the keys ``kol_link``, ``kol_name``, ``kol_ig_id``,
        ``kol_ig_link`` and ``type``. The dict is also printed.

    Raises:
        Any exception raised while loading the page or while reading the
        Instagram link element. Only the name and type lookups fall back to
        placeholder text on failure.
    """
    browser_page = ChromiumPage()
    browser_page.get(url)
    browser_page.wait(10)  # presumably a fixed pause so the page can render - TODO confirm

    # Display name: text of the first <span> in the header block, cut at the
    # first '<' or '|'.
    try:
        header_html = browser_page.ele('.ant-flex css-var-r4 ant-flex-align-center').html
        name_match = re.search(r'<span[^>]*>([^<|]+)', header_html)
        kol_name = name_match.group(1).strip()
    except Exception:
        kol_name = "未能取得名稱"

    # Influencer categories: drop the label and line breaks, and swap the
    # ' · ' separator for '、'.
    try:
        raw_type = browser_page.ele('.basic-info__CustomizedNamesBlock-sc-e3553b7c-4 hdhFjC').text
        kol_type = raw_type.replace('網紅類型', '').replace('\n', '').replace(' · ', '、')
    except Exception:
        kol_type = "未能取得類型"

    # Instagram link and account id. This lookup is intentionally left
    # unguarded here, so a missing element raises to the caller.
    kol_ig_link = browser_page.ele('.kol-platform-link__SocialLinkText-sc-549f2da9-1 cspYLa').text
    id_with_query = kol_ig_link.replace('https://www.instagram.com/', '').replace('/', '')
    kol_ig_id = id_with_query.split("?")[0]  # discard any query string

    # Assemble the record in the column order used downstream.
    new_kol_data_dict = {
        'kol_link': url,
        'kol_name': kol_name,
        'kol_ig_id': kol_ig_id,
        'kol_ig_link': kol_ig_link,
        'type': kol_type,
    }

    print(new_kol_data_dict)
    return new_kol_data_dict

    

In [None]:

# Main routine.
def main(start=1, stop=3):
    """Scrape a slice of the stored profile URLs and print each result.

    Args:
        start: Index of the first URL to process (default 1, the value that
            used to be hard-coded).
        stop: Index one past the last URL to process (default 3). ``None``
            means "through the end of the list".

    Does nothing when ``create_connection()`` returns a falsy value. The
    connection is always closed once it has been opened.
    """
    connection = create_connection()
    if not connection:
        return
    try:
        for url in fetch_urls(connection)[start:stop]:
            print(url)
            # scrape_data prints the collected record itself, so the return
            # value is not kept.
            scrape_data(url)
    finally:
        connection.close()
        print("Database connection closed.")
    
# Run the main routine.
main()

Connected to MySQL database.
https://app.kolradar.com//kol/2b5601d6-7464-44c1-9ec0-69021e59d0c2?from=socialAccountRanking
{'kol_link': 'https://app.kolradar.com//kol/2b5601d6-7464-44c1-9ec0-69021e59d0c2?from=socialAccountRanking', '名稱': '⚡️派瑞 Perry Green', '帳號名稱': 'perrygreeeen', '社群連結': 'https://www.instagram.com/perrygreeeen?igsh=N3VpbzE5MHB2bzFi', '類型': '台灣旅遊、藝術與娛樂', '性別': '男', '主要語言': '繁體中文', '追蹤數': '76,835', '互動率': '20.7%', '觀看率': '667%', '漲粉率': '35.7%', '受眾主要性別': '女性', '主要性別占比%': '76%', '受眾主要年齡': '25-34 歲', '主要年齡層占比%': '76%', '商業合作內容覆蓋比例': '30%', '漲粉數': '28,830', '成長趨勢': '高潛力', '貼文預估觸及數': '3.8萬-4.5萬', '影音預估觸及數': '348.9萬-1.1億', '受眾主要國家/地區': 'Taiwan', '圖文合作費用預估': '$500 - $1,000', '影音合作費用預估': '$1,200 - $12,000', '圖文口碑價值': '$139,599', '影音口碑價值': '$139,599', '按讚評論比': '5 : 100', '平均互動數': '15,868', '平均按讚數': '15,767', '平均留言數': '101', 'Reels 互動率': '18%', 'Reels 平均互動數': '1,235', 'Reels 平均按讚數': '1,213', 'Reels 平均留言數': '22', 'Reels 觀看率': '667%', 'Reels 平均觀看數': '512,450', '期間內容數': '30', '發文頻率'

## 爬取網紅於KOLR上的資訊


In [100]:
import mysql.connector
from DrissionPage import ChromiumPage
from mysql.connector import Error
import time
import re

# Establish the database connection.
def create_connection():
    """Open a connection to the MySQL database used by this notebook.

    The connection settings default to the values this notebook has always
    used, and each one can be overridden through an environment variable so
    credentials do not have to be edited into the code:

    - ``KOL_DB_HOST``     (default ``127.0.0.1``)
    - ``KOL_DB_NAME``     (default ``kol_1017``)
    - ``KOL_DB_USER``     (default ``root``)
    - ``KOL_DB_PASSWORD`` (default: the legacy development password)
    - ``KOL_DB_PORT``     (default ``3306``)

    Returns:
        The connection object when ``is_connected()`` reports true.
        ``None`` when the port override is not an integer, when connecting
        raises ``Error``, or when the connection reports it is not connected.
    """
    import os  # local import so this cell stays runnable on its own

    port_text = os.environ.get("KOL_DB_PORT", "3306")
    try:
        port = int(port_text)
    except ValueError:
        # A malformed override is reported the same way as a failed connect.
        print(f"Invalid KOL_DB_PORT value: {port_text!r}")
        return None

    try:
        connection = mysql.connector.connect(
            host=os.environ.get("KOL_DB_HOST", "127.0.0.1"),
            database=os.environ.get("KOL_DB_NAME", "kol_1017"),
            user=os.environ.get("KOL_DB_USER", "root"),
            # NOTE(review): the fallback keeps the original hard-coded
            # password working; set KOL_DB_PASSWORD and remove the fallback
            # before sharing this notebook.
            password=os.environ.get("KOL_DB_PASSWORD", "123456789"),
            port=port,
        )
        if connection.is_connected():
            print("Connected to MySQL database.")
            return connection
    except Error as e:
        print(f"Error while connecting to MySQL: {e}")
    return None

# Fetch the list of URLs from the database.
def fetch_urls(connection):
    """Return every ``kol_link`` value stored in the ``kol_radar_link`` table.

    Args:
        connection: An open database connection exposing ``cursor()``.

    Returns:
        A list with the first column of each fetched row. An empty list when
        the driver raises ``Error``; the error is printed, not re-raised.
    """
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute("SELECT kol_link FROM kol_radar_link")
        return [row[0] for row in cursor.fetchall()]
    except Error as e:
        print(f"Error while fetching URLs: {e}")
        return []
    finally:
        # Release the cursor on every path; previously it stayed open when
        # the query itself failed.
        if cursor is not None:
            try:
                cursor.close()
            except Error as close_error:
                print(f"Error while closing cursor: {close_error}")

# Insert a scraped record into the database.
def insert_kol_data(connection, data):
    """Insert one scraped KOL record into the ``kol_radar_info`` table.

    Args:
        connection: Open database connection exposing ``cursor()``,
            ``commit()`` and ``rollback()``.
        data: Mapping with the keys ``kol_link``, ``kol_name``,
            ``kol_ig_id``, ``kol_ig_link`` and ``type``.

    Driver errors (``Error``) are printed and swallowed so one bad row does
    not stop the caller's loop. After such an error the pending transaction
    is rolled back. The cursor is closed on every path.

    Raises:
        KeyError: If ``data`` lacks one of the required keys.
    """
    insert_query = """
            INSERT INTO kol_radar_info (kol_link, kol_name, kol_ig_id, kol_ig_link, type)
            VALUES (%s, %s, %s, %s, %s)
        """
    row = (
        data['kol_link'],
        data['kol_name'],
        data['kol_ig_id'],
        data['kol_ig_link'],
        data['type'],
    )

    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(insert_query, row)
        connection.commit()
        print(f"成功插入資料: {data['kol_name']}")
    except Error as e:
        print(f"Error while inserting data: {e}")
        # Discard the failed statement so it cannot leak into the next commit.
        try:
            connection.rollback()
        except Error as rollback_error:
            print(f"Error while rolling back: {rollback_error}")
    finally:
        if cursor is not None:
            try:
                cursor.close()
            except Error as close_error:
                print(f"Error while closing cursor: {close_error}")

# Scrape a page, with retry support.
def scrape_data_with_retry(url, retries=2, delay=5):
    """Call ``scrape_data(url)``, retrying after any exception.

    Args:
        url: Page address handed to ``scrape_data``.
        retries: Number of extra attempts after the first one.
        delay: Seconds passed to ``time.sleep`` between attempts.

    Returns:
        The value returned by ``scrape_data`` on the first successful
        attempt, or ``None`` once every attempt has failed.
    """
    # Count down the attempts still available after the current one.
    for remaining in range(retries, -1, -1):
        try:
            print(f"嘗試第 {retries - remaining + 1} 次爬取...")
            return scrape_data(url)
        except Exception as exc:
            print(f"錯誤: {exc}")
            if remaining:
                print(f"等待 {delay} 秒後重新嘗試...")
                time.sleep(delay)
                continue
            print("已達到最大嘗試次數，跳過此URL。")
            return None

# Scrape one profile page.
def scrape_data(url):
    """Load a KOL profile page in the browser and collect its basic fields.

    Each of the three lookups (name, type, Instagram link) is independent:
    when one raises, its field(s) are filled with placeholder text and the
    others are still attempted.

    Args:
        url: Address of the profile page to load.

    Returns:
        A dict with the keys ``kol_link``, ``kol_name``, ``kol_ig_id``,
        ``kol_ig_link`` and ``type``. The dict is also printed.

    Raises:
        Any exception raised while creating the page object or loading the
        URL; those steps are not guarded.
    """
    tab = ChromiumPage()
    tab.get(url)
    tab.wait(10)  # presumably a fixed pause so the page can render - TODO confirm

    # Display name: text of the first <span> in the header block, cut at the
    # first '<' or '|'.
    try:
        name_markup = tab.ele('.ant-flex css-var-r4 ant-flex-align-center').html
        name_match = re.search(r'<span[^>]*>([^<|]+)', name_markup)
        kol_name = name_match.group(1).strip()
    except Exception:
        kol_name = "未能取得名稱"

    # Influencer categories: drop the label and line breaks, and swap the
    # ' · ' separator for '、'.
    try:
        kol_type = tab.ele('.basic-info__CustomizedNamesBlock-sc-e3553b7c-4 hdhFjC').text
        for old, new in (('網紅類型', ''), ('\n', ''), (' · ', '、')):
            kol_type = kol_type.replace(old, new)
    except Exception:
        kol_type = "未能取得類型"

    # Instagram link and the account id derived from it.
    try:
        kol_ig_link = tab.ele('.kol-platform-link__SocialLinkText-sc-549f2da9-1 cspYLa').text
        handle = kol_ig_link.replace('https://www.instagram.com/', '').replace('/', '')
        kol_ig_id = handle.partition("?")[0]  # keep only the part before any query string
    except Exception:
        kol_ig_link, kol_ig_id = "未能取得鏈接", "未能取得帳號"

    # Assemble the record; key order matches the insert statement's columns.
    new_kol_data_dict = {}
    new_kol_data_dict['kol_link'] = url
    new_kol_data_dict['kol_name'] = kol_name
    new_kol_data_dict['kol_ig_id'] = kol_ig_id
    new_kol_data_dict['kol_ig_link'] = kol_ig_link
    new_kol_data_dict['type'] = kol_type
    print(new_kol_data_dict)
    return new_kol_data_dict

# Main routine.
def main(start=11, stop=None):
    """Scrape a slice of the stored profile URLs and insert each result.

    Args:
        start: Index of the first URL to process (default 11, the value that
            used to be hard-coded).
        stop: Index one past the last URL to process. ``None`` (the default)
            means "through the end of the list".

    Does nothing when ``create_connection()`` returns a falsy value. The
    connection is always closed once it has been opened.
    """
    connection = create_connection()
    if not connection:
        return
    try:
        for url in fetch_urls(connection)[start:stop]:
            print(f"處理 URL: {url}")
            data = scrape_data_with_retry(url)
            if data:  # skip URLs for which the scrape returned nothing
                insert_kol_data(connection, data)
    finally:
        connection.close()
        print("Database connection closed.")

In [101]:
# Run the main routine.
if __name__ == "__main__":
    main()


Connected to MySQL database.
處理 URL: https://app.kolradar.com//kol/0e63845e-9a3c-464b-8930-a175d06a7752?from=socialAccountRanking
嘗試第 1 次爬取...
{'kol_link': 'https://app.kolradar.com//kol/0e63845e-9a3c-464b-8930-a175d06a7752?from=socialAccountRanking', 'kol_name': '吳玳慈 阿布', 'kol_ig_id': 'dianna__914', 'kol_ig_link': 'https://www.instagram.com/dianna__914/', 'type': '台灣旅遊、職場、奢侈品'}
成功插入資料: 吳玳慈 阿布
處理 URL: https://app.kolradar.com//kol/6f35c360-bb66-4279-9bd0-e468e8e61d92?from=socialAccountRanking
嘗試第 1 次爬取...
{'kol_link': 'https://app.kolradar.com//kol/6f35c360-bb66-4279-9bd0-e468e8e61d92?from=socialAccountRanking', 'kol_name': '赤鬼伯伯akaoni', 'kol_ig_id': 'akaonikou', 'kol_ig_link': 'https://www.instagram.com/akaonikou/', 'type': '電腦遊戲、日常話題、遊戲'}
成功插入資料: 赤鬼伯伯akaoni
處理 URL: https://app.kolradar.com//kol/e32c4478-83da-446d-9b50-00d7b308f529?from=socialAccountRanking
嘗試第 1 次爬取...
{'kol_link': 'https://app.kolradar.com//kol/e32c4478-83da-446d-9b50-00d7b308f529?from=socialAccountRanking', 'kol_na