In [5]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

# -------------------------------------------------------
# 1. URL of the GitHub repository to scrape
#    Note: pick one from the Google organization
# -------------------------------------------------------
TARGET_URL = "https://github.com/google/googletest"

# -------------------------------------------------------
# 2. HTML を取得
# -------------------------------------------------------
def fetch_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    print(f"Fetching: {url}")
    time.sleep(1)  # Assignment rule: always wait 1 second before a request
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text

# -------------------------------------------------------
# 3. リポジトリ情報の抽出
# -------------------------------------------------------
# Multipliers for the abbreviated counts shown on the page ("37.5k", "1.2m").
_STAR_SUFFIX_MULTIPLIERS = {"k": 1_000, "m": 1_000_000}


def _parse_star_count(text):
    """Convert a star label such as "37.5k stars" or "1,234" to an int."""
    cleaned = text.lower().replace(",", "")
    # Strip the longer word first so "stars" does not leave a dangling "s".
    for word in ("stars", "star"):
        cleaned = cleaned.replace(word, "")
    cleaned = cleaned.strip()

    # cleaned[-1:] is "" for an empty string, which maps to multiplier 1.
    multiplier = _STAR_SUFFIX_MULTIPLIERS.get(cleaned[-1:], 1)
    if multiplier != 1:
        cleaned = cleaned[:-1]

    try:
        value = float(cleaned)
    except ValueError:
        raise ValueError(f"Unrecognized star count: {text!r}") from None

    # round() instead of int(): a product such as 32.3 * 1000 lands just
    # below 32300 in binary floating point, and int() would truncate it.
    return round(value * multiplier)


def parse_repo_info(html):
    """Extract the repository name, main language and star count from HTML.

    Args:
        html: HTML source of a GitHub repository page.

    Returns:
        A ``(repo_name, language, stars)`` tuple. ``language`` is
        ``"Unknown"`` when no language tag is found, and ``stars`` is ``0``
        when no stargazers link is found.

    Raises:
        ValueError: If the repository name tag is missing, or the star
            count text cannot be interpreted as a number.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Repository name
    name_tag = soup.find("strong", {"itemprop": "name"})
    if name_tag is None:
        raise ValueError("Repository name not found in HTML")
    repo_name = name_tag.get_text(strip=True)

    # Primary language
    # NOTE(review): this selector may not match GitHub's current markup (a
    # run against google/googletest produced "Unknown") - confirm and update.
    language_tag = soup.find("span", {"itemprop": "programmingLanguage"})
    language = language_tag.get_text(strip=True) if language_tag else "Unknown"

    # Star count, e.g. "37.5k" -> 37500
    star_tag = soup.select_one("a[href$='/stargazers']")
    if star_tag is None:
        stars = 0
    else:
        stars = _parse_star_count(star_tag.get_text(strip=True))

    return repo_name, language, stars


# -------------------------------------------------------
# 4. DB 作成
# -------------------------------------------------------
def init_db(db_path="repos.db"):
    """Open the SQLite database and make sure the ``repos`` table exists.

    Args:
        db_path: Database file to open. Defaults to ``"repos.db"``; pass
            ``":memory:"`` for a throwaway in-memory database.

    Returns:
        A ``(conn, cur)`` tuple: the open connection and a cursor on it.
        The caller is responsible for closing the connection.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table
            cannot be created.
    """
    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        cur.execute("""
            CREATE TABLE IF NOT EXISTS repos (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT,
                language TEXT,
                stars INTEGER
            )
        """)
        conn.commit()
    except Exception:
        # Do not leak the connection when schema setup fails.
        conn.close()
        raise
    return conn, cur

# -------------------------------------------------------
# 5. データ保存
# -------------------------------------------------------
def save_to_db(cur, conn, name, language, stars):
    """Insert one repository row into ``repos`` and commit it.

    Args:
        cur: Cursor used to run the INSERT.
        conn: Connection on which the change is committed.
        name: Repository name.
        language: Language label for the repository.
        stars: Star count.
    """
    insert_sql = "INSERT INTO repos (name, language, stars) VALUES (?, ?, ?)"
    record = (name, language, stars)
    # Values are bound as parameters rather than formatted into the SQL.
    cur.execute(insert_sql, record)
    conn.commit()

# -------------------------------------------------------
# 6. SELECT して表示
# -------------------------------------------------------
def show_all_repos(cur):
    """Print every row of the ``repos`` table under a header line.

    Args:
        cur: Cursor on a database that contains the ``repos`` table.
    """
    cur.execute("SELECT * FROM repos")
    saved = cur.fetchall()

    # Build the header plus one line per row, then emit it in one print.
    report = ["\n=== Saved Repositories ==="]
    report.extend(str(record) for record in saved)
    print("\n".join(report))

# -------------------------------------------------------
# main
# -------------------------------------------------------
def main():
    """Scrape TARGET_URL, store the result in SQLite and print all rows."""
    # 1. Fetch the HTML
    html = fetch_html(TARGET_URL)

    # 2. Parse it and extract the fields we need
    name, language, stars = parse_repo_info(html)
    print("\nScraped Repository:")
    print(f"Name: {name}")
    print(f"Language: {language}")
    print(f"Stars: {stars}")

    # 3. Initialise the database
    conn, cur = init_db()
    try:
        # 4. Save the scraped row
        save_to_db(cur, conn, name, language, stars)

        # 5. Show everything stored so far
        show_all_repos(cur)
    finally:
        # Close the connection even if saving or listing fails.
        conn.close()


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Fetching: https://github.com/google/googletest

Scraped Repository:
Name: googletest
Language: Unknown
Stars: 37500

=== Saved Repositories ===
(1, 'googletest', 'Unknown', 37500)
