In [87]:
import requests
from bs4 import BeautifulSoup
import time

url = "https://github.com/google?type=all&sort=updated"

try:
    response = requests.get(url)
    response.raise_for_status()
except requests.exceptions.HTTPError as e:
    print("HTTPエラーが発生しました:", e)
    response = None
except requests.exceptions.RequestException as e:
    print("リクエストエラーが発生しました:", e)
    response = None
else:
    print("Webページに正常にアクセスできました。")
finally:
    time.sleep(1)

repos = []

if response is not None:
    soup = BeautifulSoup(response.text, "html.parser")

    for li in soup.select("div.org-repos li"):

        name_tag = li.select_one('a[data-hovercard-type="repository"]')
        if not name_tag:
            name_tag = li.select_one("h3 a")
        if not name_tag:
            name_tag = li.select_one('a[href^="/google/"]')

        if name_tag:
            raw_name = name_tag.get_text(strip=True)
            parts = [p for p in raw_name.split("/") if p]
            name = parts[-1]
        else:
            name = "Unknown"

        lang_tag = li.select_one('span[itemprop="programmingLanguage"]')
        language = lang_tag.get_text(strip=True) if lang_tag else "Unknown"

        stars_tag = li.select_one('a[href$="/stargazers"]')
        stars = stars_tag.get_text(strip=True) if stars_tag else "0"

        repos.append((name, language, stars))

print("スクレイピング件数:", len(repos))
print(repos[:5])


Webページに正常にアクセスできました。
スクレイピング件数: 10
[('skia-buildbot', 'Go', '158'), ('or-tools', 'C++', '12,715'), ('angle', 'C++', '3,843'), ('tunix', 'Python', '1,916'), ('nomulus', 'Java', '1,768')]


In [88]:
import sqlite3

conn = sqlite3.connect('google_repos.db')
cur = conn.cursor()

for name, language, stars in repos:
    stars_int = int(stars.replace(",", ""))
    sql = "INSERT INTO repos (name, language, stars) VALUES (?, ?, ?);"
    cur.execute(sql, (name, language, stars_int))

conn.commit()
conn.close()

print("DB保存完了！")


DB保存完了！


In [89]:
import sqlite3

conn = sqlite3.connect('google_repos.db')
cur = conn.cursor()

cur.execute("SELECT name, language, stars FROM repos;")

print("---- DB内のデータ ----")
for name, language, stars in cur:
    print(f"{name}\n{language}\n{stars}\n-----")

conn.close()


---- DB内のデータ ----
tcmalloc
C++
5001
-----
perfetto
C++
5019
-----
dawn
C++
781
-----
tunix
Python
1916
-----
meridian
Python
1186
-----
device-infra
Java
58
-----
filonov
Python
11
-----
angle
C++
3843
-----
site-kit-wp
JavaScript
1338
-----
nomulus
Java
1768
-----
perfetto
C++
5019
-----
koladata
C++
27
-----
desugar_jdk_libs
Java
389
-----
conscrypt
Java
1358
-----
tcmalloc
C++
5001
-----
dawn
C++
781
-----
tunix
Python
1916
-----
meridian
Python
1186
-----
device-infra
Java
58
-----
filonov
Python
11
-----
nomulus
Java
11
-----
device-infra
Java
11
-----
osv-scalibr
Go
11
-----
dwh-migration-tools
Java
11
-----
perfetto
C++
11
-----
koladata
C++
11
-----
desugar_jdk_libs
Java
11
-----
conscrypt
Java
11
-----
tcmalloc
C++
11
-----
dawn
C++
11
-----
or-tools
C++
11
-----
angle
C++
11
-----
tunix
Python
11
-----
nomulus
Java
11
-----
device-infra
Java
11
-----
osv-scalibr
Go
11
-----
dwh-migration-tools
Java
11
-----
perfetto
C++
11
-----
koladata
C++
11
-----
desugar_jdk_libs
Java
11


In [90]:
import sqlite3

conn = sqlite3.connect('google_repos.db')
cur = conn.cursor()

cur.execute("DROP TABLE IF EXISTS car;")
cur.execute("DROP TABLE IF EXISTS google_repos;")

conn.commit()
conn.close()

print("不要なテーブルを削除しました。")


不要なテーブルを削除しました。
