In [17]:
import itertools
import json
import os
import re

from github import Github, GithubException, UnknownObjectException

# ─── CONFIG ─────────────────────────────────────────────────────────────────────
# Access token is read from the environment; when GITHUB_TOKEN is unset the
# default is an empty string.
token = os.getenv("GITHUB_TOKEN", "")
repo_name = "meta-llama/llama"

# ─── AUTH & REPO ────────────────────────────────────────────────────────────────
# An empty string is not a usable credential, so fall back to None, which makes
# the client issue anonymous (unauthenticated, more tightly rate-limited)
# requests instead of sending a blank token.
g = Github(token or None)
repo = g.get_repo(repo_name)

# ─── BASIC METADATA ─────────────────────────────────────────────────────────────
# `info` accumulates everything that is printed as JSON at the end; keys are
# inserted in the order they should appear in the report.

# Attributes copied verbatim from the repository object.
info = {
    attr: getattr(repo, attr)
    for attr in (
        "id",
        "name",
        "full_name",
        "description",
        "html_url",
        "clone_url",
        "ssh_url",
        "homepage",
    )
}
info["topics"] = repo.get_topics()
info["default_branch"] = repo.default_branch

# Timestamps are converted to ISO-8601 strings so they are JSON-serializable.
info.update(
    (stamp, getattr(repo, stamp).isoformat())
    for stamp in ("created_at", "updated_at", "pushed_at")
)

# The repository's `size` attribute is exposed under the key "size_kb".
info["size_kb"] = repo.size

# NOTE(review): GitHub's REST API is documented to report the star count under
# `watchers_count` as well (actual watchers are `subscribers_count`) — confirm
# which figure the report is meant to show.
info.update(
    (counter, getattr(repo, counter))
    for counter in (
        "watchers_count",
        "stargazers_count",
        "forks_count",
        "open_issues_count",
    )
)

# ─── LICENSE ────────────────────────────────────────────────────────────────────
# Best-effort lookup: if the API cannot provide license information, the report
# records None instead of aborting.
try:
    lic = repo.get_license()
    # Kept inside the try block so that any API error raised while reading the
    # license attributes is handled the same way as a failed lookup.
    info["license"] = {
        "key": lic.license.key,
        "name": lic.license.name,
        "spdx_id": lic.license.spdx_id,
        "url": lic.html_url,
    }
except GithubException:
    # Only GitHub API failures are swallowed; KeyboardInterrupt, SystemExit and
    # programming errors now propagate instead of being silently hidden.
    info["license"] = None

# ─── LANGUAGE BREAKDOWN ─────────────────────────────────────────────────────────
# Stored exactly as returned by the API client (a mapping keyed by language name).
info["languages"] = repo.get_languages()

# ─── CONTRIBUTORS ───────────────────────────────────────────────────────────────
# Keep only the first 20 entries of the contributor listing, reduced to the
# login and contribution count of each.
info["top_contributors"] = [
    dict(login=person.login, contributions=person.contributions)
    for person in repo.get_contributors()[:20]
]

# ─── FILE TREE ──────────────────────────────────────────────────────────────────
# Resolve the default branch to its head commit, then request that commit's
# tree with recursive=True so nested entries are included in one listing.
# NOTE(review): the GitHub Git Trees API documents that very large recursive
# listings can be truncated — confirm if an exact count matters.
branch = repo.get_branch(repo.default_branch)
tree = repo.get_git_tree(branch.commit.sha, recursive=True).tree

# Total number of entries returned, plus the first 10 as a small sample.
info["file_tree_count"] = len(tree)
info["file_tree_sample"] = [
    dict(path=entry.path, type=entry.type)
    for entry in tree[:10]
]

# ─── ISSUES & PULL REQUESTS ────────────────────────────────────────────────────
# GitHub's issues listing also returns pull requests (every PR is an issue), so
# the PR total is subtracted to report the number of genuine issues.
pulls_total = repo.get_pulls(state="all").totalCount
info["issues_count"] = repo.get_issues(state="all").totalCount - pulls_total
info["pulls_count"] = pulls_total

# ─── RECENT ISSUES (sample) ─────────────────────────────────────────────────────
# Five most recently created genuine issues. Entries carrying a `pull_request`
# marker are skipped so this sample does not duplicate "recent_pulls"; islice
# stops the lazy listing as soon as five issues have been collected.
info["recent_issues"] = [
    {"number": i.number, "title": i.title, "state": i.state}
    for i in itertools.islice(
        (
            issue
            for issue in repo.get_issues(state="all", sort="created", direction="desc")
            if issue.pull_request is None
        ),
        5,
    )
]

# ─── RECENT PULL REQUESTS (sample) ───────────────────────────────────────────────
# Five most recently created pull requests, in any state.
info["recent_pulls"] = [
    {"number": p.number, "title": p.title, "state": p.state}
    for p in repo.get_pulls(state="all", sort="created", direction="desc")[:5]
]

# ─── COMMIT HISTORY ─────────────────────────────────────────────────────────────
# Keep the first 20 entries of the commit listing (intended as the 20 latest
# commits). Each entry records the SHA, the git author's name and timestamp,
# and only the first line of the commit message.
info["recent_commits"] = [
    dict(
        sha=entry.sha,
        author=entry.commit.author.name,
        date=entry.commit.author.date.isoformat(),
        message=entry.commit.message.partition("\n")[0],
    )
    for entry in repo.get_commits()[:20]
]

# ─── README & EXTERNAL LINKS ────────────────────────────────────────────────────
try:
    readme = repo.get_readme()
except UnknownObjectException:
    # The repository has no README: report empty text rather than aborting the
    # whole run after all other metadata has already been collected.
    readme = None
    content = ""
else:
    # Bytes that are not valid UTF-8 are dropped instead of raising.
    content = readme.decoded_content.decode("utf-8", errors="ignore")
info["readme_text"] = content

# Collect the distinct http(s) URLs mentioned in the README.
# - The character class stops at whitespace and at the delimiters that wrap
#   URLs in Markdown/HTML (")", "]", "<", ">", quotes, backticks), so markup
#   is not captured as part of the link.
# - Trailing sentence punctuation is stripped from each match.
# - The result is sorted so the report is deterministic between runs.
info["external_links_in_readme"] = sorted({
    url.rstrip(".,;:!?")
    for url in re.findall(r"https?://[^\s)\]<>\"'`]+", content)
})

# ─── OUTPUT ─────────────────────────────────────────────────────────────────────
# Serialize everything collected in `info` as JSON with a 2-space indent and
# write it to standard output.
pretty = json.dumps(info, indent=2)
print(pretty)

{
  "id": 601538369,
  "name": "llama",
  "full_name": "meta-llama/llama",
  "description": "Inference code for Llama models",
  "html_url": "https://github.com/meta-llama/llama",
  "clone_url": "https://github.com/meta-llama/llama.git",
  "ssh_url": "git@github.com:meta-llama/llama.git",
  "homepage": "",
  "topics": [],
  "default_branch": "main",
  "created_at": "2023-02-14T09:29:12+00:00",
  "updated_at": "2025-06-21T21:43:55+00:00",
  "pushed_at": "2025-01-26T21:42:26+00:00",
  "size_kb": 1150,
  "watchers_count": 58405,
  "stargazers_count": 58405,
  "forks_count": 9778,
  "open_issues_count": 490,
  "license": {
    "key": "other",
    "name": "Other",
    "spdx_id": "NOASSERTION",
    "url": "https://github.com/meta-llama/llama/blob/main/LICENSE"
  },
  "languages": {
    "Python": 45017,
    "Shell": 2544
  },
  "top_contributors": [
    {
      "login": "jspisak",
      "contributions": 42
    },
    {
      "login": "ruanslv",
      "contributions": 14
    },
    {
      "lo