# Extracție Issues

Salvăm toate issues (open+closed): metadate, body, labels și reacții.

In [1]:
%run ./utils.ipynb
import pandas as pd

In [4]:
# Column order of issues.csv. It is passed explicitly to the DataFrame so the
# CSV header is still written when a repository yields no issues (an empty,
# column-less frame would otherwise produce a file without a header).
_ISSUE_COLUMNS = (
    "repo_full_name",
    "issue_id",
    "number",
    "title",
    "body",
    "user_login",
    "user_id",
    "state",
    "locked",
    "comments_count",
    "created_at",
    "updated_at",
    "closed_at",
    "labels",
    "reactions_total",
    "reactions_plus1",
    "reactions_minus1",
    "reactions_laugh",
    "reactions_hooray",
    "reactions_confused",
    "reactions_heart",
)


def _issue_to_row(repo_full_name: str, issue) -> dict:
    """Flatten one issue object into a dict keyed by ``_ISSUE_COLUMNS``."""
    # Read the raw payload once; labels and reactions are both taken from it.
    raw = issue.raw_data
    # ``or`` also covers a key that is present but null.
    reactions = raw.get("reactions") or {}
    labels = raw.get("labels") or []
    # NOTE(review): the GitHub schema marks ``user`` as nullable (e.g. the
    # author account no longer exists) - confirm; guarded so that a single
    # such issue does not abort the whole extraction.
    author = issue.user
    closed_at = issue.closed_at
    return {
        "repo_full_name":   repo_full_name,
        "issue_id":         issue.id,
        "number":           issue.number,
        "title":            issue.title,
        "body":             issue.body,
        "user_login":       author.login if author is not None else None,
        "user_id":          author.id if author is not None else None,
        "state":            issue.state,
        "locked":           issue.locked,
        "comments_count":   issue.comments,
        "created_at":       issue.created_at.isoformat(),
        "updated_at":       issue.updated_at.isoformat(),
        # Open issues have no closing timestamp.
        "closed_at":        closed_at.isoformat() if closed_at else None,
        # Label names are flattened into one ";"-separated cell.
        "labels":           ";".join(lbl["name"] for lbl in labels),
        "reactions_total":  reactions.get("total_count", 0),
        "reactions_plus1":  reactions.get("+1", 0),
        "reactions_minus1": reactions.get("-1", 0),
        "reactions_laugh":  reactions.get("laugh", 0),
        "reactions_hooray": reactions.get("hooray", 0),
        "reactions_confused": reactions.get("confused", 0),
        "reactions_heart":  reactions.get("heart", 0),
    }


def extract_issues(repo_full_name: str, max_issues: "int | None" = None) -> pd.DataFrame:
    """Fetch a repository's issues (open and closed) and save them as CSV.

    One row is produced per issue: metadata, body, ";"-joined label names
    and reaction counts. The table is written to ``issues.csv`` inside the
    folder returned by ``ensure_repo_folder(repo_full_name)`` and is also
    returned to the caller.

    NOTE(review): GitHub's issues listing also contains pull requests; they
    are not filtered out here - confirm this is intended.

    Args:
        repo_full_name: Repository identifier passed to ``gh.get_repo``
            (e.g. ``"pandas-dev/pandas"``).
        max_issues: Maximum number of issues to fetch. ``None`` (default)
            means no limit; ``0`` or a negative value fetches nothing.

    Returns:
        DataFrame with the columns listed in ``_ISSUE_COLUMNS``; it has
        zero rows (but still all columns) when no issue was fetched.
    """
    repo = gh.get_repo(repo_full_name)
    rows = []
    for i, issue in enumerate(repo.get_issues(state="all")):
        # Explicit None test: a plain truthiness check would treat 0 as
        # "no limit" and download every issue.
        if max_issues is not None and i >= max_issues:
            break
        rows.append(_issue_to_row(repo_full_name, issue))
    df = pd.DataFrame(rows, columns=list(_ISSUE_COLUMNS))
    folder = ensure_repo_folder(repo_full_name)
    df.to_csv(folder / "issues.csv", index=False)
    return df

In [5]:
# Run the issue extraction for every repository in the list.
# NOTE: max_issues=2 caps each repository at the first two issues returned,
# so this run produces only a small sample, not the full issue history.
# read_repo_list and log are not defined in this notebook; presumably they
# come from utils.ipynb (loaded via %run) - verify.
repos = read_repo_list("shallow_data.csv")
for full_name in repos:
    # Emit a progress line before each repository is processed.
    log(f"Issues → {full_name}")
    extract_issues(full_name, max_issues=2)

[2025-05-11T13:23:21.881348] Issues → microsoft/ML-For-Beginners
[2025-05-11T13:23:24.663521] Issues → apache/superset
[2025-05-11T13:23:27.174211] Issues → keras-team/keras
[2025-05-11T13:23:29.580701] Issues → scikit-learn/scikit-learn
[2025-05-11T13:23:31.792632] Issues → pandas-dev/pandas
[2025-05-11T13:23:34.096996] Issues → jakevdp/PythonDataScienceHandbook
[2025-05-11T13:23:36.504742] Issues → apache/airflow
[2025-05-11T13:23:39.015404] Issues → streamlit/streamlit
[2025-05-11T13:23:41.321777] Issues → GokuMohandas/Made-With-ML
[2025-05-11T13:23:43.633731] Issues → gradio-app/gradio
[2025-05-11T13:23:46.040812] Issues → ray-project/ray
[2025-05-11T13:23:48.248587] Issues → explosion/spaCy
[2025-05-11T13:23:50.554479] Issues → AMAI-GmbH/AI-Expert-Roadmap
[2025-05-11T13:23:52.862549] Issues → Lightning-AI/pytorch-lightning
[2025-05-11T13:23:55.271275] Issues → microsoft/Data-Science-For-Beginners
[2025-05-11T13:23:58.584579] Issues → donnemartin/data-science-ipython-notebooks
[202

In [3]:
# Scratch cell: prints the literal "s". NOTE(review): looks like a leftover
# kernel sanity check - confirm it can be removed.
print("s")

s
