# Extracție Comentarii Issue

Salvăm fiecare comentariu (text, timestamp, autor, reacții).

In [1]:
%run ./utils.ipynb

In [2]:
import pandas as pd

In [3]:
# Column order of the resulting DataFrame / CSV. Kept explicit so that an
# extraction yielding zero comments still produces a CSV with a header row.
_ISSUE_COMMENT_COLUMNS = (
    "repo_full_name",
    "issue_id",
    "comment_id",
    "user_login",
    "user_id",
    "created_at",
    "updated_at",
    "body",
    "reactions_total",
    "reactions_plus1",
    "reactions_minus1",
    "reactions_laugh",
    "reactions_hooray",
    "reactions_confused",
    "reactions_heart",
)


def _issue_comment_row(repo_full_name: str, issue, comment) -> dict:
    """Flatten one issue comment into a dict keyed by _ISSUE_COMMENT_COLUMNS."""
    # `reactions` may be missing (or null) in the raw payload; treat both as "no reactions".
    reactions = comment.raw_data.get("reactions") or {}
    # The author can be absent; store nulls instead of raising AttributeError.
    author = comment.user
    return {
        "repo_full_name":     repo_full_name,
        "issue_id":           issue.id,
        "comment_id":         comment.id,
        "user_login":         author.login if author is not None else None,
        "user_id":            author.id if author is not None else None,
        "created_at":         comment.created_at.isoformat(),
        "updated_at":         comment.updated_at.isoformat(),
        "body":               comment.body,
        "reactions_total":    reactions.get("total_count", 0),
        "reactions_plus1":    reactions.get("+1", 0),
        "reactions_minus1":   reactions.get("-1", 0),
        "reactions_laugh":    reactions.get("laugh", 0),
        "reactions_hooray":   reactions.get("hooray", 0),
        "reactions_confused": reactions.get("confused", 0),
        "reactions_heart":    reactions.get("heart", 0),
    }


def extract_issue_comments(repo_full_name: str,
                           max_comments: int = None) -> pd.DataFrame:
    """
    Extract the comments of all issues (any state) in a repository.

    The rows are also written to ``issue_comments.csv`` inside the folder
    returned by ``ensure_repo_folder(repo_full_name)``, overwriting any
    previous file.

    Parameters
    ----------
    repo_full_name : str
        Repository in ``'owner/repo'`` form.
    max_comments : int, optional
        Upper bound on the total number of comments collected across all
        issues. ``None`` (default) means no limit; ``0`` or a negative value
        yields an empty result.

    Returns
    -------
    pd.DataFrame
        One row per comment with the columns listed in
        ``_ISSUE_COMMENT_COLUMNS``; has those columns even when empty.
    """
    repo = gh.get_repo(repo_full_name)
    rows = []

    def limit_reached() -> bool:
        # Compare against None explicitly: a bare truthiness test would
        # treat max_comments=0 as "unlimited".
        return max_comments is not None and len(rows) >= max_comments

    if not limit_reached():
        for issue in repo.get_issues(state="all"):
            # Issues reporting zero comments need no extra comments request.
            if issue.comments == 0:
                continue

            for comment in issue.get_comments():
                rows.append(_issue_comment_row(repo_full_name, issue, comment))
                if limit_reached():
                    break

            # Stop before asking the API for the next issue.
            if limit_reached():
                break

    df = pd.DataFrame(rows, columns=list(_ISSUE_COMMENT_COLUMNS))
    folder = ensure_repo_folder(repo_full_name)
    df.to_csv(folder / "issue_comments.csv", index=False)
    return df

In [4]:
# Load the repository list used by the test cells below and preview
# its first three entries.
repos = read_repo_list("shallow_data.csv")
print("Primele 3 repo-uri:", repos[0:3])

Primele 3 repo-uri: ['microsoft/ML-For-Beginners', 'apache/superset', 'keras-team/keras']


In [5]:
# Smoke test: run the extractor on the first repository, capped at
# 20 comments, then show the frame's shape and its first rows.
test_repo = repos[0]
df_test = extract_issue_comments(repo_full_name=test_repo, max_comments=20)
print("Test shape:", df_test.shape)
df_test.head(n=5)

Test shape: (20, 15)


Unnamed: 0,repo_full_name,issue_id,comment_id,user_login,user_id,created_at,updated_at,body,reactions_total,reactions_plus1,reactions_minus1,reactions_laugh,reactions_hooray,reactions_confused,reactions_heart
0,microsoft/ML-For-Beginners,3043056961,2854812701,skytin1004,99078115,2025-05-06T14:35:29+00:00,2025-05-06T14:35:29+00:00,"Hi @BethanyJep,\r\nI've added badge links to t...",0,0,0,0,0,0,0
1,microsoft/ML-For-Beginners,3001592818,2811964585,axeleichelmann,77329021,2025-04-17T06:49:32+00:00,2025-04-17T06:49:32+00:00,@microsoft-github-policy-service agree,0,0,0,0,0,0,0
2,microsoft/ML-For-Beginners,2860724293,2665983482,InflixOP,121241401,2025-02-18T15:05:56+00:00,2025-02-18T15:05:56+00:00,@microsoft-github-policy-service agree,1,1,0,0,0,0,0
3,microsoft/ML-For-Beginners,2860724293,2817065020,github-actions[bot],41898282,2025-04-20T08:37:43+00:00,2025-04-20T08:37:43+00:00,This PR has not seen any action for a while! C...,0,0,0,0,0,0,0
4,microsoft/ML-For-Beginners,2859940744,2817065040,github-actions[bot],41898282,2025-04-20T08:37:45+00:00,2025-04-20T08:37:45+00:00,This issue has not seen any action for a while...,0,0,0,0,0,0,0


In [6]:
# Loop complet: extrage toate comentariile pentru fiecare repo
for full_name in repos:
    log(f"Issue comments → {full_name}")
    extract_issue_comments(full_name, max_comments=2)

[2025-05-11T13:33:23.955738] Issue comments → microsoft/ML-For-Beginners
[2025-05-11T13:33:31.198030] Issue comments → apache/superset
[2025-05-11T13:33:35.314120] Issue comments → keras-team/keras
[2025-05-11T13:33:39.930153] Issue comments → scikit-learn/scikit-learn
[2025-05-11T13:33:44.332880] Issue comments → pandas-dev/pandas
[2025-05-11T13:33:47.252752] Issue comments → jakevdp/PythonDataScienceHandbook
[2025-05-11T13:33:54.178417] Issue comments → apache/airflow
[2025-05-11T13:33:57.410309] Issue comments → streamlit/streamlit
[2025-05-11T13:34:01.509822] Issue comments → GokuMohandas/Made-With-ML
[2025-05-11T13:34:06.924426] Issue comments → gradio-app/gradio
[2025-05-11T13:34:10.543695] Issue comments → ray-project/ray
[2025-05-11T13:34:20.470824] Issue comments → explosion/spaCy
[2025-05-11T13:34:24.685951] Issue comments → AMAI-GmbH/AI-Expert-Roadmap
[2025-05-11T13:34:32.414600] Issue comments → Lightning-AI/pytorch-lightning
[2025-05-11T13:34:40.761683] Issue comments → mi