# Extracție Istoric Stele

Pentru fiecare stargazer colectăm data evenimentului, utilizatorul și metadatele disponibile (tip, locație, companie), apoi salvăm rezultatul în `stars.csv`, în folderul repo-ului.

In [1]:
%run ./utils.ipynb

In [2]:
import pandas as pd

In [3]:
def extract_stars(repo_full_name: str, max_stars: int = None) -> pd.DataFrame:
    """
    Extract the "star" events of a repository and persist them as CSV.

    Every stargazer yielded by ``repo.get_stargazers_with_dates()`` becomes one
    row holding the repo name, the user's login / id / type / location /
    company and the star timestamp in ISO-8601 form. The resulting frame is
    written to ``stars.csv`` inside the folder returned by
    ``ensure_repo_folder(repo_full_name)`` and is also returned.

    Parameters
    ----------
    repo_full_name : str
        Repository in 'owner/repo' form.
    max_stars : int, optional
        Maximum number of entries to extract. ``None`` (the default) means
        no limit; ``0`` or a negative value yields an empty frame.

    Returns
    -------
    pd.DataFrame
        One row per extracted star event. The columns are always the ones
        listed in ``columns`` below, even when nothing was extracted.
    """
    # Local import so the function does not depend on another cell's imports.
    from itertools import islice

    columns = [
        "repo_full_name",
        "user_login",
        "user_id",
        "starred_at",
        "user_type",
        "user_location",
        "user_company",
    ]

    # Test against None explicitly: a plain truthiness check would treat
    # max_stars=0 as "no limit" and pull the whole star history.
    # Negative values are clamped to 0 because islice rejects them.
    limit = None if max_stars is None else max(max_stars, 0)

    repo = gh.get_repo(repo_full_name)
    rows = []

    # get_stargazers_with_dates yields objects exposing .user and .starred_at.
    # islice stops right after `limit` items, so the iterator is never asked
    # for an element that would be thrown away.
    for sg in islice(repo.get_stargazers_with_dates(), limit):
        user = sg.user
        # NOTE(review): location/company may not be part of the stargazer
        # payload, in which case the client library could issue one extra
        # request per user when they are read - confirm against rate limits.
        rows.append({
            "repo_full_name": repo_full_name,
            "user_login":     user.login,
            "user_id":        user.id,
            "starred_at":     sg.starred_at.isoformat(),
            "user_type":      user.type,
            "user_location":  user.location,
            "user_company":   user.company,
        })

    # Passing `columns` keeps the CSV header stable even for an empty result.
    df = pd.DataFrame(rows, columns=columns)
    folder = ensure_repo_folder(repo_full_name)
    df.to_csv(folder / "stars.csv", index=False)
    return df

In [4]:
# Load the list of repositories used for testing from "shallow_data.csv".
# read_repo_list comes from the utils notebook; judging by the printed output
# it returns 'owner/repo' strings - TODO confirm against its definition.
repos = read_repo_list("shallow_data.csv")
print("Primele 3 repo-uri:", repos[:3])

Primele 3 repo-uri: ['microsoft/ML-For-Beginners', 'apache/superset', 'keras-team/keras']


In [5]:
# Smoke test for extract_stars: fetch at most 20 star events for the first
# repo in the list and inspect the resulting frame.
# Side effect: the call also writes stars.csv into that repo's folder.
test_repo = repos[0]
df_test = extract_stars(test_repo, max_stars=20)
print("Test shape:", df_test.shape)
df_test.head()

Test shape: (20, 7)


Unnamed: 0,repo_full_name,user_login,user_id,starred_at,user_type,user_location,user_company
0,microsoft/ML-For-Beginners,abhi-bhatra,63901956,2021-06-04T09:25:31+00:00,User,JAIPUR,@github
1,microsoft/ML-For-Beginners,paladique,1414307,2021-06-29T18:46:36+00:00,User,"Brooklyn, NY",@Microsoft
2,microsoft/ML-For-Beginners,changhefirst,9982233,2021-06-30T00:23:38+00:00,User,,
3,microsoft/ML-For-Beginners,mrsmiles9,75179483,2021-06-30T04:36:04+00:00,User,Folscom,JB&A
4,microsoft/ML-For-Beginners,tagazok,2637742,2021-06-30T05:55:15+00:00,User,"Paris, France",@aws


In [6]:
# Run extract_stars over every repo in the list.
# The per-repo cap is currently 2, so this does NOT collect the complete star
# history; set MAX_STARS_PER_REPO = None to extract every star event.
# Each call rewrites that repo's stars.csv, including the 20-row file written
# for the first repo by the smoke-test cell.
MAX_STARS_PER_REPO = 2

for full_name in repos:
    log(f"Stars → {full_name}")
    extract_stars(full_name, max_stars=MAX_STARS_PER_REPO)

[2025-05-11T13:36:28.083810] Stars → microsoft/ML-For-Beginners
[2025-05-11T13:36:30.277050] Stars → apache/superset
[2025-05-11T13:36:32.436224] Stars → keras-team/keras
[2025-05-11T13:36:34.845006] Stars → scikit-learn/scikit-learn
[2025-05-11T13:36:37.138748] Stars → pandas-dev/pandas
[2025-05-11T13:36:39.186616] Stars → jakevdp/PythonDataScienceHandbook
[2025-05-11T13:36:41.470102] Stars → apache/airflow
[2025-05-11T13:36:43.590556] Stars → streamlit/streamlit
[2025-05-11T13:36:45.783720] Stars → GokuMohandas/Made-With-ML
[2025-05-11T13:36:47.848888] Stars → gradio-app/gradio
[2025-05-11T13:36:49.924026] Stars → ray-project/ray
[2025-05-11T13:36:52.027707] Stars → explosion/spaCy
[2025-05-11T13:36:54.215647] Stars → AMAI-GmbH/AI-Expert-Roadmap
[2025-05-11T13:36:56.524263] Stars → Lightning-AI/pytorch-lightning
[2025-05-11T13:36:58.630122] Stars → microsoft/Data-Science-For-Beginners
[2025-05-11T13:37:01.038707] Stars → donnemartin/data-science-ipython-notebooks
[2025-05-11T13:37:03