In [1]:
import requests

# Function to fetch all pages of data
def fetch_all_pages(url, params, headers):
    """Fetch every page of a paginated JSON endpoint and combine the results.

    Pagination follows the ``next`` relation of the HTTP ``Link`` response
    header (exposed by ``requests`` as ``response.links``) until the server
    stops advertising a further page.

    Args:
        url: URL of the first page to request.
        params: Query parameters for the first request. They are not re-sent
            on follow-up requests, which use the ``next`` URL as returned by
            the server.
        headers: HTTP headers sent with every request.

    Returns:
        A list holding the items of all pages that were fetched. When a
        request answers with a status other than 200, an error line is
        printed and the items collected so far are returned.

    Raises:
        requests.exceptions.RequestException: on connection failures or when
            a request exceeds the timeout.
    """
    all_data = []
    while url:
        # A timeout keeps a stalled connection from blocking the notebook forever.
        response = requests.get(url, params=params, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"Error code {response.status_code}: Failed to fetch data from {url}")
            break

        data = response.json()
        if isinstance(data, list):
            all_data.extend(data)
        else:
            # A single JSON object is kept whole instead of being spread into its keys.
            all_data.append(data)

        # URL of the next page, or None when this was the last one.
        url = response.links.get('next', {}).get('url')
        # Passing params again would append them a second time to a follow-up
        # URL that already has its own query string.
        params = None
    return all_data

def fetch_GitHub_issues_data(owner, repo, state="closed", per_page=100):
    """Download the issue records of a GitHub repository.

    Builds the ``/repos/{owner}/{repo}/issues`` URL of the GitHub REST API
    and hands it to ``fetch_all_pages`` to collect every page.

    Note that this endpoint also returns pull requests: such records carry a
    ``pull_request`` entry and an ``html_url`` containing ``/pull/``.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.
        state: Value of the ``state`` query parameter. Defaults to
            ``"closed"``, the value that was previously hard-coded.
        per_page: Number of records requested per page. Defaults to 100.

    Returns:
        Whatever ``fetch_all_pages`` returns for the issues URL, i.e. the
        list of decoded JSON records.
    """
    # GitHub API endpoint for the repository's issues
    base_url = f"https://api.github.com/repos/{owner}/{repo}"
    issues_url = f"{base_url}/issues?per_page={per_page}"

    # Restrict the listing to the requested state (closed issues by default)
    params = {"state": state}
    headers = {'User-Agent': 'request'}
    return fetch_all_pages(issues_url, params, headers)

In [2]:
# Target repository on GitHub: the owning account and the repository name
owner, repo = "stanfordnlp", "dspy"

In [3]:
# Download the issue records of the configured repository from the GitHub API
issues_data = fetch_GitHub_issues_data(owner=owner, repo=repo)

In [4]:
import pandas as pd

# Build a dataframe from the fetched records, then preview the first rows
issues_df = pd.DataFrame.from_records(data=issues_data)
issues_df.head(n=5)

Unnamed: 0,url,repository_url,labels_url,comments_url,events_url,html_url,id,node_id,number,title,...,closed_at,author_association,active_lock_reason,draft,pull_request,body,reactions,timeline_url,performed_via_github_app,state_reason
0,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://github.com/stanfordnlp/dspy/pull/731,2213863584,PR_kwDOIv2ufM5rFpZ3,731,Fix OpenAIVectorizer,...,2024-03-28T18:34:24Z,NONE,,True,{'url': 'https://api.github.com/repos/stanford...,"Fixes #728 \r\n\r\n-[ ] Squash commits, obey c...",{'url': 'https://api.github.com/repos/stanford...,https://api.github.com/repos/stanfordnlp/dspy/...,,
1,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://github.com/stanfordnlp/dspy/pull/727,2212499670,PR_kwDOIv2ufM5rA-MK,727,Fix typo in minimal-example.mdx,...,2024-03-28T07:03:00Z,CONTRIBUTOR,,False,{'url': 'https://api.github.com/repos/stanford...,,{'url': 'https://api.github.com/repos/stanford...,https://api.github.com/repos/stanfordnlp/dspy/...,,
2,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://github.com/stanfordnlp/dspy/pull/716,2206781646,PR_kwDOIv2ufM5qtol2,716,Update Cohere LM to use the `/chat` API and a...,...,2024-03-27T14:23:18Z,COLLABORATOR,,False,{'url': 'https://api.github.com/repos/stanford...,Cohere has deprecated the /generate API in fav...,{'url': 'https://api.github.com/repos/stanford...,https://api.github.com/repos/stanfordnlp/dspy/...,,
3,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://github.com/stanfordnlp/dspy/issues/715,2206761080,I_kwDOIv2ufM6DiIB4,715,Predictors hang when errors happen in multithr...,...,2024-03-25T22:31:19Z,COLLABORATOR,,,,Consider these three programs:\r\n\r\n**Progra...,{'url': 'https://api.github.com/repos/stanford...,https://api.github.com/repos/stanfordnlp/dspy/...,,completed
4,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://api.github.com/repos/stanfordnlp/dspy/...,https://github.com/stanfordnlp/dspy/pull/714,2206709999,PR_kwDOIv2ufM5qtY5U,714,docs: missing links in the doc,...,2024-03-26T16:22:27Z,CONTRIBUTOR,,False,{'url': 'https://api.github.com/repos/stanford...,,{'url': 'https://api.github.com/repos/stanford...,https://api.github.com/repos/stanfordnlp/dspy/...,,


In [5]:
# Show the column labels of the issues dataframe
issues_df.columns

Index(['url', 'repository_url', 'labels_url', 'comments_url', 'events_url',
       'html_url', 'id', 'node_id', 'number', 'title', 'user', 'labels',
       'state', 'locked', 'assignee', 'assignees', 'milestone', 'comments',
       'created_at', 'updated_at', 'closed_at', 'author_association',
       'active_lock_reason', 'draft', 'pull_request', 'body', 'reactions',
       'timeline_url', 'performed_via_github_app', 'state_reason'],
      dtype='object')

In [6]:
# Save the raw issues table as a CSV file named after the owner and repository
# (relative path, so it is written to the current working directory)
raw_csv_path = f"{owner}_{repo}_issues_data_raw.csv"
issues_df.to_csv(raw_csv_path)