In [2]:
import requests
import time
import json
import logging
# Route WARNING-and-above records to error.log, stamped with time and level.
logging.basicConfig(
    filename='error.log',
    level=logging.WARNING,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

In [3]:
def fetch_commits(owner, repo, branch='main', token='', per_page=100, pages=5, timeout=30):
    """Fetch commits from a GitHub repository branch, following pagination.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        branch: Value sent as the ``sha`` query parameter (the branch to list).
        token: Optional GitHub token; sent in an ``Authorization`` header
            when non-empty.
        per_page: Number of commits requested per page.
        pages: Maximum number of pages to fetch. ``None`` keeps following
            "next" links until none is left; zero or less fetches nothing.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list with the decoded commit objects of every page fetched. If a
        request fails (network error or non-200 status) the failure is
        printed and logged, and the commits gathered so far are returned.
    """
    if pages is not None and pages <= 0:
        return []

    headers = {'Authorization': f'token {token}'} if token else {}
    url = f'https://api.github.com/repos/{owner}/{repo}/commits'
    # Let requests encode the query string so branch names containing
    # characters such as '&' or '#' cannot corrupt the URL. Only the first
    # request needs this: the "next" links already carry their own query.
    params = {'sha': branch, 'per_page': per_page}
    commits = []
    current_page = 0

    while url:
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
        except requests.exceptions.RequestException as e:
            logging.error(f"Error fetching commits for URL: {url}. Error: {e}")
            print("Failed to fetch data:", e)
            break
        params = None

        if response.status_code != 200:
            # Error bodies are not guaranteed to be JSON (e.g. an HTML gateway
            # error page), so fall back to the raw text instead of crashing.
            try:
                detail = response.json()
            except ValueError:
                detail = response.text
            logging.error(
                f"Error fetching commits for URL: {url}. "
                f"Status: {response.status_code}. Response: {detail}"
            )
            print("Failed to fetch data:", detail)
            break

        commits.extend(response.json())
        current_page += 1

        # Stop once the page budget is used up or there is no further page.
        if 'next' in response.links and (pages is None or current_page < pages):
            url = response.links['next']['url']
        else:
            break

    return commits



def fetch_pull_requests_for_commit(owner, repo, commit_sha, token='', timeout=30):
    """Fetch the pull requests associated with a specific commit.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        commit_sha: SHA of the commit to look up.
        token: Optional GitHub token; sent in an ``Authorization`` header
            when non-empty.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails, returns a 4xx/5xx status, or the body is not valid JSON. The
        failure is written to the log together with the URL.
    """
    headers = {'Authorization': f'token {token}'} if token else {}
    url = f'https://api.github.com/repos/{owner}/{repo}/commits/{commit_sha}/pulls'
    try:
        response = requests.get(url, headers=headers, params={'per_page': 100}, timeout=timeout)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures, which older requests
        # versions do not wrap in RequestException.
        logging.error(f"Error fetching pull requests for URL: {url}. Error: {e}")
        return None

def label_commits_with_prs(owner, repo, commits, token='', delay=1):
    """Label each commit with the pull requests it is associated with.

    Every commit dict in ``commits`` gets a ``'pull_requests'`` key holding
    whatever ``fetch_pull_requests_for_commit`` returns for its ``'sha'``
    (``None`` when that lookup fails). The dicts are modified in place.

    Args:
        owner: Repository owner (user or organisation).
        repo: Repository name.
        commits: Iterable of commit dicts, each with a ``'sha'`` key.
        token: Optional GitHub token forwarded to the lookup.
        delay: Seconds to sleep before each lookup; zero, negative or
            ``None`` disables the pause.

    Returns:
        The same ``commits`` object that was passed in.
    """
    for processed, commit in enumerate(commits, start=1):
        # Pause between lookups (presumably to stay under the API rate limit).
        if delay and delay > 0:
            time.sleep(delay)
        commit['pull_requests'] = fetch_pull_requests_for_commit(
            owner, repo, commit['sha'], token
        )
        # Report progress every ten commits.
        if processed % 10 == 0:
            print(f"Processed {processed} commits.")
    return commits


# Target repository specification

In [None]:
# Repository and branch whose commit history is collected.
owner = 'numpy'
repo = 'numpy'
branch = 'main'

# Read the GitHub token from a local file that is kept out of the notebook;
# the context manager closes the file handle as soon as the token is read.
with open("token.secret") as token_file:
    token = token_file.read().strip()

# Quick check with a single small page before running the full fetch.
commit_data = fetch_commits(owner, repo, branch, token, per_page=10, pages=1)
print(f"Fetched {len(commit_data)} commits.")

Fetched 10 commits.


In [None]:
# Fetch commit data
commit_data = fetch_commits(owner, repo, branch, token, per_page=100, pages=5)

# Label commits with associated pull requests
# (the commit dicts are updated in place; the returned list is the same object)
labeled_commits = label_commits_with_prs(owner, repo, commit_data, token)

# Save the labelled commits; `with` guarantees the file is flushed and closed.
with open(f"{repo}.json", "w") as out_file:
    json.dump(labeled_commits, out_file, indent=2)

Processed 10 commits.
Processed 20 commits.
Processed 30 commits.
Processed 40 commits.
Processed 50 commits.
Processed 60 commits.
Processed 70 commits.
Processed 80 commits.
Processed 90 commits.
Processed 100 commits.
Processed 110 commits.
Processed 120 commits.
Processed 130 commits.
Processed 140 commits.
Processed 150 commits.
Processed 160 commits.
Processed 170 commits.
Processed 180 commits.
Processed 190 commits.
Processed 200 commits.
Processed 210 commits.
Processed 220 commits.
Processed 230 commits.
Processed 240 commits.
Processed 250 commits.
Processed 260 commits.
Processed 270 commits.
Processed 280 commits.
Processed 290 commits.
Processed 300 commits.
Processed 310 commits.
Processed 320 commits.
Processed 330 commits.
Processed 340 commits.
Processed 350 commits.
Processed 360 commits.
Processed 370 commits.
Processed 380 commits.
Processed 390 commits.
Processed 400 commits.
Processed 410 commits.
Processed 420 commits.
Processed 430 commits.
Processed 440 commit

In [None]:
# Report how many commits were collected.
total_commits = len(commit_data)
print(f"Total commits: {total_commits}")