# Scan Repositories

This notebook scans repositories to collect details about how the `wontfix` label is used.

In [7]:
# Optional: run this cell only if the required dependencies are not already installed.
# Invoking pip through sys.executable installs them for the interpreter this Jupyter kernel runs on.
import sys
!{sys.executable} -m pip install numpy pandas requests


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [10]:
import pandas as pd
df = pd.read_csv('../data/repos.csv', index_col='id')


def _split_semicolon_list(value) -> list:
    """Turn a semicolon-separated string into a list; a missing value becomes an empty list."""
    return [] if pd.isna(value) else value.split(';')


# Convert these strings of semicolon-separated values into proper lists.
# Both columns go through the same NaN-safe helper so that a repo without labels
# ends up with [] (as topics already did) instead of a float NaN.
for list_column in ('labels', 'topics'):
    df[list_column] = df[list_column].apply(_split_semicolon_list)

df.head()

Unnamed: 0_level_0,name,isFork,commits,branches,releases,forks,mainLanguage,defaultBranch,license,homepage,...,metrics,lastCommit,lastCommitSHA,hasWiki,isArchived,isDisabled,isLocked,languages,labels,topics
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2049,rzwitserloot/lombok,False,3243,3,3,1932,Java,master,Other,https://projectlombok.org/,...,"[{""blankLines"":6,""codeLines"":236,""commentLines...",2021-04-18T04:36:13,5120abe4741c78d19d7e65404f407cfe57074a47,True,False,False,False,"{""Java"":4263238,""HTML"":236155,""JavaScript"":158...","[accepted, android, aop, awaiting-fix-confirma...",[]
29945,android-async-http/android-async-http,False,899,4,3,4194,Java,master,Apache License 2.0,https://github.com/android-async-http/android-...,...,"[{""blankLines"":65,""codeLines"":331,""commentLine...",2021-01-18T09:40:25,018a0b8d96a0dd569de9f8128cfe5d030e0423ef,True,False,False,False,"{""Java"":311001}","[critical, documentation, duplicate, feature-r...",[]
49010,eclipse-vertx/vert.x,False,6050,58,0,2032,Java,master,Other,http://vertx.io,...,"[{""blankLines"":7,""codeLines"":118,""commentLines...",2024-03-22T08:02:07,23da2658d33278ddcc3513a6aaab8b9e4b6e748a,True,False,False,False,"{""Java"":6129111,""HTML"":203}","[blocker, bug, clustering, critical, deferred,...","[concurrency, event-loop, high-performance, ht..."
88718,signalapp/signal-android,False,14322,9,0,5977,Java,main,GNU Affero General Public License v3.0,https://signal.org,...,"[{""blankLines"":0,""codeLines"":714,""commentLines...",2024-03-19T06:36:32,85929809f067da6af0d60fa964df126babd12f46,True,False,False,False,"{""Java"":11176237,""Kotlin"":8944419,""Handlebars""...","[a11y, acknowledged, backup, calling, camera, ...",[]
166617,openzipkin/zipkin,False,2917,43,86,3060,Java,master,Apache License 2.0,https://zipkin.io/,...,"[{""blankLines"":2,""codeLines"":22,""commentLines""...",2024-03-12T04:23:58,e0f6803947f744a4af5e18f2c78c037ae950bbaf,True,False,False,False,"{""Java"":1906747,""JavaScript"":226653,""TypeScrip...","[bug, cassandra, chore, collector, dependencie...","[distributed-tracing, observability, openzipki..."


## Fetch from GitHub

Now we can go through each repo and fetch `wontfix` issues from GitHub

In [61]:
import requests, os, pprint

# GitHub API credential, read from the GITHUB_TOKEN environment variable (None when it is unset).
# fetch_issues_for_repo uses it for the Authorization header of its requests.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')

def fetch_issues_for_repo(repo_name: str, label: str = 'wontfix', per_page: int = 100, start_page: int = 1, issue_state: str = 'all', timeout: float = 30.0) -> list:
    """
    Fetches all issues carrying a given label from a repository via the GitHub REST API.

    Pages are requested one after another, starting at ``start_page``, until the
    response's ``Link`` header no longer advertises a ``rel="next"`` page.
    GitHub treats pull requests as issues, so the result can contain both.

    If a request comes back with a non-success status, the error is printed and
    the issues collected so far are returned (best effort, no exception raised).

    :param repo_name: repository in ``owner/name`` form, e.g. ``'openzipkin/zipkin'``
    :param label: label name sent as the ``labels`` filter
    :param per_page: number of issues per page (GitHub's maximum is 100)
    :param start_page: 1-based page number to start fetching from
    :param issue_state: issue state filter: ``'open'``, ``'closed'`` or ``'all'``
    :param timeout: seconds to wait for each HTTP request before giving up
    :return: list of issue dicts decoded from the JSON responses
    :raises requests.RequestException: on network failure or timeout
    """
    # The html+json media type asks GitHub for the HTML-rendered issue body.
    headers = {'Accept': 'application/vnd.github.html+json'}
    if GITHUB_TOKEN:
        # GitHub expects "<scheme> <token>". Keep a scheme that is already part of
        # the environment variable; otherwise send the bare token as a Bearer token.
        token = GITHUB_TOKEN.strip()
        headers['Authorization'] = token if ' ' in token else f'Bearer {token}'

    # GitHub API URL for fetching issues from the repository
    issues_url = f'https://api.github.com/repos/{repo_name}/issues'

    all_issues = []
    current_page = start_page

    while True:
        params = {
            'labels': label,
            'state': issue_state,
            'per_page': per_page,
            'page': current_page
        }

        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(issues_url, headers=headers, params=params, timeout=timeout)

        # Stop on an error response but keep whatever was collected so far.
        if not response.ok:
            print('Failed to fetch issues:', response.status_code, response.content)
            break

        all_issues.extend(response.json())

        # GitHub advertises further pages through the Link header.
        if 'rel="next"' not in response.headers.get('Link', ''):
            break  # End of pages
        current_page += 1

    return all_issues

In [71]:
from typing import Tuple, List

def separate_issues_from_prs(items: list) -> Tuple[List[dict], List[dict]]:
    """
    Split a list returned by the GitHub issues API into plain issues and pull requests (PRs).

    Technically all PRs are issues, so the GitHub API returns PRs alongside issues;
    an item is treated as a PR when it has a ``pull_request`` key.

    :param items: issue dicts as returned by the GitHub issues API
    :return: ``(issues, prs)`` in that order: first the items that are not PRs, then the PRs.
             The relative order of the items within each list is preserved.
    """
    issues: List[dict] = []
    prs: List[dict] = []
    for item in items:
        # Only pull requests carry the 'pull_request' key.
        target = prs if 'pull_request' in item else issues
        target.append(item)
    return issues, prs


In [72]:
# Try the fetch on a single repository, using the default 'wontfix' label.
results = fetch_issues_for_repo('openzipkin/zipkin', per_page=100)
# The issues endpoint also returns pull requests, so split the two apart.
issues, prs = separate_issues_from_prs(results)

print(f"Issues: {len(issues)}")
print(f"PRs: {len(prs)}")

Issues: 15
PRs: 4
