This notebook queries the GitHub API to retrieve the list of all releases for each action found in `data/steps.csv.gz`. 
It produces `data/releases.csv.gz`. 

In [1]:
import os
import warnings

import pandas as pd
import requests
from tqdm import tqdm

In [2]:
# Path to query
PATH = 'https://api.github.com/repos/{owner}/{repo}/releases'

# GitHub API token. It is read from the GITHUB_TOKEN environment variable so
# that no credential is ever stored in (or committed with) the notebook.
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN', '').strip()

HEADERS = {
    'Accept': 'application/vnd.github.v3+json',
}

# Only authenticate when a token is available; without one the requests are
# sent anonymously, which GitHub restricts to a much lower rate limit.
if GITHUB_TOKEN:
    HEADERS['Authorization'] = 'token ' + GITHUB_TOKEN

In [3]:
# Distinct action names referenced by the workflow steps.
# Only the `uses` column is needed: missing values are dropped, references
# starting with 'docker://', './' or 'http' are excluded, and only references
# containing exactly one '/' are kept. Whatever follows the first '@'
# (presumably the version/ref -- confirm against the data) is discarded.
actions = (
    pd.read_csv('../data/steps.csv.gz')['uses']
    .dropna()
    .loc[lambda uses: ~uses.str.startswith(('docker://', './', 'http'))]
    .loc[lambda uses: uses.str.count('/') == 1]
    .str.split('@', n=1)
    .str[0]
    .drop_duplicates()
    .values
)

In [4]:
def extract_releases(repo, timeout=30):
    """Fetch every release of a GitHub repository.

    Parameters
    ----------
    repo : str
        Repository in ``owner/repo`` form.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up, so that a
        stalled connection cannot block the whole crawl.

    Returns
    -------
    list of tuple or None
        One ``(owner, repo, tag_name, published_at)`` tuple per release, with
        ``published_at`` converted by ``pd.to_datetime``. ``None`` is returned
        when ``repo`` is malformed or when any page cannot be retrieved, so a
        partially fetched list is never mistaken for a complete one.

    Warns
    -----
    UserWarning
        For malformed names, network errors and non-200 responses other than
        404, so that failures such as rate limiting do not go unnoticed.
    """
    parts = repo.split('/')
    if len(parts) != 2 or not all(parts):
        warnings.warn('Skipping {!r}: expected "owner/repo"'.format(repo), stacklevel=2)
        return None
    owner, name = parts

    url = PATH.format(owner=owner, repo=name)
    params = {'per_page': 100, 'page': 1}
    output = []

    while True:
        try:
            r = requests.get(url, params=params, headers=HEADERS, timeout=timeout)
        except requests.RequestException as error:
            warnings.warn('Request for {} failed: {}'.format(repo, error), stacklevel=2)
            return None

        if r.status_code != 200:
            # A 404 is the expected answer for a repository that cannot be
            # found; any other status is worth surfacing to the user.
            if r.status_code != 404:
                warnings.warn(
                    'Unexpected HTTP {} for {}'.format(r.status_code, repo),
                    stacklevel=2,
                )
            return None

        for release in r.json():
            output.append((owner, name, release['tag_name'], pd.to_datetime(release['published_at'])))

        # Further pages are advertised through the Link header; relying on it
        # avoids tying the stop condition to the page size.
        if 'next' not in r.links:
            return output
        params['page'] += 1

In [5]:
# Fresh accumulators: `output` collects the release tuples, `done` the
# repositories that have already been queried.
output, done = [], []

# Only repositories missing from `done` still need to be fetched.
inputs = [name for name in actions if name not in done]

for repo in tqdm(inputs):
    result = extract_releases(repo)
    # A `None` result carries no releases, so nothing is added for it.
    output.extend(result or [])
    done.append(repo)

100%|███████████████████████████████████████| 4075/4075 [20:38<00:00,  3.29it/s]


In [6]:
# Sizes of the candidate, scheduled and processed repository lists, and of the collected releases.
tuple(map(len, (actions, inputs, done, output)))

(4075, 4075, 4075, 29447)

In [7]:
# Assemble the collected release tuples into a table, one row per release.
df = pd.DataFrame(
    data=output,
    columns=['owner', 'repo', 'release', 'date'],
)

In [8]:
# Preview the first rows of the release table.
df.head(n=5)

Unnamed: 0,owner,repo,release,date
0,actions,checkout,v3.0.2,2022-04-21 14:56:58+00:00
1,actions,checkout,v2.4.2,2022-04-21 16:04:02+00:00
2,actions,checkout,v3.0.1,2022-04-14 18:22:54+00:00
3,actions,checkout,v2.4.1,2022-04-14 16:14:18+00:00
4,actions,checkout,v3.0.0,2022-03-01 17:48:27+00:00


In [9]:
# Persist the release table as a gzip-compressed CSV.
df.to_csv(
    '../data/releases.csv.gz',
    index=None,
    compression='gzip',
)