# ISSUE DOWNLOADER

In [1]:
import os
import re
import requests
import json
import base64
import codecs

In [2]:
def load_all_resource(url, auth):
    """
    Downloads JSON from an API URL, following GitHub's pagination.

    GitHub paginates long listings and advertises the following page in the
    ``Link`` response header. This function requests ``url``, then keeps
    requesting the ``rel="next"`` URL until no such link is returned, and
    concatenates the decoded JSON of every page.

    Pages are fetched in a loop rather than recursively, so a listing with a
    very large number of pages cannot exhaust Python's recursion limit.

    Parameters:
        url: URL of the first page to request.
        auth: Passed unchanged to ``requests.get`` as its ``auth`` argument.

    Returns:
        The decoded JSON of the first page. When further pages exist, their
        items are appended to it with ``extend`` (so a paginated response is
        expected to decode to a list).

    Raises:
        Exception: If any page is answered with a non-OK status.
    """
    data = None
    first_page = True
    next_url = url
    while next_url is not None:
        print(next_url)
        r = requests.get(next_url, auth=auth)
        if not r.ok:
            raise Exception('Github returned status code {} ({}) when loading {} . Check that '
                            'your username, password, and repo name are correct.'.format(r.status_code, r.reason, next_url))
        page = r.json()
        if first_page:
            # The first page is returned as-is; later pages are merged into it.
            data = page
            first_page = False
        else:
            data.extend(page)
        # Load data from the next page, if any
        next_url = None
        if 'link' in r.headers:
            # Map each advertised relation ("next", "last", ...) to its URL.
            pages = {rel: link for link, rel in re.findall(r'<(.*?)>;\s+rel=\"(.*?)\"', r.headers['link'])}
            print(pages)
            next_url = pages.get('next')
    return data


def get_json(username, password, repo):
    """
    Fetches every issue of a repository (open and closed) as JSON.

    For each issue the comment and event listings are fetched as well and
    stored on the issue's dictionary under the 'comments' and 'events' keys,
    replacing any value already stored under those keys. The issue number is
    printed before its comments and events are requested.

    Returns the list of issue dictionaries.
    """
    credentials = (username, password)
    issues_url = 'https://api.github.com/repos/{}/issues?state=all'.format(repo)
    issues = load_all_resource(issues_url, auth=credentials)

    # (destination key, key holding the URL to fetch), in request order
    attachments = (('comments', 'comments_url'), ('events', 'events_url'))
    for entry in issues:
        print('#{}'.format(entry['number']))
        for target_key, url_key in attachments:
            entry[target_key] = load_all_resource(entry[url_key], auth=credentials)
    return issues


def download_embedded_images(json_data, folder):
    """
    Downloads all of the images attached to issues for the repository.

    The issue data is serialised to a JSON string and scanned for
    ``https://cloud.githubusercontent.com/`` URLs delimited by parentheses or
    double quotes. Each distinct image is streamed into ``folder``.

    The file name is the URL-safe base64 encoding of the URL path followed by
    the path's own extension (if it has one). The URL-safe alphabet is used
    because standard base64 can emit ``/``, which would be interpreted as a
    directory separator inside the file name.

    Parameters:
        json_data: JSON-serialisable issue data to scan for image URLs.
        folder: Existing directory the images are written into.

    Raises:
        Exception: If an image request is answered with a non-OK status.
    """
    json_str = json.dumps(json_data)
    seen = set()
    for path in re.findall(r'[\("]https:\/\/cloud.githubusercontent.com\/(.*?)[\)"]', json_str):
        # The same image may be referenced several times; fetch it only once.
        if path in seen:
            continue
        seen.add(path)

        img_url = 'https://cloud.githubusercontent.com/'+path
        response = requests.get(img_url, stream=True)
        try:
            if not response.ok:
                raise Exception('Got a bad response while download the embedded image from {}! {} {}'.format(img_url, response.status_code, response.reason))
            # b64encode needs bytes and returns bytes; convert explicitly so
            # the name can be joined with the (text) extension.
            encoded = base64.urlsafe_b64encode(path.encode('utf-8')).decode('ascii')
            # splitext yields '' when the last path component has no dot,
            # instead of treating the whole path as the extension.
            extension = os.path.splitext(path)[1]
            with open(os.path.join(folder, encoded + extension), 'wb') as f:
                for block in response.iter_content(1024):
                    if not block:
                        break
                    f.write(block)
        finally:
            # A streamed response holds its connection until closed.
            response.close()

                
def mkdown_p(text):
    """
    Returns ``text`` with a single newline appended, which is how this
    module writes one markdown paragraph.
    """
    terminator = '\n'
    return text + terminator


def build_markdown(repo, data, issue_number):
    """
    Generates the markdown for a single issue of a repository.

    The issue is selected by its ``'number'`` field rather than by its
    position in the list, so the right issue is found even when the
    repository's issue numbers are not contiguous.

    Parameters:
        repo: Repository name. Not used by this function; kept so existing
            callers keep working.
        data: Iterable of issue dictionaries, each with a 'number' key and
            optionally a 'body' key.
        issue_number: Number of the issue whose body should be rendered.

    Returns:
        The body of the matching issue as a markdown paragraph, or an empty
        string when no issue has that number. A missing or null body is
        rendered as an empty paragraph.
    """
    lines = []
    for issue in sorted(data, key=lambda x: x['number']):
        if issue['number'] != issue_number:
            continue
        # An issue's body may be absent or None; render it as empty text
        # instead of failing on None + str.
        lines.append(mkdown_p(issue.get('body') or ''))
    return ''.join(lines)

### Issue downloading and saving in JSON/markdown

In [3]:
def get_issue(user, passw, repository, issue_number, output_folder):
    """
    Downloads a repository's issues and saves them under ``output_folder``.

    Steps, in order:
      1. create ``output_folder`` if it does not exist yet;
      2. download all issues with ``get_json``;
      3. download the images embedded in them with
         ``download_embedded_images``;
      4. write the complete issue data to ``issues.json``;
      5. write the markdown built by ``build_markdown`` for
         ``issue_number`` to ``issue.txt``.

    Parameters:
        user, passw: Credentials forwarded to ``get_json``.
        repository: Repository name forwarded to ``get_json`` and
            ``build_markdown``.
        issue_number: Issue to render into ``issue.txt``.
        output_folder: Directory that receives all output files.
    """
    def announce(message):
        # Progress messages are wrapped in ANSI codes for green text.
        print('\033[32m' + message + '\033[0m')

    # Create the folder and tolerate it already existing, rather than
    # checking first and racing with another process creating it.
    try:
        os.makedirs(output_folder)
    except OSError:
        if not os.path.isdir(output_folder):
            raise

    announce('Downloading issues...')
    issues = get_json(user, passw, repository)

    announce('Downloading images attached to issues...')
    download_embedded_images(issues, output_folder)

    announce('Saving JSON...')
    with codecs.open(os.path.join(output_folder, 'issues.json'), 'w', 'utf-8') as f:
        json.dump(issues, f, indent=4)

    announce('Saving Markdown...')
    markdown = build_markdown(repository, issues, issue_number)
    # The with-statement closes the file; no explicit close() is needed.
    with codecs.open(os.path.join(output_folder, 'issue.txt'), 'w', 'utf-8') as f:
        f.write(markdown)