In [2]:
import os
import json
from typing import Dict, List, Optional, Union, cast
import requests
from bs4 import BeautifulSoup
from env import github_token, github_username
import pandas
import urllib.request
from html.parser import HTMLParser

In [3]:
REPOS = [
    "gocodeup/codeup-setup-script",
    "gocodeup/movies-application",
    "torvalds/linux",
]

headers = {"Authorization": f"token {github_token}", "User-Agent": github_username}

if headers["Authorization"] == "token " or headers["User-Agent"] == "":
    raise Exception(
        "You need to follow the instructions marked TODO in this script before trying to use it"
    )

In [4]:

def github_api_request(url: str) -> Union[List, Dict]:
    response = requests.get(url, headers=headers)
    #response_data = response.json()
    if response.status_code != 200:
        raise Exception(
            f"Error response from github api! status code: {response.status_code}, "
            #f"response: {json.dumps(response_data)}"
        )
    return response

In [5]:
def get_repo_contents(repo: str) -> List[Dict[str, str]]:
    url = f"https://api.github.com/repos/{repo}/contents/"
    contents = github_api_request(url)
    if type(contents) is list:
        contents = cast(List, contents)
        return contents
    raise Exception(
        f"Expecting a list response from {url}, instead got {json.dumps(contents)}"
    )

In [6]:
def get_readme_download_url(files: List[Dict[str, str]]) -> str:
    """
    Takes in a response from the github api that lists the files in a repo and
    returns the url that can be used to download the repo's README file.
    """
    for file in files:
        if file["name"].lower().startswith("readme"):
            return file["download_url"]
    return " "

In [11]:
test_links = []
main_link = 'https://github.com/'
start_link = 'https://github.com/search?q=space&type=Repositories'
filename = 'repos.json'
headers = {"Authorization": f"token {github_token}", "User-Agent": github_username}
response = requests.get(start_link, headers=headers)
soup = BeautifulSoup(response.content,'html.parser')
all_blocks = soup.find_all('li', class_='repo-list-item hx_hit-repo d-flex flex-justify-start py-4 public source')
for block in all_blocks:
    print(block.find('span', itemprop='programmingLanguage').text)
    link =  block.find('a', class_='v-align-middle')['href'][1:]
    print(link)
    test_links.append(link)
def language_and_links(search_url):



Lua
SpaceVim/SpaceVim
Emacs Lisp
syl20bnr/spacemacs
JavaScript
r-spacex/SpaceX-API
C#
KeenSoftwareHouse/SpaceEngineers
Python
a1studmuffin/SpaceshipGenerator
HTML
MicrosoftDocs/mslearn-tailspin-spacegame-web
Shell
spaceship-prompt/spaceship-prompt
Rust
spacedriveapp/spacedrive
JavaScript
SublimeText/Spacegray
JavaScript
sar-gupta/space


In [8]:
def get_repo_language(repo: str) -> str:
    url = f"https://api.github.com/repos/{repo}"
    repo_info = github_api_request(url)
    if type(repo_info) is dict:
        repo_info = cast(Dict, repo_info)
        if "language" not in repo_info:
            raise Exception(
                "'language' key not round in response\n{}".format(json.dumps(repo_info))
            )
        return repo_info["language"]
    raise Exception(
        f"Expecting a dictionary response from {url}, instead got {json.dumps(repo_info)}"
    )

In [9]:
def process_repo(repo: str) -> Dict[str, str]:
    """
    Takes a repo name like "gocodeup/codeup-setup-script" and returns a
    dictionary with the language of the repo and the readme contents.
    """
    contents = get_repo_contents(repo)
    readme_download_url = get_readme_download_url(contents)
    if readme_download_url == "":
        readme_contents = ""
    else:
        readme_contents = requests.get(readme_download_url).text
    return {
        "repo": repo,
        "language": get_repo_language(repo),
        "readme_contents": readme_contents,
    }

In [13]:

process_repo('SpaceVim/SpaceVim')


TypeError: Object of type Response is not JSON serializable

In [70]:
github_api_request('https://github.com//SpaceVim/SpaceVim')

<Response [200]>

In [None]:
start_link = 'https://github.com/search?q=space&type=Repositories'
#filename = 'repos.json'
headers = {"Authorization": f"token {github_token}", "User-Agent": github_username}
response = requests.get(start_link, headers=headers)
all_blocks = soup.find_all('li', class_='repo-list-item hx_hit-repo d-flex flex-justify-start py-4 public source')
for block in all_blocks:
    print(block.find('span', itemprop='programmingLanguage').text)
    link = main_link + block.find('a', class_='v-align-middle')['href']
    print(link)
    test_links.append(link)
page = 2