In [2]:
import os
import json
from typing import Dict, List, Optional, Union, cast

from env import github_token, github_username

from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import requests

# TODO: Make a github personal access token.
#     1. Go here and generate a personal access token: https://github.com/settings/tokens
#        You do _not_ need to select any scopes, i.e. leave all the checkboxes unchecked
#     2. Save it in your env.py file under the variable `github_token`
# TODO: Add your github username to your env.py file under the variable `github_username`
# TODO: Add more repositories to the `REPOS` list below.

# Repositories to scrape, each written as "<owner>/<name>".
REPOS = [
    "gocodeup/codeup-setup-script",
    "gocodeup/movies-application",
    "torvalds/linux",
]

# Headers passed to requests.get() by github_api_request; both values come
# from env.py.
headers = {
    "Authorization": "token {}".format(github_token),
    "User-Agent": github_username,
}

# Fail fast when either credential in env.py was left as an empty string.
if any((headers["Authorization"] == "token ", headers["User-Agent"] == "")):
    raise Exception(
        "You need to follow the instructions marked TODO in this script before trying to use it"
    )


def github_api_request(url: str, timeout: float = 30) -> Union[List, Dict]:
    """
    Send an authenticated GET request to the GitHub API and return the decoded
    JSON body.

    Parameters:
        url: Full URL of the API endpoint to request.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the whole scrape.

    Returns:
        The decoded JSON payload (a list or a dict, depending on the endpoint).

    Raises:
        Exception: if the status code is not 200, or if a 200 response does
            not contain valid JSON.
    """
    response = requests.get(url, headers=headers, timeout=timeout)

    # Decode defensively: error responses are not guaranteed to be JSON, and a
    # decode failure must not hide the HTTP status that explains the problem.
    decode_error = None
    response_data = None
    try:
        response_data = response.json()
    except ValueError as err:
        decode_error = err

    if response.status_code != 200:
        detail = response.text if decode_error is not None else json.dumps(response_data)
        raise Exception(
            f"Error response from github api! status code: {response.status_code}, "
            f"response: {detail}"
        )
    if decode_error is not None:
        raise Exception(
            f"Expecting a JSON response from {url}, instead got {response.text[:200]!r}"
        ) from decode_error
    return response_data


def get_repo_language(repo: str) -> Optional[str]:
    """
    Look up the value of the "language" field that the GitHub API reports for
    a repository.

    Parameters:
        repo: Repository in "<owner>/<name>" form, e.g. "torvalds/linux".

    Returns:
        The value stored under the "language" key of the repository info. The
        API may report null (None) for a repository with no detected language,
        so callers must be prepared for None.

    Raises:
        Exception: if the API response is not a dictionary or has no
            "language" key.
    """
    url = f"https://api.github.com/repos/{repo}"
    repo_info = github_api_request(url)
    if not isinstance(repo_info, dict):
        raise Exception(
            f"Expecting a dictionary response from {url}, instead got {json.dumps(repo_info)}"
        )
    if "language" not in repo_info:
        raise Exception(
            "'language' key not found in response\n{}".format(json.dumps(repo_info))
        )
    return repo_info["language"]


def get_repo_contents(repo: str) -> List[Dict[str, str]]:
    """
    Fetch the listing returned by the GitHub "contents" endpoint for the root
    of a repository.

    Parameters:
        repo: Repository in "<owner>/<name>" form.

    Returns:
        The list decoded from the API response.

    Raises:
        Exception: if the API response is anything other than a list.
    """
    url = f"https://api.github.com/repos/{repo}/contents/"
    listing = github_api_request(url)
    # Guard clause: reject any payload that is not exactly a list.
    if type(listing) is not list:
        raise Exception(
            f"Expecting a list response from {url}, instead got {json.dumps(listing)}"
        )
    return cast(List, listing)


def get_readme_download_url(files: List[Dict[str, str]]) -> str:
    """
    Takes in a response from the github api that lists the files in a repo and
    returns the url that can be used to download the repo's README file.

    Parameters:
        files: Entries of a repository listing; each entry is expected to
            carry a "name" and a "download_url".

    Returns:
        The download url of the first entry whose name starts with "readme"
        (case-insensitive) and that actually has a download url, or "" when
        there is no such entry.
    """
    for file in files:
        name = file.get("name") or ""
        download_url = file.get("download_url")
        # Entries without a download url (the API reports null for
        # directories, e.g. a "readme-assets" folder) cannot be downloaded, so
        # keep looking instead of handing None back to the caller.
        if name.lower().startswith("readme") and download_url:
            return download_url
    return ""


def process_repo(repo: str) -> Dict[str, str]:
    """
    Takes a repo name like "gocodeup/codeup-setup-script" and returns a
    dictionary with the language of the repo and the readme contents.

    Parameters:
        repo: Repository in "<owner>/<name>" form.

    Returns:
        A dictionary with the keys "repo", "language" and "readme_contents".
        "readme_contents" is "" when no README download url was found.
    """
    contents = get_repo_contents(repo)
    readme_download_url = get_readme_download_url(contents)
    # Treat any falsy url (empty string or None) as "no README" so that
    # requests.get is never called without a usable url.
    if not readme_download_url:
        readme_contents = ""
    else:
        # Bound the wait so one stalled download cannot hang the whole scrape.
        readme_contents = requests.get(readme_download_url, timeout=30).text
    return {
        "repo": repo,
        "language": get_repo_language(repo),
        "readme_contents": readme_contents,
    }


def scrape_github_data() -> List[Dict[str, str]]:
    """
    Run process_repo on every entry of REPOS, in order, and collect the
    results.

    Returns:
        One processed dictionary per repository listed in REPOS.
    """
    processed = []
    for repo_name in REPOS:
        processed.append(process_repo(repo_name))
    return processed


if __name__ == "__main__":
    # Scrape every repository in REPOS and save the result as JSON.
    data = scrape_github_data()
    # Use a context manager so the file is flushed and closed even if
    # serialization fails part-way through.
    with open("data.json", "w") as output_file:
        json.dump(data, output_file, indent=1)

ModuleNotFoundError: No module named 'env'

In [3]:
def web_scrape_repos(topic: str = "bitcoin", max_pages: int = 99) -> List[str]:
    """
    Collect repository links from GitHub's HTML search result pages.

    Parameters:
        topic: Search query sent to GitHub. Defaults to "bitcoin".
        max_pages: Number of result pages to request, starting at page 1.
            Defaults to 99.

    Returns:
        The "href" values of all anchors with the CSS class "v-align-middle"
        found on the fetched pages, in page order. The list may be shorter
        than expected if GitHub stops answering with status 200.
    """
    search_url = "https://github.com/search"
    repo_links: List[str] = []

    for page in range(1, max_pages + 1):
        # Let requests build and URL-encode the query string.
        response = requests.get(
            search_url,
            params={"p": page, "q": topic, "type": "Repositories"},
            timeout=30,
        )
        # Stop at the first failed page and return what was collected so far,
        # rather than parsing an error page as if it were search results.
        if response.status_code != 200:
            break

        soup = BeautifulSoup(response.text, "html.parser")
        for link in soup.find_all("a", class_="v-align-middle"):
            # Skip anchors without an href instead of raising KeyError.
            if link.has_attr("href"):
                repo_links.append(link["href"])

    return repo_links

In [4]:
# Hard-coded list of GitHub repository paths, each in "/<owner>/<name>" form.
# NOTE(review): presumably the saved output of web_scrape_repos() -- confirm.
dico = [
    "/followtheart/bitcoin-notes",
    "/citp/BlockSci",
    "/kallerosenbaum/grokkingbitcoin",
    "/slush0/stratum-mining",
    "/dev0p0/BitcoinCrack",
    "/kristapsk/bitcoin-scripts",
    "/keep-network/tbtc",
    "/sebicas/bitcoin-sniffer",
    "/llSourcell/how_to_build_a_bitcoin_startup",
    "/cryptoapi/Bitcoin-Payments-Woocommerce",
    "/gferrin/bitcoin-code",
    "/neocortex/lstm-bitcoin-prediction",
    "/psztorc/Truthcoin",
    "/minium/bitcoin-api-cpp",
    "/mikedeshazer/CreateSendBitcoin",
    "/zxh0/classpy",
    "/enriquez/coinpocketapp.com",
    "/cryptoeax/arbbot",
    "/coinables/Bitcoin-Faucet-Dice-Faucet-Box",
    "/NerdfighterSean/bitcointip",
    "/githubocto/flat-demo-bitcoin-price",
    "/londonappbrewery/BitcoinTicker-iOS12",
    "/rootzoll/raspiblitz",
    "/cryptocoinjs/bs58",
    "/F1LT3R/bitcoin-scraper",
    "/casey/ord",
    "/mynodebtc/mynode",
    "/bitauth/libauth",
    "/a-rodin/btc-trader",
    "/bitpay/wallet",
    "/r-willis/biten",
    "/StockSharp/StockSharp",
    "/consenlabs/token-core-ios",
    "/BitcoinATM/BitcoinATM-php",
    "/thelinuxkid/bitcoinquery",
    "/esotericnonsense/bitcoind-ncurses",
    "/nopara73/DotNetWallet",
    "/solana-labs/solana",
    "/hexonaut/bitcoin-transaction",
    "/chaincodelabs/seminars",
    "/samvrlewis/simple-bitcoin",
    "/streamium/streamium",
    "/londonappbrewery/bitcoin-flutter-final",
    "/CallMeJake/BlockCrawler",
    "/bitpay/bitcore-p2p",
    "/zkSNACKs/WalletWasabi",
    "/libbitcoin/libbitcoin-node",
    "/liuchengxu/blockchain-tutorial",
    "/hivewallet/hive-mac",
    "/BitcoinInterestOfficial/BitcoinInterest",
    "/OutCast3k/coinbin",
    "/interplanaria/bottle",
    "/zw3639/BitcoinCracker",
    "/gitchain/gitchain",
    "/cpacia/bchutil",
    "/arikan/bitcoin-voting",
    "/libbitcoin/libbitcoin-server",
    "/nvk/bitcointreasuries.NET",
    "/meesvw/bitcoin-bruteforce",
    "/vmeazevedo/myCrypto_MercadoBitcoin",
    "/bitaps-com/pybtc",
    "/ruesandora/bitcoin-full-node",
    "/libbitcoin/libbitcoin-database",
    "/gavinandresen/bitcoin_miningsim",
    "/joric/pywallet",
    "/BtcGroupCn/BitcoinDeveloperGuide_zhcn",
    "/sbuss/bitmerchant",
    "/nambrot/blockchain-in-js",
    "/behas/bitcoingraph",
    "/jamaljsr/polar",
    "/libbitcoin/libbitcoin-consensus",
    "/doomhz/coinnext",
    "/chaors/Bitcoin_read",
    "/trezor/connect",
    "/Toporin/BitcoinCore",
    "/Greedi/bitcoin-faucet",
    "/ruigomeseu/bitcoin-address-validation",
    "/BitcoinQnA/BitcoinPrivacyGuide",
    "/Isaacdelly/Plutus",
    "/shadders/BitcoinWallet",
]