In [1]:
import requests
from dotenv import load_dotenv
import os
# Load environment variables via python-dotenv (typically from a local .env file).
# This must run before get_github is defined: its apiKey default calls
# os.getenv("GITHUB_APIKEY") once, at function-definition time.
load_dotenv()

def get_github(endpoint, apiKey=os.getenv("GITHUB_APIKEY"), query_params=None, timeout=30):
    """
    Send a GET request to the GitHub REST API and return the decoded JSON body.

    Args:
        endpoint: Path appended to https://api.github.com, e.g. "/search/code".
        apiKey: Token sent as "Authorization: Bearer <apiKey>". Defaults to the
            GITHUB_APIKEY environment variable as it was when this function
            was defined. When empty or None, no Authorization header is sent.
        query_params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server before the request is aborted.

    Returns:
        The parsed JSON body of a 200 response.

    Raises:
        ValueError: If the response status code is not 200. The message carries
            GitHub's "message" field when the body provides one, otherwise the
            raw response text. JSON decoding errors on a 200 response propagate
            from res.json() unchanged.
    """
    # Compose the endpoint url
    url = f"https://api.github.com{endpoint}"

    # Only authenticate when a token is available; a literal "Bearer None"
    # header would be rejected as bad credentials.
    headers = {"Authorization": f"Bearer {apiKey}"} if apiKey else {}

    # Make the request using the HTTP GET verb; the timeout keeps a stalled
    # connection from blocking the notebook forever.
    res = requests.get(url, params=query_params, headers=headers, timeout=timeout)

    print(f"Request data to {res.url} status_code:{res.status_code}")

    if res.status_code != 200:
        # Error bodies are not guaranteed to be JSON (e.g. gateway errors),
        # nor to contain a "message" key, so extract it defensively.
        try:
            payload = res.json()
        except ValueError:
            payload = None
        message = payload.get("message") if isinstance(payload, dict) else None
        raise ValueError(f"Invalid github api call: {message or res.text}")

    return res.json()


In [4]:
# Query GitHub code search, scoped to the ironhack-datalabs/scavenger repo
# and filtered with the ".scavengerhunt" filename qualifier.
names = get_github(
    "/search/code",
    query_params={
        "q": "repo:ironhack-datalabs/scavenger filename:.scavengerhunt",
    },
)

Request data to https://api.github.com/search/code?q=repo%3Aironhack-datalabs%2Fscavenger+filename%3A.scavengerhunt status_code:200


In [7]:
# Keep only the two fields needed downstream from each search hit:
# the repo-relative path and the bare file name.
fnames = [
    {"path": hit["path"], "name": hit["name"]}
    for hit in names["items"]
]

In [12]:
# Order the entries by file name so they can be read in sequence.
fnames = sorted(
    fnames,
    key=lambda entry: entry["name"],
)

In [13]:
# Display the sorted entries to check the ordering by name.
fnames

[{'path': '98750/.0001.scavengerhunt', 'name': '.0001.scavengerhunt'},
 {'path': '88596/.0002.scavengerhunt', 'name': '.0002.scavengerhunt'},
 {'path': '60224/.0003.scavengerhunt', 'name': '.0003.scavengerhunt'},
 {'path': '68848/.0004.scavengerhunt', 'name': '.0004.scavengerhunt'},
 {'path': '44639/.0005.scavengerhunt', 'name': '.0005.scavengerhunt'},
 {'path': '15024/.0006.scavengerhunt', 'name': '.0006.scavengerhunt'},
 {'path': '17020/.0007.scavengerhunt', 'name': '.0007.scavengerhunt'},
 {'path': '15534/.0008.scavengerhunt', 'name': '.0008.scavengerhunt'},
 {'path': '97881/.0009.scavengerhunt', 'name': '.0009.scavengerhunt'},
 {'path': '47830/.0010.scavengerhunt', 'name': '.0010.scavengerhunt'},
 {'path': '50896/.0011.scavengerhunt', 'name': '.0011.scavengerhunt'},
 {'path': '15534/.0012.scavengerhunt', 'name': '.0012.scavengerhunt'},
 {'path': '89338/.0013.scavengerhunt', 'name': '.0013.scavengerhunt'},
 {'path': '49418/.0014.scavengerhunt', 'name': '.0014.scavengerhunt'},
 {'pat

In [16]:
# Build one raw-content URL per file from its repo-relative path
# (master branch of ironhack-datalabs/scavenger).
urls = [
    f'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/{entry["path"]}'
    for entry in fnames
]

In [17]:
# Display the generated raw-content URLs.
urls

['https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/98750/.0001.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/88596/.0002.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/60224/.0003.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/68848/.0004.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/44639/.0005.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/15024/.0006.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/17020/.0007.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/15534/.0008.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/97881/.0009.scavengerhunt',
 'https://raw.githubusercontent.com/ironhack-datalabs/scavenger/master/47830/.0010.scavengerhunt',
 'https://

In [20]:
# Download every fragment in order. Fail loudly on an HTTP error so that an
# error page (404, 5xx, ...) is never silently spliced into the sentence, and
# use a timeout so a stalled connection cannot hang the notebook.
sentence = []
for url in urls:
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    sentence.append(res.text)

In [22]:
# Drop trailing whitespace (including the newline) from each downloaded fragment.
sentence = list(map(str.rstrip, sentence))

In [24]:
# Stitch the ordered fragments together with single spaces to reveal the sentence.
" ".join(sentence)

'In data science, 80 percent of time spent is preparing data, 20 percent of time is spent complaining about the need to prepare data.'