In [None]:
import os
import os.path
from os.path import basename
from urllib.parse import urljoin, urlsplit

import nbformat as nbf
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify

In [None]:
def pull_webpage(url):
    """Fetch a page and replace its images with local copies.

    The page at ``url`` is downloaded and parsed with BeautifulSoup. Every
    ``<img>`` tag that carries a ``src`` is resolved against
    ``https://projecteuler.net/``, saved under ``resources/`` (unless a file
    with that name is already there) and rewritten to point at the local file.

    Args:
        url: Address of the page to download.

    Returns:
        The parsed ``BeautifulSoup`` document with rewritten image sources, or
        ``None`` when the page request does not answer with HTTP 200.
    """
    base_url = "https://projecteuler.net/"
    image_dir = "resources"
    # Without a timeout a stalled connection would block the notebook forever
    timeout_seconds = 30

    # Define user-agent headers to mimic a real browser
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    # Send a GET request to the URL with headers
    response = requests.get(url, headers=headers, timeout=timeout_seconds)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve article from {url}. Status code: {response.status_code}")
        return None

    # Parse the HTML content of the page
    soup = BeautifulSoup(response.content, 'html.parser')

    for img in soup.find_all("img"):
        src = img.get("src")
        # Tags without a src, or with inline data, have nothing to download
        if not src or src.startswith("data:"):
            continue

        # urljoin copes with relative, root-relative and absolute sources alike
        remote = urljoin(base_url, src)
        # Take the name from the URL path only, so a query string never leaks in
        filename = basename(urlsplit(remote).path)
        if not filename:
            continue
        filepath = image_dir + "/" + filename

        if not os.path.isfile(filepath):
            image_response = requests.get(remote, headers=headers, timeout=timeout_seconds)
            if image_response.status_code != 200:
                # Do not store an error page as an image; keep a working remote link
                print(f"Failed to download image {remote}. Status code: {image_response.status_code}")
                img['src'] = remote
                continue
            os.makedirs(image_dir, exist_ok=True)
            with open(filepath, "wb") as f:
                f.write(image_response.content)

        img['src'] = filepath
    return soup
    

def generate_notebook(filename = None, cells = None, override_existing = False):
    """Build a notebook from ``cells`` and write it to ``filename``.

    Args:
        filename: Path of the notebook to write. Defaults to ``"test.ipynb"``
            when ``None``.
        cells: A list of ordered pairs where, for each cell, ``cell[0]`` is the
            type of cell (``"text"`` for markdown or ``"code"``) and ``cell[1]``
            is the cell contents. Entries of any other type are skipped with a
            printed warning. When ``None``, a two-cell sample notebook is
            produced.
        override_existing: When true, an existing file is replaced; otherwise
            an existing file is left untouched.
    """
    if filename is None:
        filename = "test.ipynb"

    nb = nbf.v4.new_notebook()

    if cells is None:
        # Sample content is written flush-left: indented markdown would be
        # rendered as a code block instead of a heading.
        text = ("# My first automatic Jupyter Notebook\n"
                "This is an auto-generated notebook.")
        code = ("%pylab inline\n"
                "hist(normal(size=2000), bins=50);")
        print("cells override")

        nb['cells'] = [nbf.v4.new_markdown_cell(text),
                       nbf.v4.new_code_cell(code)]
    else:
        builders = {"text": nbf.v4.new_markdown_cell,
                    "code": nbf.v4.new_code_cell}
        nb['cells'] = []
        for cell in cells:
            cell_type = cell[0]
            contents = cell[1]
            builder = builders.get(cell_type)
            if builder is None:
                print(f"Skipping cell with unknown type:{cell_type!r}")
                continue
            nb['cells'].append(builder(contents))

    # Don't replace file if it exists
    if not override_existing and os.path.isfile(filename):
        print(f"Skipping existing file:{filename}")
        return

    print(f"Generating:{filename}")
    # Notebook JSON is UTF-8; do not depend on the platform default encoding
    with open(filename, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)




In [None]:
# starting_problem = 960
# problem_count = 10
# urls = [f"https://projecteuler.net/minimal={i}" for i in range(starting_problem, starting_problem+problem_count)]
# soups = [pull_webpage(url) for url in urls]

In [None]:
# cells = []
# for i in range(problem_count):
#     problem_number = starting_problem+i
#     soup = soups[i]
    
#     cells.append(("text", f"# Problem {problem_number}\n [Source](https://projecteuler.net/problem={problem_number})"))
#     cells.append(("text", markdownify(soup.prettify()))) #TODO handle katex syntax errors eg problem 282
#     cells.append(("code", f"# Problem {problem_number} workspace"))
#     cells.append(("text", "## Answer: "))

# final_problem = starting_problem+problem_count-1
# generate_notebook(filename = f"Problems{starting_problem:04}_{final_problem:04}.ipynb", cells = cells, override_existing=True)
    

In [None]:
def create_problem_notebooks(starting_problem = 1, problems_per_notebook = 10, ending_problem = 970, override_existing = False):
    """Generate workspace notebooks covering a range of Project Euler problems.

    Problems ``starting_problem`` through ``ending_problem`` (inclusive) are
    split into consecutive groups of ``problems_per_notebook``. Each group is
    written to ``Problems<first>_<last>.ipynb`` with, per problem, a title
    cell, the problem statement converted to markdown, a code workspace cell,
    an answer cell and a horizontal rule.

    Args:
        starting_problem: Number of the first problem to include.
        problems_per_notebook: How many problems go into one notebook; the
            last notebook is shorter when the range does not divide evenly.
        ending_problem: Number of the last problem to include.
        override_existing: Forwarded to ``generate_notebook``. When false,
            notebooks that already exist are skipped without fetching pages.

    Raises:
        ValueError: If ``problems_per_notebook`` is smaller than 1.
    """
    # A non-positive step would never advance past ending_problem
    if problems_per_notebook < 1:
        raise ValueError("problems_per_notebook must be at least 1")

    current_problem = starting_problem
    while current_problem <= ending_problem:
        # Clamp the final notebook so it never runs past ending_problem
        final_problem = min(current_problem + problems_per_notebook - 1, ending_problem)
        filename = f"Problems{current_problem:04}_{final_problem:04}.ipynb"

        # The file would be left untouched anyway, so skip the downloads too
        if not override_existing and os.path.isfile(filename):
            print(f"Skipping existing notebook:{filename}")
            current_problem = final_problem + 1
            continue

        cells = []
        for problem_number in range(current_problem, final_problem + 1):
            soup = pull_webpage(f"https://projecteuler.net/minimal={problem_number}")

            cells.append(("text", f"# Problem {problem_number}\n [Source](https://projecteuler.net/problem={problem_number})"))
            if soup is None:
                # pull_webpage returns None on a failed request; keep the
                # notebook structure intact instead of crashing on prettify()
                cells.append(("text", "*Problem statement could not be retrieved.*"))
            else:
                #TODO: markdownify todos:
                # handle coloring errors eg problem 11
                # handle katex syntax errors eg problem 282
                # handle links to other problems eg problem 18
                cells.append(("text", markdownify(soup.prettify())))
            cells.append(("code", f"# Problem {problem_number} workspace"))
            cells.append(("text", "## Answer: "))
            cells.append(("text", "___"))

        generate_notebook(filename = filename, cells = cells, override_existing=override_existing)
        current_problem = final_problem + 1


In [None]:
# create_problem_notebooks(override_existing=False)