In [1]:
import os
import sys
import datetime
import glob

from bs4 import BeautifulSoup

# Project root: one level above the first sys.path entry, with symlinks resolved.
# NOTE(review): presumably sys.path[0] is the notebook's own folder - confirm for
# the kernel this is run under.
projdir = os.path.realpath(os.path.join(sys.path[0], os.pardir))

In [2]:
# Work on the sub-folder of the project root named after the current calendar year.
year = datetime.date.today().year
yearPath = os.path.join(projdir, f'{year}')

In [3]:
# Fleet id -> display name.
#   * The id is the suffix of the per-fleet file name: '<prefix>-<id>.html'.
#   * The display name becomes the section heading '<name> Fleet'.
#   * mergeHtmlFiles iterates this dict, so the insertion order below is the
#     order in which fleet sections appear in the merged page.
fleets = {
    # Main fleets
    'pro': 'Pro',
    'am': 'Amateur',
    'wing': 'Wingfoil',
    # Foil fleets
    'pro-foil': 'Lightwind Foil - Pro',
    'am-foil': 'Lightwind Foil - Amateur',
    'foil': 'Foil',
    'lw-foil-form': 'Lightwind Foil - Formula',
    'lw-foil-iq': 'Lightwind Foil - IQ',
    'lw-foil-free': 'Lightwind Foil - Freeride',
    # Other
    'mb': 'Master Blaster',
}

In [4]:
def mergeHtmlFiles(path, prefix, title=None):
    '''Merge the per-fleet HTML files of one folder into a single page.

    For every fleet id in ``fleets`` (in dict order) the file
    ``<path>/<prefix>-<fleet>.html`` is read if it exists. The first usable
    file becomes the base document; for every later file its summary title
    (``h3.summarytitle``), optional caption (``div.summarycaption``) and
    table (``table.summarytable``) are moved, in that order, to just after
    the last summary table already in the base document. Each summary title
    is rewritten to ``'<fleet name> Fleet'``.

    A file without a summary title or a summary table is reported with a
    warning and skipped instead of aborting the whole merge.

    Args:
        path: Folder containing the per-fleet files; the merged file is
            written there as well.
        prefix: Common file-name prefix; the output is ``<prefix>.html``.
        title: Optional replacement text for the first ``<h2>`` of the base
            document. Ignored if the base document has no ``<h2>``.

    Returns:
        None. Writes ``<path>/<prefix>.html`` (a ``NO RESULTS`` placeholder
        when nothing could be merged) and prints a status line.
    '''

    results = None

    for fleet, fleetName in fleets.items():
        htmlPath = os.path.join(path, f'{prefix}-{fleet}.html')
        if not os.path.exists(htmlPath):
            continue

        # Only the read needs the file handle; close it before touching the DOM.
        with open(htmlPath, 'r', encoding='utf-8') as htmlFile:
            soup = BeautifulSoup(htmlFile.read(), 'html.parser')

        summaryTitle = soup.find('h3', {'class': 'summarytitle'})
        summaryCaption = soup.find('div', {'class': 'summarycaption'})
        summaryTable = soup.find('table', {'class': 'summarytable'})

        # find() returns None for a missing element; without a title and a
        # table there is nothing meaningful to merge from this file.
        if summaryTitle is None or summaryTable is None:
            print(f'WARNING: No summary found in {htmlPath}, skipping')
            continue

        summaryTitle.string = f'{fleetName} Fleet'

        if results is None:
            if title:
                heading = soup.find('h2')
                if heading is not None:
                    heading.string = title
            results = soup
        else:
            # Every merged document is guaranteed to hold a summary table
            # (checked above), so the last one is always a valid anchor.
            anchor = results.find_all('table', {'class': 'summarytable'})[-1]
            # Chain the inserts so the pieces end up as title, caption, table
            # even when the optional caption is absent.
            for element in (summaryTitle, summaryCaption, summaryTable):
                if element is not None:
                    anchor.insert_after(element)
                    anchor = element

    htmlPath = os.path.join(path, f'{prefix}.html')
    with open(htmlPath, 'w', encoding='utf-8') as htmlFile:
        if results is not None:
            print(f'Refreshing {htmlPath}...')
            htmlFile.write(results.prettify())
        else:
            print(f'WARNING: No results for {htmlPath}')
            htmlFile.write('<p>NO RESULTS</p>')

In [5]:
# Series-wide rankings: the per-fleet files sit directly in the year folder.
eventId = 'slalom-rankings'
mergeHtmlFiles(yearPath, eventId, title='Overall Series')

# Individual events: every 'slalom*' sub-folder of the year folder, in sorted
# order. Plain files matching the pattern are ignored.
for eventDir in sorted(glob.glob(os.path.join(yearPath, 'slalom*'))):
    if not os.path.isdir(eventDir):
        continue
    eventId = os.path.basename(eventDir)
    mergeHtmlFiles(os.path.join(yearPath, eventId), eventId)

print('All done!')

Refreshing /home/jovyan/work/ukwa-slalom/2022/slalom-rankings.html...
Refreshing /home/jovyan/work/ukwa-slalom/2022/slalom1-weymouth/slalom1-weymouth.html...
Refreshing /home/jovyan/work/ukwa-slalom/2022/slalom2-littlehampton/slalom2-littlehampton.html...
Refreshing /home/jovyan/work/ukwa-slalom/2022/slalom3-lee-on-solent/slalom3-lee-on-solent.html...
All done!
