In [1]:
import io
import json

from pathlib import Path
from urllib.parse import urlparse, parse_qs

import pandas as pd
import numpy as np

import requests
import clipboard

from PIL import Image
from jinja2 import Template

In [21]:
# Which course module this run generates, and where its files live.
module_number, module_title = 1, 'Introduction'
module_folder = Path('..', '01-intro')

# Metadata file for the module and the YouTube playlist its videos belong to.
meta_json_file = module_folder.joinpath('meta.json')
playlist_id = 'PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK'

## Prepare `meta.json` info

In [3]:
# Tab-separated table of the module's units (one row per unit, header row first).
# The last row uses 'NA' in the `youtube` column for a unit that has no video;
# pandas' default NA handling reads that value as missing.
text = """
module_number	module_title	unit_number	unit_title	full_title	youtube
1	Introduction	1	Introduction	MLOps Zoomcamp 1.1 - Introduction	https://www.youtube.com/watch?v=s0uaFZSzwfI
1	Introduction	2	Environment preparation	MLOps Zoomcamp 1.2 - Environment preparation	https://www.youtube.com/watch?v=IXSiYkP23zo
1	Introduction	3	(Optional) Training a ride duration prediction model	MLOps Zoomcamp 1.3 - (Optional) Training a ride duration prediction model	https://www.youtube.com/watch?v=iRunifGSHFc
1	Introduction	4	Course overview	MLOps Zoomcamp 1.4 - Course overview	https://www.youtube.com/watch?v=teP9KWkP6SM
1	Introduction	5	MLOps maturity model	MLOps Zoomcamp 1.5 - MLOps maturity model	https://www.youtube.com/watch?v=XwTH8BDGzYk
1	Introduction	6	Homework	MLOps Zoomcamp 1.6 - Homework	NA
""".strip()

In [4]:
# Parse the tab-separated unit table into a DataFrame.
df = pd.read_csv(io.StringIO(text), sep='\t')

In [5]:
# Units without a video have a missing `youtube` value; store an empty string instead.
df = df.fillna({'youtube': ''})

In [6]:
# Keep only the per-unit columns and rename them to the keys used in meta.json.
df_units = df.loc[:, ['unit_number', 'unit_title', 'youtube']]
units = (
    df_units
    .rename(columns={'unit_number': 'number', 'unit_title': 'title'})
    .to_dict(orient='records')
)
units

[{'number': 1,
  'title': 'Introduction',
  'youtube': 'https://www.youtube.com/watch?v=s0uaFZSzwfI'},
 {'number': 2,
  'title': 'Environment preparation',
  'youtube': 'https://www.youtube.com/watch?v=IXSiYkP23zo'},
 {'number': 3,
  'title': '(Optional) Training a ride duration prediction model',
  'youtube': 'https://www.youtube.com/watch?v=iRunifGSHFc'},
 {'number': 4,
  'title': 'Course overview',
  'youtube': 'https://www.youtube.com/watch?v=teP9KWkP6SM'},
 {'number': 5,
  'title': 'MLOps maturity model',
  'youtube': 'https://www.youtube.com/watch?v=XwTH8BDGzYk'},
 {'number': 6, 'title': 'Homework', 'youtube': ''}]

In [7]:
# Assemble the structure written to meta.json: module header plus its list of units.
meta = dict(
    module=dict(number=module_number, title=module_title),
    units=units,
)

In [8]:
# Display the assembled metadata for a visual check before it is written to disk.
meta

{'module': {'number': 1, 'title': 'Introduction'},
 'units': [{'number': 1,
   'title': 'Introduction',
   'youtube': 'https://www.youtube.com/watch?v=s0uaFZSzwfI'},
  {'number': 2,
   'title': 'Environment preparation',
   'youtube': 'https://www.youtube.com/watch?v=IXSiYkP23zo'},
  {'number': 3,
   'title': '(Optional) Training a ride duration prediction model',
   'youtube': 'https://www.youtube.com/watch?v=iRunifGSHFc'},
  {'number': 4,
   'title': 'Course overview',
   'youtube': 'https://www.youtube.com/watch?v=teP9KWkP6SM'},
  {'number': 5,
   'title': 'MLOps maturity model',
   'youtube': 'https://www.youtube.com/watch?v=XwTH8BDGzYk'},
  {'number': 6, 'title': 'Homework', 'youtube': ''}]}

In [9]:
# Create the module folder if needed, then write the metadata as indented JSON.
module_folder.mkdir(exist_ok=True, parents=True)
meta_json_file.write_text(json.dumps(meta, indent=2))

In [10]:
# Preview the first ten lines of the file just written.
# Done in pure Python rather than `!head`, so the cell also runs on systems
# without a POSIX `head` executable on PATH.
with meta_json_file.open('rt') as f_in:
    print(''.join(f_in.readlines()[:10]), end='')

{
  "module": {
    "number": 1,
    "title": "Introduction"
  },
  "units": [
    {
      "number": 1,
      "title": "Introduction",
      "youtube": "https://www.youtube.com/watch?v=s0uaFZSzwfI"


## Generate page

In [11]:
# Reload the metadata from disk so page generation works from the saved file.
meta = json.loads(meta_json_file.read_text())

In [12]:
# Split the loaded metadata into the module header and the list of units.
module_info, units = meta['module'], meta['units']

In [13]:
# Display the module header loaded from meta.json (it has 'number' and 'title' keys).
module_info

{'number': 1, 'title': 'Introduction'}

In [14]:
# Thumbnails are stored in an `images` subfolder of the module folder.
images_folder = module_folder.joinpath('images')
images_folder.mkdir(exist_ok=True, parents=True)

In [15]:
# Jinja2 template for one unit section of the page: a `## <module>.<unit> <title>`
# heading followed by either a thumbnail image linking to the video (when
# `youtube` is set) or the placeholder text COMING SOON.
# `.lstrip()` removes the leading newline of the literal; the trailing blank
# lines are kept.
template_string = """
## {{ module_number }}.{{ unit_number }} {{ unit_title }}

{% if youtube %}<a href="{{ youtube }}">
  <img src="{{ thumbnail }}">
</a>{% endif %}{% if not youtube %}COMING SOON{% endif %}


""".lstrip()

template = Template(template_string)

In [16]:
def download_thumbnail(video, module, unit, folder,
                       play_icon=Path('../images/play.png'), timeout=30):
    """Download a YouTube thumbnail and overlay a centered play icon on it.

    The result is cached on disk: if the target file already exists it is
    returned immediately and nothing is downloaded.

    Parameters
    ----------
    video : str
        YouTube watch URL; the video id is read from its ``v`` query parameter.
    module : int or str
        Module identifier used in the file name.
    unit : int, numpy integer or str
        Unit identifier. Any integer type is zero-padded to two digits;
        anything else is inserted into the file name as-is.
    folder : pathlib.Path or str
        Directory the thumbnail is written to.
    play_icon : pathlib.Path or str, optional
        Image pasted onto the center of the thumbnail.
    timeout : float, optional
        Timeout in seconds for the thumbnail HTTP request.

    Returns
    -------
    pathlib.Path
        Path of the (existing or newly written) thumbnail file.

    Raises
    ------
    KeyError
        If ``video`` has no ``v`` query parameter.
    requests.HTTPError
        If the thumbnail request returns an error status.
    """
    # isinstance (not an exact type check) so that every integer flavour,
    # e.g. np.int32, gets the same zero-padded name; bool is excluded because
    # it is technically an int subclass but not a unit number.
    if isinstance(unit, (int, np.integer)) and not isinstance(unit, bool):
        thumbnail_name = f'thumbnail-{module}-{unit:02d}.jpg'
    else:
        thumbnail_name = f'thumbnail-{module}-{unit}.jpg'

    thumbnail_file = Path(folder) / thumbnail_name

    if thumbnail_file.exists():
        print(f'{thumbnail_file} exists')
        return thumbnail_file

    video_id = parse_qs(urlparse(video).query)['v'][0]
    print(f'processing video {video_id}...')
    thumbnail_url = f'https://img.youtube.com/vi/{video_id}/0.jpg'

    # Fail fast on network problems instead of hanging or handing an HTTP
    # error page to the image decoder.
    response = requests.get(thumbnail_url, timeout=timeout)
    response.raise_for_status()

    with Image.open(io.BytesIO(response.content)) as downloaded:
        # JPEG output needs an RGB image.
        thumbnail = downloaded.convert('RGB')
    w_img, h_img = thumbnail.size

    with Image.open(Path(play_icon)) as icon:
        # The icon doubles as its own paste mask, which requires an alpha channel.
        play = icon.convert('RGBA')
    w_play, h_play = play.size

    # Top-left corner that centers the icon on the thumbnail.
    x0 = w_img // 2 - w_play // 2
    y0 = h_img // 2 - h_play // 2

    thumbnail.paste(play, (x0, y0), play)
    thumbnail.save(thumbnail_file, quality=90)

    print('saved to', thumbnail_file)

    return thumbnail_file

In [23]:
# Render one markdown section per unit and collect them in `parts`.
module_number = module_info['number']

parts = []

for unit in units:
    unit_number = unit['number']

    params = {
        'module_number': module_number,
        'module_name': module_info['title'],
        'unit_number': unit_number,
        'unit_title': unit['title'],
    }

    # `.get(...) or ''` tolerates a missing key, an empty string and a JSON null.
    youtube = unit.get('youtube') or ''
    if youtube.startswith('https'):
        thumbnail = download_thumbnail(youtube, module_number, unit_number, images_folder)
        # The page lives in the module folder, so link the image relative to it.
        # `as_posix()` keeps forward slashes in the markup on every OS.
        params['thumbnail'] = thumbnail.relative_to(module_folder).as_posix()
        params['youtube'] = f'{youtube}&list={playlist_id}'

    # Use a separate name for the rendered text so the template source held in
    # `template_string` is not overwritten.
    rendered_unit = template.render(params)
    print(rendered_unit)
    parts.append(rendered_unit)

..\01-intro\images\thumbnail-1-01.jpg exists
## 1.1 Introduction

<a href="https://www.youtube.com/watch?v=s0uaFZSzwfI&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK">
  <img src="images/thumbnail-1-01.jpg">
</a>


processing video IXSiYkP23zo...
saved to ..\01-intro\images\thumbnail-1-02.jpg
## 1.2 Environment preparation

<a href="https://www.youtube.com/watch?v=IXSiYkP23zo&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK">
  <img src="images/thumbnail-1-02.jpg">
</a>


..\01-intro\images\thumbnail-1-03.jpg exists
## 1.3 (Optional) Training a ride duration prediction model

<a href="https://www.youtube.com/watch?v=iRunifGSHFc&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK">
  <img src="images/thumbnail-1-03.jpg">
</a>


..\01-intro\images\thumbnail-1-04.jpg exists
## 1.4 Course overview

<a href="https://www.youtube.com/watch?v=teP9KWkP6SM&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK">
  <img src="images/thumbnail-1-04.jpg">
</a>


..\01-intro\images\thumbnail-1-05.jpg exists
## 1.5 MLOps maturity model

<a hre

In [148]:
# Page heading, e.g. "# 1. Introduction".
# The module metadata stores the module name under the 'title' key.
prefix = f"# {module_info['number']}. {module_info['title']}"

# Heading followed by the rendered unit sections, separated by blank lines.
final_result = '\n\n'.join([prefix] + parts)

In [152]:
# Hand the assembled page text to the `clipboard` package so it can be pasted elsewhere.
clipboard.copy(final_result)