In [1]:
import io
import json

from pathlib import Path
from urllib.parse import urlparse, parse_qs

import pandas as pd
import numpy as np

import requests
import clipboard

from PIL import Image
from jinja2 import Template

In [11]:
# Identity of the module this notebook generates a page for.
module_number = 2
module_title = 'Experiment tracking and model management'

# Relative locations of the module folder and its metadata file.
module_folder = Path('..', '02-experiment-tracking')
meta_json_file = module_folder.joinpath('meta.json')

# YouTube playlist identifier appended to the video links.
playlist_id = 'PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK'

## Prepare `meta.json` info

In [12]:
# Unit table for this module, kept as tuples of fields and serialised to
# tab-separated text (header line first). 'NA' is used where no video URL
# is available.
_header = ('module_number', 'module_title', 'unit_number', 'unit_title', 'full_title', 'youtube')
_rows = [
    ('2', 'Experiment tracking', '1', 'Experiment tracking intro',
     'MLOps Zoomcamp 2.1 - Experiment tracking intro',
     'https://www.youtube.com/watch?v=MiA7LQin9c8'),
    ('2', 'Experiment tracking', '2', 'Getting started with MLflow',
     'MLOps Zoomcamp 2.2 - Getting started with MLflow',
     'https://www.youtube.com/watch?v=cESCQE9J3ZE'),
    ('2', 'Experiment tracking', '3', 'Experiment tracking with MLflow',
     'MLOps Zoomcamp 2.3 - Experiment tracking with MLflow',
     'https://www.youtube.com/watch?v=iaJz-T7VWec'),
    ('2', 'Experiment tracking', '4', 'Model management',
     'MLOps Zoomcamp 2.4 - Model management',
     'https://www.youtube.com/watch?v=OVUPIX88q88'),
    ('2', 'Experiment tracking', '5', 'Model registry',
     'MLOps Zoomcamp 2.5 - Model registry',
     'https://www.youtube.com/watch?v=TKHU7HAvGH8'),
    ('2', 'Experiment tracking', '6', 'MLflow in practice',
     'MLOps Zoomcamp 2.6 - MLflow in practice',
     'NA'),
    ('2', 'Experiment tracking', '7', 'Homework',
     'MLOps Zoomcamp 2.7 - Homework',
     'NA'),
]
text = '\n'.join('\t'.join(fields) for fields in [_header, *_rows])

In [13]:
# Parse the tab-separated unit table into a DataFrame.
df = pd.read_csv(io.StringIO(text), sep='\t')

In [14]:
# Replace missing video URLs with empty strings.
df = df.assign(youtube=df['youtube'].fillna(''))

In [15]:
# Keep only the per-unit columns and turn them into a list of records
# keyed by 'number', 'title' and 'youtube'.
df_units = df.loc[:, ['unit_number', 'unit_title', 'youtube']]
units = (
    df_units
    .rename(columns={'unit_number': 'number', 'unit_title': 'title'})
    .to_dict(orient='records')
)
units

[{'number': 1,
  'title': 'Experiment tracking intro',
  'youtube': 'https://www.youtube.com/watch?v=MiA7LQin9c8'},
 {'number': 2,
  'title': 'Getting started with MLflow',
  'youtube': 'https://www.youtube.com/watch?v=cESCQE9J3ZE'},
 {'number': 3,
  'title': 'Experiment tracking with MLflow',
  'youtube': 'https://www.youtube.com/watch?v=iaJz-T7VWec'},
 {'number': 4,
  'title': 'Model management',
  'youtube': 'https://www.youtube.com/watch?v=OVUPIX88q88'},
 {'number': 5,
  'title': 'Model registry',
  'youtube': 'https://www.youtube.com/watch?v=TKHU7HAvGH8'},
 {'number': 6, 'title': 'MLflow in practice', 'youtube': ''},
 {'number': 7, 'title': 'Homework', 'youtube': ''}]

In [16]:
# Combine the module-level info and the unit records into one structure.
meta = dict(
    module=dict(number=module_number, title=module_title),
    units=units,
)

In [17]:
# Display the assembled metadata for a visual check.
meta

{'module': {'number': 2, 'title': 'Experiment tracking and model management'},
 'units': [{'number': 1,
   'title': 'Experiment tracking intro',
   'youtube': 'https://www.youtube.com/watch?v=MiA7LQin9c8'},
  {'number': 2,
   'title': 'Getting started with MLflow',
   'youtube': 'https://www.youtube.com/watch?v=cESCQE9J3ZE'},
  {'number': 3,
   'title': 'Experiment tracking with MLflow',
   'youtube': 'https://www.youtube.com/watch?v=iaJz-T7VWec'},
  {'number': 4,
   'title': 'Model management',
   'youtube': 'https://www.youtube.com/watch?v=OVUPIX88q88'},
  {'number': 5,
   'title': 'Model registry',
   'youtube': 'https://www.youtube.com/watch?v=TKHU7HAvGH8'},
  {'number': 6, 'title': 'MLflow in practice', 'youtube': ''},
  {'number': 7, 'title': 'Homework', 'youtube': ''}]}

In [18]:
# Ensure the module folder exists, then store the metadata as indented JSON.
module_folder.mkdir(parents=True, exist_ok=True)

with meta_json_file.open('wt') as f_out:
    f_out.write(json.dumps(meta, indent=2))

In [19]:
# Preview the first 10 lines of the written file (the amount `head` prints by
# default). Done in pure Python so the cell does not depend on a Unix `head`
# binary being available on the machine running the notebook.
with meta_json_file.open('rt') as f_in:
    preview_lines = f_in.read().splitlines()[:10]

print('\n'.join(preview_lines))

{
  "module": {
    "number": 2,
    "title": "Experiment tracking and model management"
  },
  "units": [
    {
      "number": 1,
      "title": "Experiment tracking intro",
      "youtube": "https://www.youtube.com/watch?v=MiA7LQin9c8"


## Generate page

In [20]:
# Read the metadata back from meta.json.
meta = json.loads(meta_json_file.read_text())

In [21]:
# Split the metadata into its module-level part and its list of units.
module_info, units = meta['module'], meta['units']

In [22]:
# Show the module-level metadata.
module_info

{'number': 2, 'title': 'Experiment tracking and model management'}

In [23]:
# Thumbnails go into an `images` subfolder of the module folder; create it if missing.
images_folder = module_folder.joinpath('images')
images_folder.mkdir(parents=True, exist_ok=True)

In [24]:
# Jinja2 snippet for a single unit: a heading followed by either a linked
# thumbnail (when `youtube` is set) or the text "COMING SOON".
# The line continuation after the opening quotes keeps the template from
# starting with a newline.
template_string = """\
## {{ module_number }}.{{ unit_number }} {{ unit_title }}

{% if youtube %}<a href="{{ youtube }}">
  <img src="{{ thumbnail }}">
</a>{% endif %}{% if not youtube %}COMING SOON{% endif %}


"""

template = Template(template_string)

In [25]:
def download_thumbnail(video, module, unit, folder,
                       play_icon=Path('../images/play.png'), timeout=10):
    """Download a YouTube thumbnail and overlay a play icon at its centre.

    The result is cached on disk: if the target file already exists it is
    returned immediately and no network request is made.

    Parameters
    ----------
    video : str
        YouTube watch URL; the video id is read from its ``v`` query parameter.
    module : int or str
        Module identifier used in the file name.
    unit : int, numpy integer or str
        Unit identifier. Integer values are zero-padded to two digits,
        anything else is inserted as-is.
    folder : pathlib.Path
        Directory the thumbnail is written to (must already exist).
    play_icon : str or pathlib.Path, optional
        Image pasted over the thumbnail; also used as the paste mask.
    timeout : float, optional
        Timeout in seconds for the thumbnail HTTP request.

    Returns
    -------
    pathlib.Path
        Path of the (existing or newly written) thumbnail file.

    Raises
    ------
    KeyError
        If ``video`` has no ``v`` query parameter.
    requests.HTTPError
        If the thumbnail request returns an error status.
    """
    # isinstance covers every NumPy integer width (int32, int64, ...) and int
    # subclasses, so the zero-padding does not depend on the exact dtype.
    if isinstance(unit, (int, np.integer)):
        file_name = f'thumbnail-{module}-{unit:02d}.jpg'
    else:
        file_name = f'thumbnail-{module}-{unit}.jpg'

    thumbnail_file = folder / file_name

    if thumbnail_file.exists():
        print(f'{thumbnail_file} exists')
        return thumbnail_file

    video_id = parse_qs(urlparse(video).query)['v'][0]
    print(f'processing video {video_id}...')
    thumbnail_url = f'https://img.youtube.com/vi/{video_id}/0.jpg'

    # Bound the request time and fail loudly on HTTP errors instead of trying
    # to decode (and save) whatever body an error response carries.
    response = requests.get(thumbnail_url, timeout=timeout)
    response.raise_for_status()

    # JPEG output needs a mode without alpha; RGB input is unaffected.
    thumbnail = Image.open(io.BytesIO(response.content)).convert('RGB')
    w_img, h_img = thumbnail.size

    # Load the icon inside a context manager so its file handle is closed, and
    # force RGBA so it is always usable as a transparency mask in paste().
    with Image.open(Path(play_icon)) as play_source:
        play = play_source.convert('RGBA')
    w_play, h_play = play.size

    # Top-left corner that centres the icon on the thumbnail.
    x0 = w_img // 2 - w_play // 2
    y0 = h_img // 2 - h_play // 2

    thumbnail.paste(play, (x0, y0), play)
    thumbnail.save(thumbnail_file, quality=90)

    print('saved to', thumbnail_file)

    return thumbnail_file

In [26]:
# Render one page fragment per unit; units with a video get a thumbnail
# (downloaded or reused from disk) that links to the video in the playlist.
module_number = module_info['number']

parts = []

for unit in units:
    unit_number = unit['number']

    params = {
        'module_number': module_number,
        'module_name': module_info['title'],
        'unit_number': unit_number,
        'unit_title': unit['title'],
    }

    # A missing key, a null value and an empty string all mean "no video".
    youtube = unit.get('youtube') or ''
    if youtube.startswith('https'):
        thumbnail = download_thumbnail(youtube, module_number, unit_number, images_folder)
        params['youtube'] = f'{youtube}&list={playlist_id}'
        # Path relative to the folder containing `images`, with forward
        # slashes; unlike slicing `parts`, this does not depend on how many
        # components the module folder path has.
        params['thumbnail'] = thumbnail.relative_to(images_folder.parent).as_posix()

    # Use a dedicated name so the template source in `template_string`
    # is not overwritten by rendered output.
    rendered = template.render(params)
    print(rendered)
    parts.append(rendered)

processing video MiA7LQin9c8...
saved to ..\02-experiment-tracking\images\thumbnail-2-01.jpg
## 2.1 Experiment tracking intro

<a href="https://www.youtube.com/watch?v=MiA7LQin9c8&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK">
  <img src="images/thumbnail-2-01.jpg">
</a>


processing video cESCQE9J3ZE...
saved to ..\02-experiment-tracking\images\thumbnail-2-02.jpg
## 2.2 Getting started with MLflow

<a href="https://www.youtube.com/watch?v=cESCQE9J3ZE&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK">
  <img src="images/thumbnail-2-02.jpg">
</a>


processing video iaJz-T7VWec...
saved to ..\02-experiment-tracking\images\thumbnail-2-03.jpg
## 2.3 Experiment tracking with MLflow

<a href="https://www.youtube.com/watch?v=iaJz-T7VWec&list=PL3MmuxUbc_hIUISrluw_A7wDSmfOhErJK">
  <img src="images/thumbnail-2-03.jpg">
</a>


processing video OVUPIX88q88...
saved to ..\02-experiment-tracking\images\thumbnail-2-04.jpg
## 2.4 Model management

<a href="https://www.youtube.com/watch?v=OVUPIX88q88&list=PL3MmuxUb

In [29]:
# Page heading ("# <number>. <title>") followed by the rendered unit
# fragments, separated by blank lines.
prefix = f"# {module_info['number']}. {module_info['title']}".strip()

final_result = '\n\n'.join([prefix, *parts])

In [30]:
# Copy the generated page text to the clipboard.
clipboard.copy(final_result)